1 //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "UnwrappedLineFormatter.h" 10 #include "FormatToken.h" 11 #include "NamespaceEndCommentsFixer.h" 12 #include "WhitespaceManager.h" 13 #include "llvm/Support/Debug.h" 14 #include <queue> 15 16 #define DEBUG_TYPE "format-formatter" 17 18 namespace clang { 19 namespace format { 20 21 namespace { 22 23 bool startsExternCBlock(const AnnotatedLine &Line) { 24 const FormatToken *Next = Line.First->getNextNonComment(); 25 const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr; 26 return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() && 27 NextNext && NextNext->is(tok::l_brace); 28 } 29 30 bool isRecordLBrace(const FormatToken &Tok) { 31 return Tok.isOneOf(TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, 32 TT_StructLBrace, TT_UnionLBrace); 33 } 34 35 /// Tracks the indent level of \c AnnotatedLines across levels. 36 /// 37 /// \c nextLine must be called for each \c AnnotatedLine, after which \c 38 /// getIndent() will return the indent for the last line \c nextLine was called 39 /// with. 40 /// If the line is not formatted (and thus the indent does not change), calling 41 /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause 42 /// subsequent lines on the same level to be indented at the same level as the 43 /// given line. 44 class LevelIndentTracker { 45 public: 46 LevelIndentTracker(const FormatStyle &Style, 47 const AdditionalKeywords &Keywords, unsigned StartLevel, 48 int AdditionalIndent) 49 : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) { 50 for (unsigned i = 0; i != StartLevel; ++i) 51 IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); 52 } 53 54 /// Returns the indent for the current line. 55 unsigned getIndent() const { return Indent; } 56 57 /// Update the indent state given that \p Line is going to be formatted 58 /// next. 59 void nextLine(const AnnotatedLine &Line) { 60 Offset = getIndentOffset(Line); 61 // Update the indent level cache size so that we can rely on it 62 // having the right size in adjustToUnmodifiedline. 63 if (Line.Level >= IndentForLevel.size()) 64 IndentForLevel.resize(Line.Level + 1, -1); 65 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None && 66 (Line.InPPDirective || 67 (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 68 Line.Type == LT_CommentAbovePPDirective))) { 69 unsigned PPIndentWidth = 70 (Style.PPIndentWidth >= 0) ? Style.PPIndentWidth : Style.IndentWidth; 71 Indent = Line.InMacroBody 72 ? Line.PPLevel * PPIndentWidth + 73 (Line.Level - Line.PPLevel) * Style.IndentWidth 74 : Line.Level * PPIndentWidth; 75 Indent += AdditionalIndent; 76 } else { 77 // When going to lower levels, forget previous higher levels so that we 78 // recompute future higher levels. But don't forget them if we enter a PP 79 // directive, since these do not terminate a C++ code block. 80 if (!Line.InPPDirective) { 81 assert(Line.Level <= IndentForLevel.size()); 82 IndentForLevel.resize(Line.Level + 1); 83 } 84 Indent = getIndent(Line.Level); 85 } 86 if (static_cast<int>(Indent) + Offset >= 0) 87 Indent += Offset; 88 if (Line.IsContinuation) 89 Indent = Line.Level * Style.IndentWidth + Style.ContinuationIndentWidth; 90 } 91 92 /// Update the level indent to adapt to the given \p Line. 93 /// 94 /// When a line is not formatted, we move the subsequent lines on the same 95 /// level to the same indent. 96 /// Note that \c nextLine must have been called before this method. 97 void adjustToUnmodifiedLine(const AnnotatedLine &Line) { 98 if (Line.InPPDirective || Line.IsContinuation) 99 return; 100 assert(Line.Level < IndentForLevel.size()); 101 if (Line.First->is(tok::comment) && IndentForLevel[Line.Level] != -1) 102 return; 103 unsigned LevelIndent = Line.First->OriginalColumn; 104 if (static_cast<int>(LevelIndent) - Offset >= 0) 105 LevelIndent -= Offset; 106 IndentForLevel[Line.Level] = LevelIndent; 107 } 108 109 private: 110 /// Get the offset of the line relatively to the level. 111 /// 112 /// For example, 'public:' labels in classes are offset by 1 or 2 113 /// characters to the left from their level. 114 int getIndentOffset(const AnnotatedLine &Line) { 115 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 116 Style.isCSharp()) { 117 return 0; 118 } 119 120 auto IsAccessModifier = [&](const FormatToken &RootToken) { 121 if (Line.Type == LT_AccessModifier || RootToken.isObjCAccessSpecifier()) 122 return true; 123 124 const auto *Next = RootToken.Next; 125 126 // Handle Qt signals. 127 if (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && 128 Next && Next->is(tok::colon)) { 129 return true; 130 } 131 132 if (Next && Next->isOneOf(Keywords.kw_slots, Keywords.kw_qslots) && 133 Next->Next && Next->Next->is(tok::colon)) { 134 return true; 135 } 136 137 // Handle malformed access specifier e.g. 'private' without trailing ':'. 138 return !Next && RootToken.isAccessSpecifier(false); 139 }; 140 141 if (IsAccessModifier(*Line.First)) { 142 // The AccessModifierOffset may be overridden by IndentAccessModifiers, 143 // in which case we take a negative value of the IndentWidth to simulate 144 // the upper indent level. 145 return Style.IndentAccessModifiers ? -Style.IndentWidth 146 : Style.AccessModifierOffset; 147 } 148 149 return 0; 150 } 151 152 /// Get the indent of \p Level from \p IndentForLevel. 153 /// 154 /// \p IndentForLevel must contain the indent for the level \c l 155 /// at \p IndentForLevel[l], or a value < 0 if the indent for 156 /// that level is unknown. 157 unsigned getIndent(unsigned Level) const { 158 assert(Level < IndentForLevel.size()); 159 if (IndentForLevel[Level] != -1) 160 return IndentForLevel[Level]; 161 if (Level == 0) 162 return 0; 163 return getIndent(Level - 1) + Style.IndentWidth; 164 } 165 166 const FormatStyle &Style; 167 const AdditionalKeywords &Keywords; 168 const unsigned AdditionalIndent; 169 170 /// The indent in characters for each level. It remembers the indent of 171 /// previous lines (that are not PP directives) of equal or lower levels. This 172 /// is used to align formatted lines to the indent of previous non-formatted 173 /// lines. Think about the --lines parameter of clang-format. 174 SmallVector<int> IndentForLevel; 175 176 /// Offset of the current line relative to the indent level. 177 /// 178 /// For example, the 'public' keywords is often indented with a negative 179 /// offset. 180 int Offset = 0; 181 182 /// The current line's indent. 183 unsigned Indent = 0; 184 }; 185 186 const FormatToken *getMatchingNamespaceToken( 187 const AnnotatedLine *Line, 188 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 189 if (!Line->startsWith(tok::r_brace)) 190 return nullptr; 191 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; 192 if (StartLineIndex == UnwrappedLine::kInvalidIndex) 193 return nullptr; 194 assert(StartLineIndex < AnnotatedLines.size()); 195 return AnnotatedLines[StartLineIndex]->First->getNamespaceToken(); 196 } 197 198 StringRef getNamespaceTokenText(const AnnotatedLine *Line) { 199 const FormatToken *NamespaceToken = Line->First->getNamespaceToken(); 200 return NamespaceToken ? NamespaceToken->TokenText : StringRef(); 201 } 202 203 StringRef getMatchingNamespaceTokenText( 204 const AnnotatedLine *Line, 205 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 206 const FormatToken *NamespaceToken = 207 getMatchingNamespaceToken(Line, AnnotatedLines); 208 return NamespaceToken ? NamespaceToken->TokenText : StringRef(); 209 } 210 211 class LineJoiner { 212 public: 213 LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, 214 const SmallVectorImpl<AnnotatedLine *> &Lines) 215 : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()), 216 AnnotatedLines(Lines) {} 217 218 /// Returns the next line, merging multiple lines into one if possible. 219 const AnnotatedLine *getNextMergedLine(bool DryRun, 220 LevelIndentTracker &IndentTracker) { 221 if (Next == End) 222 return nullptr; 223 const AnnotatedLine *Current = *Next; 224 IndentTracker.nextLine(*Current); 225 unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End); 226 if (MergedLines > 0 && Style.ColumnLimit == 0) { 227 // Disallow line merging if there is a break at the start of one of the 228 // input lines. 229 for (unsigned i = 0; i < MergedLines; ++i) 230 if (Next[i + 1]->First->NewlinesBefore > 0) 231 MergedLines = 0; 232 } 233 if (!DryRun) 234 for (unsigned i = 0; i < MergedLines; ++i) 235 join(*Next[0], *Next[i + 1]); 236 Next = Next + MergedLines + 1; 237 return Current; 238 } 239 240 private: 241 /// Calculates how many lines can be merged into 1 starting at \p I. 242 unsigned 243 tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker, 244 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 245 SmallVectorImpl<AnnotatedLine *>::const_iterator E) { 246 const unsigned Indent = IndentTracker.getIndent(); 247 248 // Can't join the last line with anything. 249 if (I + 1 == E) 250 return 0; 251 // We can never merge stuff if there are trailing line comments. 252 const AnnotatedLine *TheLine = *I; 253 if (TheLine->Last->is(TT_LineComment)) 254 return 0; 255 const auto &NextLine = *I[1]; 256 if (NextLine.Type == LT_Invalid || NextLine.First->MustBreakBefore) 257 return 0; 258 if (TheLine->InPPDirective && 259 (!NextLine.InPPDirective || NextLine.First->HasUnescapedNewline)) { 260 return 0; 261 } 262 263 if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) 264 return 0; 265 266 unsigned Limit = 267 Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; 268 // If we already exceed the column limit, we set 'Limit' to 0. The different 269 // tryMerge..() functions can then decide whether to still do merging. 270 Limit = TheLine->Last->TotalLength > Limit 271 ? 0 272 : Limit - TheLine->Last->TotalLength; 273 274 if (TheLine->Last->is(TT_FunctionLBrace) && 275 TheLine->First == TheLine->Last && 276 !Style.BraceWrapping.SplitEmptyFunction && 277 NextLine.First->is(tok::r_brace)) { 278 return tryMergeSimpleBlock(I, E, Limit); 279 } 280 281 const auto *PreviousLine = I != AnnotatedLines.begin() ? I[-1] : nullptr; 282 // Handle empty record blocks where the brace has already been wrapped. 283 if (PreviousLine && TheLine->Last->is(tok::l_brace) && 284 TheLine->First == TheLine->Last) { 285 bool EmptyBlock = NextLine.First->is(tok::r_brace); 286 287 const FormatToken *Tok = PreviousLine->First; 288 if (Tok && Tok->is(tok::comment)) 289 Tok = Tok->getNextNonComment(); 290 291 if (Tok && Tok->getNamespaceToken()) { 292 return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock 293 ? tryMergeSimpleBlock(I, E, Limit) 294 : 0; 295 } 296 297 if (Tok && Tok->is(tok::kw_typedef)) 298 Tok = Tok->getNextNonComment(); 299 if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, 300 tok::kw_extern, Keywords.kw_interface)) { 301 return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock 302 ? tryMergeSimpleBlock(I, E, Limit) 303 : 0; 304 } 305 306 if (Tok && Tok->is(tok::kw_template) && 307 Style.BraceWrapping.SplitEmptyRecord && EmptyBlock) { 308 return 0; 309 } 310 } 311 312 auto ShouldMergeShortFunctions = [this, &I, &NextLine, PreviousLine, 313 TheLine]() { 314 if (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All) 315 return true; 316 if (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && 317 NextLine.First->is(tok::r_brace)) { 318 return true; 319 } 320 321 if (Style.AllowShortFunctionsOnASingleLine & 322 FormatStyle::SFS_InlineOnly) { 323 // Just checking TheLine->Level != 0 is not enough, because it 324 // provokes treating functions inside indented namespaces as short. 325 if (Style.isJavaScript() && TheLine->Last->is(TT_FunctionLBrace)) 326 return true; 327 328 if (TheLine->Level != 0) { 329 if (!PreviousLine) 330 return false; 331 332 // TODO: Use IndentTracker to avoid loop? 333 // Find the last line with lower level. 334 const AnnotatedLine *Line = nullptr; 335 for (auto J = I - 1; J >= AnnotatedLines.begin(); --J) { 336 assert(*J); 337 if (!(*J)->InPPDirective && !(*J)->isComment() && 338 (*J)->Level < TheLine->Level) { 339 Line = *J; 340 break; 341 } 342 } 343 344 if (!Line) 345 return false; 346 347 // Check if the found line starts a record. 348 const auto *LastNonComment = Line->getLastNonComment(); 349 // There must be another token (usually `{`), because we chose a 350 // non-PPDirective and non-comment line that has a smaller level. 351 assert(LastNonComment); 352 return isRecordLBrace(*LastNonComment); 353 } 354 } 355 356 return false; 357 }; 358 359 bool MergeShortFunctions = ShouldMergeShortFunctions(); 360 361 const auto *FirstNonComment = TheLine->getFirstNonComment(); 362 if (!FirstNonComment) 363 return 0; 364 // FIXME: There are probably cases where we should use FirstNonComment 365 // instead of TheLine->First. 366 367 if (Style.CompactNamespaces) { 368 if (const auto *NSToken = TheLine->First->getNamespaceToken()) { 369 int J = 1; 370 assert(TheLine->MatchingClosingBlockLineIndex > 0); 371 for (auto ClosingLineIndex = TheLine->MatchingClosingBlockLineIndex - 1; 372 I + J != E && NSToken->TokenText == getNamespaceTokenText(I[J]) && 373 ClosingLineIndex == I[J]->MatchingClosingBlockLineIndex && 374 I[J]->Last->TotalLength < Limit; 375 ++J, --ClosingLineIndex) { 376 Limit -= I[J]->Last->TotalLength; 377 378 // Reduce indent level for bodies of namespaces which were compacted, 379 // but only if their content was indented in the first place. 380 auto *ClosingLine = AnnotatedLines.begin() + ClosingLineIndex + 1; 381 const int OutdentBy = I[J]->Level - TheLine->Level; 382 assert(OutdentBy >= 0); 383 for (auto *CompactedLine = I + J; CompactedLine <= ClosingLine; 384 ++CompactedLine) { 385 if (!(*CompactedLine)->InPPDirective) { 386 const int Level = (*CompactedLine)->Level; 387 (*CompactedLine)->Level = std::max(Level - OutdentBy, 0); 388 } 389 } 390 } 391 return J - 1; 392 } 393 394 if (auto nsToken = getMatchingNamespaceToken(TheLine, AnnotatedLines)) { 395 int i = 0; 396 unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1; 397 for (; I + 1 + i != E && 398 nsToken->TokenText == 399 getMatchingNamespaceTokenText(I[i + 1], AnnotatedLines) && 400 openingLine == I[i + 1]->MatchingOpeningBlockLineIndex; 401 i++, --openingLine) { 402 // No space between consecutive braces. 403 I[i + 1]->First->SpacesRequiredBefore = 404 I[i]->Last->isNot(tok::r_brace); 405 406 // Indent like the outer-most namespace. 407 IndentTracker.nextLine(*I[i + 1]); 408 } 409 return i; 410 } 411 } 412 413 const auto *LastNonComment = TheLine->getLastNonComment(); 414 assert(LastNonComment); 415 // FIXME: There are probably cases where we should use LastNonComment 416 // instead of TheLine->Last. 417 418 // Try to merge a function block with left brace unwrapped. 419 if (LastNonComment->is(TT_FunctionLBrace) && 420 TheLine->First != LastNonComment) { 421 return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; 422 } 423 // Try to merge a control statement block with left brace unwrapped. 424 if (TheLine->Last->is(tok::l_brace) && FirstNonComment != TheLine->Last && 425 FirstNonComment->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for, 426 TT_ForEachMacro)) { 427 return Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never 428 ? tryMergeSimpleBlock(I, E, Limit) 429 : 0; 430 } 431 // Try to merge a control statement block with left brace wrapped. 432 if (NextLine.First->is(tok::l_brace)) { 433 if ((TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, 434 tok::kw_for, tok::kw_switch, tok::kw_try, 435 tok::kw_do, TT_ForEachMacro) || 436 (TheLine->First->is(tok::r_brace) && TheLine->First->Next && 437 TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) && 438 Style.BraceWrapping.AfterControlStatement == 439 FormatStyle::BWACS_MultiLine) { 440 // If possible, merge the next line's wrapped left brace with the 441 // current line. Otherwise, leave it on the next line, as this is a 442 // multi-line control statement. 443 return (Style.ColumnLimit == 0 || TheLine->Level * Style.IndentWidth + 444 TheLine->Last->TotalLength <= 445 Style.ColumnLimit) 446 ? 1 447 : 0; 448 } 449 if (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, 450 tok::kw_for, TT_ForEachMacro)) { 451 return (Style.BraceWrapping.AfterControlStatement == 452 FormatStyle::BWACS_Always) 453 ? tryMergeSimpleBlock(I, E, Limit) 454 : 0; 455 } 456 if (TheLine->First->isOneOf(tok::kw_else, tok::kw_catch) && 457 Style.BraceWrapping.AfterControlStatement == 458 FormatStyle::BWACS_MultiLine) { 459 // This case if different from the upper BWACS_MultiLine processing 460 // in that a preceding r_brace is not on the same line as else/catch 461 // most likely because of BeforeElse/BeforeCatch set to true. 462 // If the line length doesn't fit ColumnLimit, leave l_brace on the 463 // next line to respect the BWACS_MultiLine. 464 return (Style.ColumnLimit == 0 || 465 TheLine->Last->TotalLength <= Style.ColumnLimit) 466 ? 1 467 : 0; 468 } 469 } 470 if (PreviousLine && TheLine->First->is(tok::l_brace)) { 471 switch (PreviousLine->First->Tok.getKind()) { 472 case tok::at: 473 // Don't merge block with left brace wrapped after ObjC special blocks. 474 if (PreviousLine->First->Next) { 475 tok::ObjCKeywordKind kwId = 476 PreviousLine->First->Next->Tok.getObjCKeywordID(); 477 if (kwId == tok::objc_autoreleasepool || 478 kwId == tok::objc_synchronized) { 479 return 0; 480 } 481 } 482 break; 483 484 case tok::kw_case: 485 case tok::kw_default: 486 // Don't merge block with left brace wrapped after case labels. 487 return 0; 488 489 default: 490 break; 491 } 492 } 493 494 // Don't merge an empty template class or struct if SplitEmptyRecords 495 // is defined. 496 if (PreviousLine && Style.BraceWrapping.SplitEmptyRecord && 497 TheLine->Last->is(tok::l_brace) && PreviousLine->Last) { 498 const FormatToken *Previous = PreviousLine->Last; 499 if (Previous) { 500 if (Previous->is(tok::comment)) 501 Previous = Previous->getPreviousNonComment(); 502 if (Previous) { 503 if (Previous->is(tok::greater) && !PreviousLine->InPPDirective) 504 return 0; 505 if (Previous->is(tok::identifier)) { 506 const FormatToken *PreviousPrevious = 507 Previous->getPreviousNonComment(); 508 if (PreviousPrevious && 509 PreviousPrevious->isOneOf(tok::kw_class, tok::kw_struct)) { 510 return 0; 511 } 512 } 513 } 514 } 515 } 516 517 if (TheLine->First->is(TT_SwitchExpressionLabel)) { 518 return Style.AllowShortCaseExpressionOnASingleLine 519 ? tryMergeShortCaseLabels(I, E, Limit) 520 : 0; 521 } 522 523 if (TheLine->Last->is(tok::l_brace)) { 524 bool ShouldMerge = false; 525 // Try to merge records. 526 if (TheLine->Last->is(TT_EnumLBrace)) { 527 ShouldMerge = Style.AllowShortEnumsOnASingleLine; 528 } else if (TheLine->Last->is(TT_RequiresExpressionLBrace)) { 529 ShouldMerge = Style.AllowShortCompoundRequirementOnASingleLine; 530 } else if (TheLine->Last->isOneOf(TT_ClassLBrace, TT_StructLBrace)) { 531 // NOTE: We use AfterClass (whereas AfterStruct exists) for both classes 532 // and structs, but it seems that wrapping is still handled correctly 533 // elsewhere. 534 ShouldMerge = !Style.BraceWrapping.AfterClass || 535 (NextLine.First->is(tok::r_brace) && 536 !Style.BraceWrapping.SplitEmptyRecord); 537 } else if (TheLine->InPPDirective || 538 !TheLine->First->isOneOf(tok::kw_class, tok::kw_enum, 539 tok::kw_struct)) { 540 // Try to merge a block with left brace unwrapped that wasn't yet 541 // covered. 542 ShouldMerge = !Style.BraceWrapping.AfterFunction || 543 (NextLine.First->is(tok::r_brace) && 544 !Style.BraceWrapping.SplitEmptyFunction); 545 } 546 return ShouldMerge ? tryMergeSimpleBlock(I, E, Limit) : 0; 547 } 548 549 // Try to merge a function block with left brace wrapped. 550 if (NextLine.First->is(TT_FunctionLBrace) && 551 Style.BraceWrapping.AfterFunction) { 552 if (NextLine.Last->is(TT_LineComment)) 553 return 0; 554 555 // Check for Limit <= 2 to account for the " {". 556 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) 557 return 0; 558 Limit -= 2; 559 560 unsigned MergedLines = 0; 561 if (MergeShortFunctions || 562 (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && 563 NextLine.First == NextLine.Last && I + 2 != E && 564 I[2]->First->is(tok::r_brace))) { 565 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); 566 // If we managed to merge the block, count the function header, which is 567 // on a separate line. 568 if (MergedLines > 0) 569 ++MergedLines; 570 } 571 return MergedLines; 572 } 573 auto IsElseLine = [&TheLine]() -> bool { 574 const FormatToken *First = TheLine->First; 575 if (First->is(tok::kw_else)) 576 return true; 577 578 return First->is(tok::r_brace) && First->Next && 579 First->Next->is(tok::kw_else); 580 }; 581 if (TheLine->First->is(tok::kw_if) || 582 (IsElseLine() && (Style.AllowShortIfStatementsOnASingleLine == 583 FormatStyle::SIS_AllIfsAndElse))) { 584 return Style.AllowShortIfStatementsOnASingleLine 585 ? tryMergeSimpleControlStatement(I, E, Limit) 586 : 0; 587 } 588 if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while, tok::kw_do, 589 TT_ForEachMacro)) { 590 return Style.AllowShortLoopsOnASingleLine 591 ? tryMergeSimpleControlStatement(I, E, Limit) 592 : 0; 593 } 594 if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) { 595 return Style.AllowShortCaseLabelsOnASingleLine 596 ? tryMergeShortCaseLabels(I, E, Limit) 597 : 0; 598 } 599 if (TheLine->InPPDirective && 600 (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { 601 return tryMergeSimplePPDirective(I, E, Limit); 602 } 603 return 0; 604 } 605 606 unsigned 607 tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 608 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 609 unsigned Limit) { 610 if (Limit == 0) 611 return 0; 612 if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) 613 return 0; 614 if (1 + I[1]->Last->TotalLength > Limit) 615 return 0; 616 return 1; 617 } 618 619 unsigned tryMergeSimpleControlStatement( 620 SmallVectorImpl<AnnotatedLine *>::const_iterator I, 621 SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { 622 if (Limit == 0) 623 return 0; 624 if (Style.BraceWrapping.AfterControlStatement == 625 FormatStyle::BWACS_Always && 626 I[1]->First->is(tok::l_brace) && 627 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) { 628 return 0; 629 } 630 if (I[1]->InPPDirective != (*I)->InPPDirective || 631 (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) { 632 return 0; 633 } 634 Limit = limitConsideringMacros(I + 1, E, Limit); 635 AnnotatedLine &Line = **I; 636 if (Line.First->isNot(tok::kw_do) && Line.First->isNot(tok::kw_else) && 637 Line.Last->isNot(tok::kw_else) && Line.Last->isNot(tok::r_paren)) { 638 return 0; 639 } 640 // Only merge `do while` if `do` is the only statement on the line. 641 if (Line.First->is(tok::kw_do) && Line.Last->isNot(tok::kw_do)) 642 return 0; 643 if (1 + I[1]->Last->TotalLength > Limit) 644 return 0; 645 // Don't merge with loops, ifs, a single semicolon or a line comment. 646 if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, 647 TT_ForEachMacro, TT_LineComment)) { 648 return 0; 649 } 650 // Only inline simple if's (no nested if or else), unless specified 651 if (Style.AllowShortIfStatementsOnASingleLine == 652 FormatStyle::SIS_WithoutElse) { 653 if (I + 2 != E && Line.startsWith(tok::kw_if) && 654 I[2]->First->is(tok::kw_else)) { 655 return 0; 656 } 657 } 658 return 1; 659 } 660 661 unsigned 662 tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 663 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 664 unsigned Limit) { 665 if (Limit == 0 || I + 1 == E || 666 I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) { 667 return 0; 668 } 669 if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace)) 670 return 0; 671 unsigned NumStmts = 0; 672 unsigned Length = 0; 673 bool EndsWithComment = false; 674 bool InPPDirective = I[0]->InPPDirective; 675 bool InMacroBody = I[0]->InMacroBody; 676 const unsigned Level = I[0]->Level; 677 for (; NumStmts < 3; ++NumStmts) { 678 if (I + 1 + NumStmts == E) 679 break; 680 const AnnotatedLine *Line = I[1 + NumStmts]; 681 if (Line->InPPDirective != InPPDirective) 682 break; 683 if (Line->InMacroBody != InMacroBody) 684 break; 685 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) 686 break; 687 if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, 688 tok::kw_while) || 689 EndsWithComment) { 690 return 0; 691 } 692 if (Line->First->is(tok::comment)) { 693 if (Level != Line->Level) 694 return 0; 695 SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts; 696 for (; J != E; ++J) { 697 Line = *J; 698 if (Line->InPPDirective != InPPDirective) 699 break; 700 if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) 701 break; 702 if (Line->First->isNot(tok::comment) || Level != Line->Level) 703 return 0; 704 } 705 break; 706 } 707 if (Line->Last->is(tok::comment)) 708 EndsWithComment = true; 709 Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. 710 } 711 if (NumStmts == 0 || NumStmts == 3 || Length > Limit) 712 return 0; 713 return NumStmts; 714 } 715 716 unsigned 717 tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 718 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 719 unsigned Limit) { 720 // Don't merge with a preprocessor directive. 721 if (I[1]->Type == LT_PreprocessorDirective) 722 return 0; 723 724 AnnotatedLine &Line = **I; 725 726 // Don't merge ObjC @ keywords and methods. 727 // FIXME: If an option to allow short exception handling clauses on a single 728 // line is added, change this to not return for @try and friends. 729 if (Style.Language != FormatStyle::LK_Java && 730 Line.First->isOneOf(tok::at, tok::minus, tok::plus)) { 731 return 0; 732 } 733 734 // Check that the current line allows merging. This depends on whether we 735 // are in a control flow statements as well as several style flags. 736 if (Line.First->is(tok::kw_case) || 737 (Line.First->Next && Line.First->Next->is(tok::kw_else))) { 738 return 0; 739 } 740 // default: in switch statement 741 if (Line.First->is(tok::kw_default)) { 742 const FormatToken *Tok = Line.First->getNextNonComment(); 743 if (Tok && Tok->is(tok::colon)) 744 return 0; 745 } 746 747 auto IsCtrlStmt = [](const auto &Line) { 748 return Line.First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, 749 tok::kw_do, tok::kw_for, TT_ForEachMacro); 750 }; 751 752 const bool IsSplitBlock = 753 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never || 754 (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Empty && 755 I[1]->First->isNot(tok::r_brace)); 756 757 if (IsCtrlStmt(Line) || 758 Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, 759 tok::kw___finally, tok::r_brace, 760 Keywords.kw___except)) { 761 if (IsSplitBlock) 762 return 0; 763 // Don't merge when we can't except the case when 764 // the control statement block is empty 765 if (!Style.AllowShortIfStatementsOnASingleLine && 766 Line.First->isOneOf(tok::kw_if, tok::kw_else) && 767 !Style.BraceWrapping.AfterControlStatement && 768 I[1]->First->isNot(tok::r_brace)) { 769 return 0; 770 } 771 if (!Style.AllowShortIfStatementsOnASingleLine && 772 Line.First->isOneOf(tok::kw_if, tok::kw_else) && 773 Style.BraceWrapping.AfterControlStatement == 774 FormatStyle::BWACS_Always && 775 I + 2 != E && I[2]->First->isNot(tok::r_brace)) { 776 return 0; 777 } 778 if (!Style.AllowShortLoopsOnASingleLine && 779 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for, 780 TT_ForEachMacro) && 781 !Style.BraceWrapping.AfterControlStatement && 782 I[1]->First->isNot(tok::r_brace)) { 783 return 0; 784 } 785 if (!Style.AllowShortLoopsOnASingleLine && 786 Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for, 787 TT_ForEachMacro) && 788 Style.BraceWrapping.AfterControlStatement == 789 FormatStyle::BWACS_Always && 790 I + 2 != E && I[2]->First->isNot(tok::r_brace)) { 791 return 0; 792 } 793 // FIXME: Consider an option to allow short exception handling clauses on 794 // a single line. 795 // FIXME: This isn't covered by tests. 796 // FIXME: For catch, __except, __finally the first token on the line 797 // is '}', so this isn't correct here. 798 if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, 799 Keywords.kw___except, tok::kw___finally)) { 800 return 0; 801 } 802 } 803 804 if (Line.endsWith(tok::l_brace)) { 805 if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never && 806 Line.First->is(TT_BlockLBrace)) { 807 return 0; 808 } 809 810 if (IsSplitBlock && Line.First == Line.Last && 811 I > AnnotatedLines.begin() && 812 (I[-1]->endsWith(tok::kw_else) || IsCtrlStmt(*I[-1]))) { 813 return 0; 814 } 815 FormatToken *Tok = I[1]->First; 816 auto ShouldMerge = [Tok]() { 817 if (Tok->isNot(tok::r_brace) || Tok->MustBreakBefore) 818 return false; 819 const FormatToken *Next = Tok->getNextNonComment(); 820 return !Next || Next->is(tok::semi); 821 }; 822 823 if (ShouldMerge()) { 824 // We merge empty blocks even if the line exceeds the column limit. 825 Tok->SpacesRequiredBefore = 826 (Style.SpaceInEmptyBlock || Line.Last->is(tok::comment)) ? 1 : 0; 827 Tok->CanBreakBefore = true; 828 return 1; 829 } else if (Limit != 0 && !Line.startsWithNamespace() && 830 !startsExternCBlock(Line)) { 831 // We don't merge short records. 832 if (isRecordLBrace(*Line.Last)) 833 return 0; 834 835 // Check that we still have three lines and they fit into the limit. 836 if (I + 2 == E || I[2]->Type == LT_Invalid) 837 return 0; 838 Limit = limitConsideringMacros(I + 2, E, Limit); 839 840 if (!nextTwoLinesFitInto(I, Limit)) 841 return 0; 842 843 // Second, check that the next line does not contain any braces - if it 844 // does, readability declines when putting it into a single line. 845 if (I[1]->Last->is(TT_LineComment)) 846 return 0; 847 do { 848 if (Tok->is(tok::l_brace) && Tok->isNot(BK_BracedInit)) 849 return 0; 850 Tok = Tok->Next; 851 } while (Tok); 852 853 // Last, check that the third line starts with a closing brace. 854 Tok = I[2]->First; 855 if (Tok->isNot(tok::r_brace)) 856 return 0; 857 858 // Don't merge "if (a) { .. } else {". 859 if (Tok->Next && Tok->Next->is(tok::kw_else)) 860 return 0; 861 862 // Don't merge a trailing multi-line control statement block like: 863 // } else if (foo && 864 // bar) 865 // { <-- current Line 866 // baz(); 867 // } 868 if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) && 869 Style.BraceWrapping.AfterControlStatement == 870 FormatStyle::BWACS_MultiLine) { 871 return 0; 872 } 873 874 return 2; 875 } 876 } else if (I[1]->First->is(tok::l_brace)) { 877 if (I[1]->Last->is(TT_LineComment)) 878 return 0; 879 880 // Check for Limit <= 2 to account for the " {". 881 if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I))) 882 return 0; 883 Limit -= 2; 884 unsigned MergedLines = 0; 885 if (Style.AllowShortBlocksOnASingleLine != FormatStyle::SBS_Never || 886 (I[1]->First == I[1]->Last && I + 2 != E && 887 I[2]->First->is(tok::r_brace))) { 888 MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); 889 // If we managed to merge the block, count the statement header, which 890 // is on a separate line. 891 if (MergedLines > 0) 892 ++MergedLines; 893 } 894 return MergedLines; 895 } 896 return 0; 897 } 898 899 /// Returns the modified column limit for \p I if it is inside a macro and 900 /// needs a trailing '\'. 901 unsigned 902 limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 903 SmallVectorImpl<AnnotatedLine *>::const_iterator E, 904 unsigned Limit) { 905 if (I[0]->InPPDirective && I + 1 != E && 906 !I[1]->First->HasUnescapedNewline && I[1]->First->isNot(tok::eof)) { 907 return Limit < 2 ? 0 : Limit - 2; 908 } 909 return Limit; 910 } 911 912 bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, 913 unsigned Limit) { 914 if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) 915 return false; 916 return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; 917 } 918 919 bool containsMustBreak(const AnnotatedLine *Line) { 920 assert(Line->First); 921 // Ignore the first token, because in this situation, it applies more to the 922 // last token of the previous line. 923 for (const FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) 924 if (Tok->MustBreakBefore) 925 return true; 926 return false; 927 } 928 929 void join(AnnotatedLine &A, const AnnotatedLine &B) { 930 assert(!A.Last->Next); 931 assert(!B.First->Previous); 932 if (B.Affected) 933 A.Affected = true; 934 A.Last->Next = B.First; 935 B.First->Previous = A.Last; 936 B.First->CanBreakBefore = true; 937 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 938 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 939 Tok->TotalLength += LengthA; 940 A.Last = Tok; 941 } 942 } 943 944 const FormatStyle &Style; 945 const AdditionalKeywords &Keywords; 946 const SmallVectorImpl<AnnotatedLine *>::const_iterator End; 947 948 SmallVectorImpl<AnnotatedLine *>::const_iterator Next; 949 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines; 950 }; 951 952 static void markFinalized(FormatToken *Tok) { 953 if (Tok->is(tok::hash) && !Tok->Previous && Tok->Next && 954 Tok->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_ifndef, 955 tok::pp_elif, tok::pp_elifdef, tok::pp_elifndef, 956 tok::pp_else, tok::pp_endif)) { 957 Tok = Tok->Next; 958 } 959 for (; Tok; Tok = Tok->Next) { 960 if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) { 961 // In the first pass we format all macro arguments in the expanded token 962 // stream. Instead of finalizing the macro arguments, we mark that they 963 // will be modified as unexpanded arguments (as part of the macro call 964 // formatting) in the next pass. 965 Tok->MacroCtx->Role = MR_UnexpandedArg; 966 // Reset whether spaces or a line break are required before this token, as 967 // that is context dependent, and that context may change when formatting 968 // the macro call. For example, given M(x) -> 2 * x, and the macro call 969 // M(var), the token 'var' will have SpacesRequiredBefore = 1 after being 970 // formatted as part of the expanded macro, but SpacesRequiredBefore = 0 971 // for its position within the macro call. 972 Tok->SpacesRequiredBefore = 0; 973 if (!Tok->MustBreakBeforeFinalized) 974 Tok->MustBreakBefore = 0; 975 } else { 976 Tok->Finalized = true; 977 } 978 } 979 } 980 981 #ifndef NDEBUG 982 static void printLineState(const LineState &State) { 983 llvm::dbgs() << "State: "; 984 for (const ParenState &P : State.Stack) { 985 llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|" 986 << P.LastSpace << "|" << P.NestedBlockIndent << " "; 987 } 988 llvm::dbgs() << State.NextToken->TokenText << "\n"; 989 } 990 #endif 991 992 /// Base class for classes that format one \c AnnotatedLine. 993 class LineFormatter { 994 public: 995 LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, 996 const FormatStyle &Style, 997 UnwrappedLineFormatter *BlockFormatter) 998 : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), 999 BlockFormatter(BlockFormatter) {} 1000 virtual ~LineFormatter() {} 1001 1002 /// Formats an \c AnnotatedLine and returns the penalty. 1003 /// 1004 /// If \p DryRun is \c false, directly applies the changes. 1005 virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, 1006 unsigned FirstStartColumn, bool DryRun) = 0; 1007 1008 protected: 1009 /// If the \p State's next token is an r_brace closing a nested block, 1010 /// format the nested block before it. 1011 /// 1012 /// Returns \c true if all children could be placed successfully and adapts 1013 /// \p Penalty as well as \p State. If \p DryRun is false, also directly 1014 /// creates changes using \c Whitespaces. 1015 /// 1016 /// The crucial idea here is that children always get formatted upon 1017 /// encountering the closing brace right after the nested block. Now, if we 1018 /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is 1019 /// \c false), the entire block has to be kept on the same line (which is only 1020 /// possible if it fits on the line, only contains a single statement, etc. 1021 /// 1022 /// If \p NewLine is true, we format the nested block on separate lines, i.e. 1023 /// break after the "{", format all lines with correct indentation and the put 1024 /// the closing "}" on yet another new line. 1025 /// 1026 /// This enables us to keep the simple structure of the 1027 /// \c UnwrappedLineFormatter, where we only have two options for each token: 1028 /// break or don't break. 1029 bool formatChildren(LineState &State, bool NewLine, bool DryRun, 1030 unsigned &Penalty) { 1031 const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); 1032 bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block); 1033 FormatToken &Previous = *State.NextToken->Previous; 1034 if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) { 1035 // The previous token does not open a block. Nothing to do. We don't 1036 // assert so that we can simply call this function for all tokens. 1037 return true; 1038 } 1039 1040 if (NewLine || Previous.MacroParent) { 1041 const ParenState &P = State.Stack.back(); 1042 1043 int AdditionalIndent = 1044 P.Indent - Previous.Children[0]->Level * Style.IndentWidth; 1045 Penalty += 1046 BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, 1047 /*FixBadIndentation=*/true); 1048 return true; 1049 } 1050 1051 if (Previous.Children[0]->First->MustBreakBefore) 1052 return false; 1053 1054 // Cannot merge into one line if this line ends on a comment. 1055 if (Previous.is(tok::comment)) 1056 return false; 1057 1058 // Cannot merge multiple statements into a single line. 1059 if (Previous.Children.size() > 1) 1060 return false; 1061 1062 const AnnotatedLine *Child = Previous.Children[0]; 1063 // We can't put the closing "}" on a line with a trailing comment. 1064 if (Child->Last->isTrailingComment()) 1065 return false; 1066 1067 // If the child line exceeds the column limit, we wouldn't want to merge it. 1068 // We add +2 for the trailing " }". 1069 if (Style.ColumnLimit > 0 && 1070 Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) { 1071 return false; 1072 } 1073 1074 if (!DryRun) { 1075 Whitespaces->replaceWhitespace( 1076 *Child->First, /*Newlines=*/0, /*Spaces=*/1, 1077 /*StartOfTokenColumn=*/State.Column, /*IsAligned=*/false, 1078 State.Line->InPPDirective); 1079 } 1080 Penalty += 1081 formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); 1082 if (!DryRun) 1083 markFinalized(Child->First); 1084 1085 State.Column += 1 + Child->Last->TotalLength; 1086 return true; 1087 } 1088 1089 ContinuationIndenter *Indenter; 1090 1091 private: 1092 WhitespaceManager *Whitespaces; 1093 const FormatStyle &Style; 1094 UnwrappedLineFormatter *BlockFormatter; 1095 }; 1096 1097 /// Formatter that keeps the existing line breaks. 1098 class NoColumnLimitLineFormatter : public LineFormatter { 1099 public: 1100 NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, 1101 WhitespaceManager *Whitespaces, 1102 const FormatStyle &Style, 1103 UnwrappedLineFormatter *BlockFormatter) 1104 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} 1105 1106 /// Formats the line, simply keeping all of the input's line breaking 1107 /// decisions. 1108 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, 1109 unsigned FirstStartColumn, bool DryRun) override { 1110 assert(!DryRun); 1111 LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, 1112 &Line, /*DryRun=*/false); 1113 while (State.NextToken) { 1114 bool Newline = 1115 Indenter->mustBreak(State) || 1116 (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); 1117 unsigned Penalty = 0; 1118 formatChildren(State, Newline, /*DryRun=*/false, Penalty); 1119 Indenter->addTokenToState(State, Newline, /*DryRun=*/false); 1120 } 1121 return 0; 1122 } 1123 }; 1124 1125 /// Formatter that puts all tokens into a single line without breaks. 1126 class NoLineBreakFormatter : public LineFormatter { 1127 public: 1128 NoLineBreakFormatter(ContinuationIndenter *Indenter, 1129 WhitespaceManager *Whitespaces, const FormatStyle &Style, 1130 UnwrappedLineFormatter *BlockFormatter) 1131 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} 1132 1133 /// Puts all tokens into a single line. 1134 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, 1135 unsigned FirstStartColumn, bool DryRun) override { 1136 unsigned Penalty = 0; 1137 LineState State = 1138 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); 1139 while (State.NextToken) { 1140 formatChildren(State, /*NewLine=*/false, DryRun, Penalty); 1141 Indenter->addTokenToState( 1142 State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun); 1143 } 1144 return Penalty; 1145 } 1146 }; 1147 1148 /// Finds the best way to break lines. 1149 class OptimizingLineFormatter : public LineFormatter { 1150 public: 1151 OptimizingLineFormatter(ContinuationIndenter *Indenter, 1152 WhitespaceManager *Whitespaces, 1153 const FormatStyle &Style, 1154 UnwrappedLineFormatter *BlockFormatter) 1155 : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} 1156 1157 /// Formats the line by finding the best line breaks with line lengths 1158 /// below the column limit. 1159 unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, 1160 unsigned FirstStartColumn, bool DryRun) override { 1161 LineState State = 1162 Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); 1163 1164 // If the ObjC method declaration does not fit on a line, we should format 1165 // it with one arg per line. 1166 if (State.Line->Type == LT_ObjCMethodDecl) 1167 State.Stack.back().BreakBeforeParameter = true; 1168 1169 // Find best solution in solution space. 1170 return analyzeSolutionSpace(State, DryRun); 1171 } 1172 1173 private: 1174 struct CompareLineStatePointers { 1175 bool operator()(LineState *obj1, LineState *obj2) const { 1176 return *obj1 < *obj2; 1177 } 1178 }; 1179 1180 /// A pair of <penalty, count> that is used to prioritize the BFS on. 1181 /// 1182 /// In case of equal penalties, we want to prefer states that were inserted 1183 /// first. During state generation we make sure that we insert states first 1184 /// that break the line as late as possible. 1185 typedef std::pair<unsigned, unsigned> OrderedPenalty; 1186 1187 /// An edge in the solution space from \c Previous->State to \c State, 1188 /// inserting a newline dependent on the \c NewLine. 1189 struct StateNode { 1190 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 1191 : State(State), NewLine(NewLine), Previous(Previous) {} 1192 LineState State; 1193 bool NewLine; 1194 StateNode *Previous; 1195 }; 1196 1197 /// An item in the prioritized BFS search queue. The \c StateNode's 1198 /// \c State has the given \c OrderedPenalty. 1199 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 1200 1201 /// The BFS queue type. 1202 typedef std::priority_queue<QueueItem, SmallVector<QueueItem>, 1203 std::greater<QueueItem>> 1204 QueueType; 1205 1206 /// Analyze the entire solution space starting from \p InitialState. 1207 /// 1208 /// This implements a variant of Dijkstra's algorithm on the graph that spans 1209 /// the solution space (\c LineStates are the nodes). The algorithm tries to 1210 /// find the shortest path (the one with lowest penalty) from \p InitialState 1211 /// to a state where all tokens are placed. Returns the penalty. 1212 /// 1213 /// If \p DryRun is \c false, directly applies the changes. 1214 unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) { 1215 std::set<LineState *, CompareLineStatePointers> Seen; 1216 1217 // Increasing count of \c StateNode items we have created. This is used to 1218 // create a deterministic order independent of the container. 1219 unsigned Count = 0; 1220 QueueType Queue; 1221 1222 // Insert start element into queue. 1223 StateNode *RootNode = 1224 new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); 1225 Queue.push(QueueItem(OrderedPenalty(0, Count), RootNode)); 1226 ++Count; 1227 1228 unsigned Penalty = 0; 1229 1230 // While not empty, take first element and follow edges. 1231 while (!Queue.empty()) { 1232 // Quit if we still haven't found a solution by now. 1233 if (Count > 25'000'000) 1234 return 0; 1235 1236 Penalty = Queue.top().first.first; 1237 StateNode *Node = Queue.top().second; 1238 if (!Node->State.NextToken) { 1239 LLVM_DEBUG(llvm::dbgs() 1240 << "\n---\nPenalty for line: " << Penalty << "\n"); 1241 break; 1242 } 1243 Queue.pop(); 1244 1245 // Cut off the analysis of certain solutions if the analysis gets too 1246 // complex. See description of IgnoreStackForComparison. 1247 if (Count > 50'000) 1248 Node->State.IgnoreStackForComparison = true; 1249 1250 if (!Seen.insert(&Node->State).second) { 1251 // State already examined with lower penalty. 1252 continue; 1253 } 1254 1255 FormatDecision LastFormat = Node->State.NextToken->getDecision(); 1256 if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) 1257 addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); 1258 if (LastFormat == FD_Unformatted || LastFormat == FD_Break) 1259 addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); 1260 } 1261 1262 if (Queue.empty()) { 1263 // We were unable to find a solution, do nothing. 1264 // FIXME: Add diagnostic? 1265 LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n"); 1266 return 0; 1267 } 1268 1269 // Reconstruct the solution. 1270 if (!DryRun) 1271 reconstructPath(InitialState, Queue.top().second); 1272 1273 LLVM_DEBUG(llvm::dbgs() 1274 << "Total number of analyzed states: " << Count << "\n"); 1275 LLVM_DEBUG(llvm::dbgs() << "---\n"); 1276 1277 return Penalty; 1278 } 1279 1280 /// Add the following state to the analysis queue \c Queue. 1281 /// 1282 /// Assume the current state is \p PreviousNode and has been reached with a 1283 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1284 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1285 bool NewLine, unsigned *Count, QueueType *Queue) { 1286 if (NewLine && !Indenter->canBreak(PreviousNode->State)) 1287 return; 1288 if (!NewLine && Indenter->mustBreak(PreviousNode->State)) 1289 return; 1290 1291 StateNode *Node = new (Allocator.Allocate()) 1292 StateNode(PreviousNode->State, NewLine, PreviousNode); 1293 if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) 1294 return; 1295 1296 Penalty += Indenter->addTokenToState(Node->State, NewLine, true); 1297 1298 Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); 1299 ++(*Count); 1300 } 1301 1302 /// Applies the best formatting by reconstructing the path in the 1303 /// solution space that leads to \c Best. 1304 void reconstructPath(LineState &State, StateNode *Best) { 1305 llvm::SmallVector<StateNode *> Path; 1306 // We do not need a break before the initial token. 1307 while (Best->Previous) { 1308 Path.push_back(Best); 1309 Best = Best->Previous; 1310 } 1311 for (const auto &Node : llvm::reverse(Path)) { 1312 unsigned Penalty = 0; 1313 formatChildren(State, Node->NewLine, /*DryRun=*/false, Penalty); 1314 Penalty += Indenter->addTokenToState(State, Node->NewLine, false); 1315 1316 LLVM_DEBUG({ 1317 printLineState(Node->Previous->State); 1318 if (Node->NewLine) { 1319 llvm::dbgs() << "Penalty for placing " 1320 << Node->Previous->State.NextToken->Tok.getName() 1321 << " on a new line: " << Penalty << "\n"; 1322 } 1323 }); 1324 } 1325 } 1326 1327 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1328 }; 1329 1330 } // anonymous namespace 1331 1332 unsigned UnwrappedLineFormatter::format( 1333 const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun, 1334 int AdditionalIndent, bool FixBadIndentation, unsigned FirstStartColumn, 1335 unsigned NextStartColumn, unsigned LastStartColumn) { 1336 LineJoiner Joiner(Style, Keywords, Lines); 1337 1338 // Try to look up already computed penalty in DryRun-mode. 1339 std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( 1340 &Lines, AdditionalIndent); 1341 auto CacheIt = PenaltyCache.find(CacheKey); 1342 if (DryRun && CacheIt != PenaltyCache.end()) 1343 return CacheIt->second; 1344 1345 assert(!Lines.empty()); 1346 unsigned Penalty = 0; 1347 LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, 1348 AdditionalIndent); 1349 const AnnotatedLine *PrevPrevLine = nullptr; 1350 const AnnotatedLine *PreviousLine = nullptr; 1351 const AnnotatedLine *NextLine = nullptr; 1352 1353 // The minimum level of consecutive lines that have been formatted. 1354 unsigned RangeMinLevel = UINT_MAX; 1355 1356 bool FirstLine = true; 1357 for (const AnnotatedLine *Line = 1358 Joiner.getNextMergedLine(DryRun, IndentTracker); 1359 Line; PrevPrevLine = PreviousLine, PreviousLine = Line, Line = NextLine, 1360 FirstLine = false) { 1361 assert(Line->First); 1362 const AnnotatedLine &TheLine = *Line; 1363 unsigned Indent = IndentTracker.getIndent(); 1364 1365 // We continue formatting unchanged lines to adjust their indent, e.g. if a 1366 // scope was added. However, we need to carefully stop doing this when we 1367 // exit the scope of affected lines to prevent indenting the entire 1368 // remaining file if it currently missing a closing brace. 1369 bool PreviousRBrace = 1370 PreviousLine && PreviousLine->startsWith(tok::r_brace); 1371 bool ContinueFormatting = 1372 TheLine.Level > RangeMinLevel || 1373 (TheLine.Level == RangeMinLevel && !PreviousRBrace && 1374 !TheLine.startsWith(tok::r_brace)); 1375 1376 bool FixIndentation = (FixBadIndentation || ContinueFormatting) && 1377 Indent != TheLine.First->OriginalColumn; 1378 bool ShouldFormat = TheLine.Affected || FixIndentation; 1379 // We cannot format this line; if the reason is that the line had a 1380 // parsing error, remember that. 1381 if (ShouldFormat && TheLine.Type == LT_Invalid && Status) { 1382 Status->FormatComplete = false; 1383 Status->Line = 1384 SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation()); 1385 } 1386 1387 if (ShouldFormat && TheLine.Type != LT_Invalid) { 1388 if (!DryRun) { 1389 bool LastLine = TheLine.First->is(tok::eof); 1390 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent, 1391 LastLine ? LastStartColumn : NextStartColumn + Indent); 1392 } 1393 1394 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); 1395 unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); 1396 bool FitsIntoOneLine = 1397 !TheLine.ContainsMacroCall && 1398 (TheLine.Last->TotalLength + Indent <= ColumnLimit || 1399 (TheLine.Type == LT_ImportStatement && 1400 (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || 1401 (Style.isCSharp() && 1402 TheLine.InPPDirective)); // don't split #regions in C# 1403 if (Style.ColumnLimit == 0) { 1404 NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) 1405 .formatLine(TheLine, NextStartColumn + Indent, 1406 FirstLine ? FirstStartColumn : 0, DryRun); 1407 } else if (FitsIntoOneLine) { 1408 Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) 1409 .formatLine(TheLine, NextStartColumn + Indent, 1410 FirstLine ? FirstStartColumn : 0, DryRun); 1411 } else { 1412 Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) 1413 .formatLine(TheLine, NextStartColumn + Indent, 1414 FirstLine ? FirstStartColumn : 0, DryRun); 1415 } 1416 RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); 1417 } else { 1418 // If no token in the current line is affected, we still need to format 1419 // affected children. 1420 if (TheLine.ChildrenAffected) { 1421 for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) 1422 if (!Tok->Children.empty()) 1423 format(Tok->Children, DryRun); 1424 } 1425 1426 // Adapt following lines on the current indent level to the same level 1427 // unless the current \c AnnotatedLine is not at the beginning of a line. 1428 bool StartsNewLine = 1429 TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst; 1430 if (StartsNewLine) 1431 IndentTracker.adjustToUnmodifiedLine(TheLine); 1432 if (!DryRun) { 1433 bool ReformatLeadingWhitespace = 1434 StartsNewLine && ((PreviousLine && PreviousLine->Affected) || 1435 TheLine.LeadingEmptyLinesAffected); 1436 // Format the first token. 1437 if (ReformatLeadingWhitespace) { 1438 formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, 1439 TheLine.First->OriginalColumn, 1440 TheLine.First->OriginalColumn); 1441 } else { 1442 Whitespaces->addUntouchableToken(*TheLine.First, 1443 TheLine.InPPDirective); 1444 } 1445 1446 // Notify the WhitespaceManager about the unchanged whitespace. 1447 for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next) 1448 Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); 1449 } 1450 NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); 1451 RangeMinLevel = UINT_MAX; 1452 } 1453 if (!DryRun) 1454 markFinalized(TheLine.First); 1455 } 1456 PenaltyCache[CacheKey] = Penalty; 1457 return Penalty; 1458 } 1459 1460 static auto computeNewlines(const AnnotatedLine &Line, 1461 const AnnotatedLine *PreviousLine, 1462 const AnnotatedLine *PrevPrevLine, 1463 const SmallVectorImpl<AnnotatedLine *> &Lines, 1464 const FormatStyle &Style) { 1465 const auto &RootToken = *Line.First; 1466 auto Newlines = 1467 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1468 // Remove empty lines before "}" where applicable. 1469 if (RootToken.is(tok::r_brace) && 1470 (!RootToken.Next || 1471 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) && 1472 // Do not remove empty lines before namespace closing "}". 1473 !getNamespaceToken(&Line, Lines)) { 1474 Newlines = std::min(Newlines, 1u); 1475 } 1476 // Remove empty lines at the start of nested blocks (lambdas/arrow functions) 1477 if (!PreviousLine && Line.Level > 0) 1478 Newlines = std::min(Newlines, 1u); 1479 if (Newlines == 0 && !RootToken.IsFirst) 1480 Newlines = 1; 1481 if (RootToken.IsFirst && 1482 (!Style.KeepEmptyLines.AtStartOfFile || !RootToken.HasUnescapedNewline)) { 1483 Newlines = 0; 1484 } 1485 1486 // Remove empty lines after "{". 1487 if (!Style.KeepEmptyLines.AtStartOfBlock && PreviousLine && 1488 PreviousLine->Last->is(tok::l_brace) && 1489 !PreviousLine->startsWithNamespace() && 1490 !(PrevPrevLine && PrevPrevLine->startsWithNamespace() && 1491 PreviousLine->startsWith(tok::l_brace)) && 1492 !startsExternCBlock(*PreviousLine)) { 1493 Newlines = 1; 1494 } 1495 1496 // Insert or remove empty line before access specifiers. 1497 if (PreviousLine && RootToken.isAccessSpecifier()) { 1498 switch (Style.EmptyLineBeforeAccessModifier) { 1499 case FormatStyle::ELBAMS_Never: 1500 if (Newlines > 1) 1501 Newlines = 1; 1502 break; 1503 case FormatStyle::ELBAMS_Leave: 1504 Newlines = std::max(RootToken.NewlinesBefore, 1u); 1505 break; 1506 case FormatStyle::ELBAMS_LogicalBlock: 1507 if (PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && Newlines <= 1) 1508 Newlines = 2; 1509 if (PreviousLine->First->isAccessSpecifier()) 1510 Newlines = 1; // Previous is an access modifier remove all new lines. 1511 break; 1512 case FormatStyle::ELBAMS_Always: { 1513 const FormatToken *previousToken; 1514 if (PreviousLine->Last->is(tok::comment)) 1515 previousToken = PreviousLine->Last->getPreviousNonComment(); 1516 else 1517 previousToken = PreviousLine->Last; 1518 if ((!previousToken || previousToken->isNot(tok::l_brace)) && 1519 Newlines <= 1) { 1520 Newlines = 2; 1521 } 1522 } break; 1523 } 1524 } 1525 1526 // Insert or remove empty line after access specifiers. 1527 if (PreviousLine && PreviousLine->First->isAccessSpecifier() && 1528 (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) { 1529 // EmptyLineBeforeAccessModifier is handling the case when two access 1530 // modifiers follow each other. 1531 if (!RootToken.isAccessSpecifier()) { 1532 switch (Style.EmptyLineAfterAccessModifier) { 1533 case FormatStyle::ELAAMS_Never: 1534 Newlines = 1; 1535 break; 1536 case FormatStyle::ELAAMS_Leave: 1537 Newlines = std::max(Newlines, 1u); 1538 break; 1539 case FormatStyle::ELAAMS_Always: 1540 if (RootToken.is(tok::r_brace)) // Do not add at end of class. 1541 Newlines = 1u; 1542 else 1543 Newlines = std::max(Newlines, 2u); 1544 break; 1545 } 1546 } 1547 } 1548 1549 return Newlines; 1550 } 1551 1552 void UnwrappedLineFormatter::formatFirstToken( 1553 const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, 1554 const AnnotatedLine *PrevPrevLine, 1555 const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent, 1556 unsigned NewlineIndent) { 1557 FormatToken &RootToken = *Line.First; 1558 if (RootToken.is(tok::eof)) { 1559 unsigned Newlines = std::min( 1560 RootToken.NewlinesBefore, 1561 Style.KeepEmptyLines.AtEndOfFile ? Style.MaxEmptyLinesToKeep + 1 : 1); 1562 unsigned TokenIndent = Newlines ? NewlineIndent : 0; 1563 Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent, 1564 TokenIndent); 1565 return; 1566 } 1567 1568 if (RootToken.Newlines < 0) { 1569 RootToken.Newlines = 1570 computeNewlines(Line, PreviousLine, PrevPrevLine, Lines, Style); 1571 assert(RootToken.Newlines >= 0); 1572 } 1573 1574 if (RootToken.Newlines > 0) 1575 Indent = NewlineIndent; 1576 1577 // Preprocessor directives get indented before the hash only if specified. In 1578 // Javascript import statements are indented like normal statements. 1579 if (!Style.isJavaScript() && 1580 Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && 1581 (Line.Type == LT_PreprocessorDirective || 1582 Line.Type == LT_ImportStatement)) { 1583 Indent = 0; 1584 } 1585 1586 Whitespaces->replaceWhitespace(RootToken, RootToken.Newlines, Indent, Indent, 1587 /*IsAligned=*/false, 1588 Line.InPPDirective && 1589 !RootToken.HasUnescapedNewline); 1590 } 1591 1592 unsigned 1593 UnwrappedLineFormatter::getColumnLimit(bool InPPDirective, 1594 const AnnotatedLine *NextLine) const { 1595 // In preprocessor directives reserve two chars for trailing " \" if the 1596 // next line continues the preprocessor directive. 1597 bool ContinuesPPDirective = 1598 InPPDirective && 1599 // If there is no next line, this is likely a child line and the parent 1600 // continues the preprocessor directive. 1601 (!NextLine || 1602 (NextLine->InPPDirective && 1603 // If there is an unescaped newline between this line and the next, the 1604 // next line starts a new preprocessor directive. 1605 !NextLine->First->HasUnescapedNewline)); 1606 return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0); 1607 } 1608 1609 } // namespace format 1610 } // namespace clang 1611