1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Contains implementation of BreakableToken class and classes derived 11 /// from it. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "BreakableToken.h" 16 #include "ContinuationIndenter.h" 17 #include "clang/Basic/CharInfo.h" 18 #include "clang/Format/Format.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/Support/Debug.h" 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-token-breaker" 24 25 namespace clang { 26 namespace format { 27 28 static constexpr StringRef Blanks = " \t\v\f\r"; 29 30 static StringRef getLineCommentIndentPrefix(StringRef Comment, 31 const FormatStyle &Style) { 32 static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///", 33 "//!", "//:", "//"}; 34 static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##", 35 "//", "#"}; 36 ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes); 37 if (Style.isTextProto()) 38 KnownPrefixes = KnownTextProtoPrefixes; 39 40 assert( 41 llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept { 42 return Lhs.size() > Rhs.size(); 43 })); 44 45 for (StringRef KnownPrefix : KnownPrefixes) { 46 if (Comment.starts_with(KnownPrefix)) { 47 const auto PrefixLength = 48 Comment.find_first_not_of(' ', KnownPrefix.size()); 49 return Comment.substr(0, PrefixLength); 50 } 51 } 52 return {}; 53 } 54 55 static BreakableToken::Split 56 getCommentSplit(StringRef Text, unsigned ContentStartColumn, 57 unsigned ColumnLimit, unsigned TabWidth, 58 encoding::Encoding Encoding, const FormatStyle &Style, 59 bool DecorationEndsWithStar = false) { 60 LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text 61 << "\", Column limit: " << ColumnLimit 62 << ", Content start: " << ContentStartColumn << "\n"); 63 if (ColumnLimit <= ContentStartColumn + 1) 64 return BreakableToken::Split(StringRef::npos, 0); 65 66 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 67 unsigned MaxSplitBytes = 0; 68 69 for (unsigned NumChars = 0; 70 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 71 unsigned BytesInChar = 72 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 73 NumChars += encoding::columnWidthWithTabs( 74 Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars, 75 TabWidth, Encoding); 76 MaxSplitBytes += BytesInChar; 77 } 78 79 // In JavaScript, some @tags can be followed by {, and machinery that parses 80 // these comments will fail to understand the comment if followed by a line 81 // break. So avoid ever breaking before a {. 82 if (Style.isJavaScript()) { 83 StringRef::size_type SpaceOffset = 84 Text.find_first_of(Blanks, MaxSplitBytes); 85 if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() && 86 Text[SpaceOffset + 1] == '{') { 87 MaxSplitBytes = SpaceOffset + 1; 88 } 89 } 90 91 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 92 93 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); 94 // Some spaces are unacceptable to break on, rewind past them. 95 while (SpaceOffset != StringRef::npos) { 96 // If a line-comment ends with `\`, the next line continues the comment, 97 // whether or not it starts with `//`. This is confusing and triggers 98 // -Wcomment. 99 // Avoid introducing multiline comments by not allowing a break right 100 // after '\'. 101 if (Style.isCpp()) { 102 StringRef::size_type LastNonBlank = 103 Text.find_last_not_of(Blanks, SpaceOffset); 104 if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') { 105 SpaceOffset = Text.find_last_of(Blanks, LastNonBlank); 106 continue; 107 } 108 } 109 110 // Do not split before a number followed by a dot: this would be interpreted 111 // as a numbered list, which would prevent re-flowing in subsequent passes. 112 if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) { 113 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 114 continue; 115 } 116 117 // Avoid ever breaking before a @tag or a { in JavaScript. 118 if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() && 119 (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) { 120 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 121 continue; 122 } 123 124 break; 125 } 126 127 if (SpaceOffset == StringRef::npos || 128 // Don't break at leading whitespace. 129 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 130 // Make sure that we don't break at leading whitespace that 131 // reaches past MaxSplit. 132 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 133 if (FirstNonWhitespace == StringRef::npos) { 134 // If the comment is only whitespace, we cannot split. 135 return BreakableToken::Split(StringRef::npos, 0); 136 } 137 SpaceOffset = Text.find_first_of( 138 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 139 } 140 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 141 // adaptStartOfLine will break after lines starting with /** if the comment 142 // is broken anywhere. Avoid emitting this break twice here. 143 // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will 144 // insert a break after /**, so this code must not insert the same break. 145 if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*') 146 return BreakableToken::Split(StringRef::npos, 0); 147 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 148 StringRef AfterCut = Text.substr(SpaceOffset); 149 if (!DecorationEndsWithStar) 150 AfterCut = AfterCut.ltrim(Blanks); 151 return BreakableToken::Split(BeforeCut.size(), 152 AfterCut.begin() - BeforeCut.end()); 153 } 154 return BreakableToken::Split(StringRef::npos, 0); 155 } 156 157 static BreakableToken::Split 158 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 159 unsigned TabWidth, encoding::Encoding Encoding) { 160 // FIXME: Reduce unit test case. 161 if (Text.empty()) 162 return BreakableToken::Split(StringRef::npos, 0); 163 if (ColumnLimit <= UsedColumns) 164 return BreakableToken::Split(StringRef::npos, 0); 165 unsigned MaxSplit = ColumnLimit - UsedColumns; 166 StringRef::size_type SpaceOffset = 0; 167 StringRef::size_type SlashOffset = 0; 168 StringRef::size_type WordStartOffset = 0; 169 StringRef::size_type SplitPoint = 0; 170 for (unsigned Chars = 0;;) { 171 unsigned Advance; 172 if (Text[0] == '\\') { 173 Advance = encoding::getEscapeSequenceLength(Text); 174 Chars += Advance; 175 } else { 176 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 177 Chars += encoding::columnWidthWithTabs( 178 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 179 } 180 181 if (Chars > MaxSplit || Text.size() <= Advance) 182 break; 183 184 if (Blanks.contains(Text[0])) 185 SpaceOffset = SplitPoint; 186 if (Text[0] == '/') 187 SlashOffset = SplitPoint; 188 if (Advance == 1 && !isAlphanumeric(Text[0])) 189 WordStartOffset = SplitPoint; 190 191 SplitPoint += Advance; 192 Text = Text.substr(Advance); 193 } 194 195 if (SpaceOffset != 0) 196 return BreakableToken::Split(SpaceOffset + 1, 0); 197 if (SlashOffset != 0) 198 return BreakableToken::Split(SlashOffset + 1, 0); 199 if (WordStartOffset != 0) 200 return BreakableToken::Split(WordStartOffset + 1, 0); 201 if (SplitPoint != 0) 202 return BreakableToken::Split(SplitPoint, 0); 203 return BreakableToken::Split(StringRef::npos, 0); 204 } 205 206 bool switchesFormatting(const FormatToken &Token) { 207 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && 208 "formatting regions are switched by comment tokens"); 209 StringRef Content = Token.TokenText.substr(2).ltrim(); 210 return Content.starts_with("clang-format on") || 211 Content.starts_with("clang-format off"); 212 } 213 214 unsigned 215 BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns, 216 Split Split) const { 217 // Example: consider the content 218 // lala lala 219 // - RemainingTokenColumns is the original number of columns, 10; 220 // - Split is (4, 2), denoting the two spaces between the two words; 221 // 222 // We compute the number of columns when the split is compressed into a single 223 // space, like: 224 // lala lala 225 // 226 // FIXME: Correctly measure the length of whitespace in Split.second so it 227 // works with tabs. 228 return RemainingTokenColumns + 1 - Split.second; 229 } 230 231 unsigned BreakableStringLiteral::getLineCount() const { return 1; } 232 233 unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, 234 unsigned Offset, 235 StringRef::size_type Length, 236 unsigned StartColumn) const { 237 llvm_unreachable("Getting the length of a part of the string literal " 238 "indicates that the code tries to reflow it."); 239 } 240 241 unsigned 242 BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, 243 unsigned StartColumn) const { 244 return UnbreakableTailLength + Postfix.size() + 245 encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, 246 Style.TabWidth, Encoding); 247 } 248 249 unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, 250 bool Break) const { 251 return StartColumn + Prefix.size(); 252 } 253 254 BreakableStringLiteral::BreakableStringLiteral( 255 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 256 StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, 257 encoding::Encoding Encoding, const FormatStyle &Style) 258 : BreakableToken(Tok, InPPDirective, Encoding, Style), 259 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), 260 UnbreakableTailLength(UnbreakableTailLength) { 261 assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix)); 262 Line = Tok.TokenText.substr( 263 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 264 } 265 266 BreakableToken::Split BreakableStringLiteral::getSplit( 267 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 268 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 269 return getStringSplit(Line.substr(TailOffset), ContentStartColumn, 270 ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); 271 } 272 273 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 274 unsigned TailOffset, Split Split, 275 unsigned ContentIndent, 276 WhitespaceManager &Whitespaces) const { 277 Whitespaces.replaceWhitespaceInToken( 278 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 279 Prefix, InPPDirective, 1, StartColumn); 280 } 281 282 BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators( 283 const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, 284 unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, 285 encoding::Encoding Encoding, const FormatStyle &Style) 286 : BreakableStringLiteral( 287 Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'" 288 : QuoteStyle == AtDoubleQuotes ? "@\"" 289 : "\"", 290 /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"", 291 UnbreakableTailLength, InPPDirective, Encoding, Style), 292 BracesNeeded(Tok.isNot(TT_StringInConcatenation)), 293 QuoteStyle(QuoteStyle) { 294 // Find the replacement text for inserting braces and quotes and line breaks. 295 // We don't create an allocated string concatenated from parts here because it 296 // has to outlive the BreakableStringliteral object. The brace replacements 297 // include a quote so that WhitespaceManager can tell it apart from whitespace 298 // replacements between the string and surrounding tokens. 299 300 // The option is not implemented in JavaScript. 301 bool SignOnNewLine = 302 !Style.isJavaScript() && 303 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; 304 305 if (Style.isVerilog()) { 306 // In Verilog, all strings are quoted by double quotes, joined by commas, 307 // and wrapped in braces. The comma is always before the newline. 308 assert(QuoteStyle == DoubleQuotes); 309 LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \""; 310 RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }"; 311 Postfix = "\","; 312 Prefix = "\""; 313 } else { 314 // The plus sign may be on either line. And also C# and JavaScript have 315 // several quoting styles. 316 if (QuoteStyle == SingleQuotes) { 317 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('"; 318 RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')"; 319 Postfix = SignOnNewLine ? "'" : "' +"; 320 Prefix = SignOnNewLine ? "+ '" : "'"; 321 } else { 322 if (QuoteStyle == AtDoubleQuotes) { 323 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@"; 324 Prefix = SignOnNewLine ? "+ @\"" : "@\""; 325 } else { 326 LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\""; 327 Prefix = SignOnNewLine ? "+ \"" : "\""; 328 } 329 RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")"; 330 Postfix = SignOnNewLine ? "\"" : "\" +"; 331 } 332 } 333 334 // Following lines are indented by the width of the brace and space if any. 335 ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0; 336 // The plus sign may need to be unindented depending on the style. 337 // FIXME: Add support for DontAlign. 338 if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus && 339 Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) { 340 ContinuationIndent -= 2; 341 } 342 } 343 344 unsigned BreakableStringLiteralUsingOperators::getRemainingLength( 345 unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { 346 return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) + 347 encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, 348 Style.TabWidth, Encoding); 349 } 350 351 unsigned 352 BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex, 353 bool Break) const { 354 return std::max( 355 0, 356 static_cast<int>(StartColumn) + 357 (Break ? ContinuationIndent + static_cast<int>(Prefix.size()) 358 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1 359 : 0) + 360 (QuoteStyle == AtDoubleQuotes ? 2 : 1))); 361 } 362 363 void BreakableStringLiteralUsingOperators::insertBreak( 364 unsigned LineIndex, unsigned TailOffset, Split Split, 365 unsigned ContentIndent, WhitespaceManager &Whitespaces) const { 366 Whitespaces.replaceWhitespaceInToken( 367 Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset + 368 Split.first, 369 /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix, 370 /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, 371 /*Spaces=*/ 372 std::max(0, static_cast<int>(StartColumn) + ContinuationIndent)); 373 } 374 375 void BreakableStringLiteralUsingOperators::updateAfterBroken( 376 WhitespaceManager &Whitespaces) const { 377 // Add the braces required for breaking the token if they are needed. 378 if (!BracesNeeded) 379 return; 380 381 // To add a brace or parenthesis, we replace the quote (or the at sign) with a 382 // brace and another quote. This is because the rest of the program requires 383 // one replacement for each source range. If we replace the empty strings 384 // around the string, it may conflict with whitespace replacements between the 385 // string and adjacent tokens. 386 Whitespaces.replaceWhitespaceInToken( 387 Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", 388 /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, 389 /*Spaces=*/0); 390 Whitespaces.replaceWhitespaceInToken( 391 Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, 392 /*PreviousPostfix=*/RightBraceQuote, 393 /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); 394 } 395 396 BreakableComment::BreakableComment(const FormatToken &Token, 397 unsigned StartColumn, bool InPPDirective, 398 encoding::Encoding Encoding, 399 const FormatStyle &Style) 400 : BreakableToken(Token, InPPDirective, Encoding, Style), 401 StartColumn(StartColumn) {} 402 403 unsigned BreakableComment::getLineCount() const { return Lines.size(); } 404 405 BreakableToken::Split 406 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, 407 unsigned ColumnLimit, unsigned ContentStartColumn, 408 const llvm::Regex &CommentPragmasRegex) const { 409 // Don't break lines matching the comment pragmas regex. 410 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex])) 411 return Split(StringRef::npos, 0); 412 return getCommentSplit(Content[LineIndex].substr(TailOffset), 413 ContentStartColumn, ColumnLimit, Style.TabWidth, 414 Encoding, Style); 415 } 416 417 void BreakableComment::compressWhitespace( 418 unsigned LineIndex, unsigned TailOffset, Split Split, 419 WhitespaceManager &Whitespaces) const { 420 StringRef Text = Content[LineIndex].substr(TailOffset); 421 // Text is relative to the content line, but Whitespaces operates relative to 422 // the start of the corresponding token, so compute the start of the Split 423 // that needs to be compressed into a single space relative to the start of 424 // its token. 425 unsigned BreakOffsetInToken = 426 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 427 unsigned CharsToRemove = Split.second; 428 Whitespaces.replaceWhitespaceInToken( 429 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", 430 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); 431 } 432 433 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { 434 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; 435 } 436 437 static bool mayReflowContent(StringRef Content) { 438 Content = Content.trim(Blanks); 439 // Lines starting with '@' or '\' commonly have special meaning. 440 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. 441 bool hasSpecialMeaningPrefix = false; 442 for (StringRef Prefix : 443 {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) { 444 if (Content.starts_with(Prefix)) { 445 hasSpecialMeaningPrefix = true; 446 break; 447 } 448 } 449 450 // Numbered lists may also start with a number followed by '.' 451 // To avoid issues if a line starts with a number which is actually the end 452 // of a previous line, we only consider numbers with up to 2 digits. 453 static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); 454 hasSpecialMeaningPrefix = 455 hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content); 456 457 // Simple heuristic for what to reflow: content should contain at least two 458 // characters and either the first or second character must be 459 // non-punctuation. 460 return Content.size() >= 2 && !hasSpecialMeaningPrefix && 461 !Content.ends_with("\\") && 462 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is 463 // true, then the first code point must be 1 byte long. 464 (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); 465 } 466 467 BreakableBlockComment::BreakableBlockComment( 468 const FormatToken &Token, unsigned StartColumn, 469 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 470 encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF) 471 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style), 472 DelimitersOnNewline(false), 473 UnbreakableTailLength(Token.UnbreakableTailLength) { 474 assert(Tok.is(TT_BlockComment) && 475 "block comment section must start with a block comment"); 476 477 StringRef TokenText(Tok.TokenText); 478 assert(TokenText.starts_with("/*") && TokenText.ends_with("*/")); 479 TokenText.substr(2, TokenText.size() - 4) 480 .split(Lines, UseCRLF ? "\r\n" : "\n"); 481 482 int IndentDelta = StartColumn - OriginalStartColumn; 483 Content.resize(Lines.size()); 484 Content[0] = Lines[0]; 485 ContentColumn.resize(Lines.size()); 486 // Account for the initial '/*'. 487 ContentColumn[0] = StartColumn + 2; 488 Tokens.resize(Lines.size()); 489 for (size_t i = 1; i < Lines.size(); ++i) 490 adjustWhitespace(i, IndentDelta); 491 492 // Align decorations with the column of the star on the first line, 493 // that is one column after the start "/*". 494 DecorationColumn = StartColumn + 1; 495 496 // Account for comment decoration patterns like this: 497 // 498 // /* 499 // ** blah blah blah 500 // */ 501 if (Lines.size() >= 2 && Content[1].starts_with("**") && 502 static_cast<unsigned>(ContentColumn[1]) == StartColumn) { 503 DecorationColumn = StartColumn; 504 } 505 506 Decoration = "* "; 507 if (Lines.size() == 1 && !FirstInLine) { 508 // Comments for which FirstInLine is false can start on arbitrary column, 509 // and available horizontal space can be too small to align consecutive 510 // lines with the first one. 511 // FIXME: We could, probably, align them to current indentation level, but 512 // now we just wrap them without stars. 513 Decoration = ""; 514 } 515 for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) { 516 const StringRef &Text = Content[i]; 517 if (i + 1 == e) { 518 // If the last line is empty, the closing "*/" will have a star. 519 if (Text.empty()) 520 break; 521 } else if (!Text.empty() && Decoration.starts_with(Text)) { 522 continue; 523 } 524 while (!Text.starts_with(Decoration)) 525 Decoration = Decoration.drop_back(1); 526 } 527 528 LastLineNeedsDecoration = true; 529 IndentAtLineBreak = ContentColumn[0] + 1; 530 for (size_t i = 1, e = Lines.size(); i < e; ++i) { 531 if (Content[i].empty()) { 532 if (i + 1 == e) { 533 // Empty last line means that we already have a star as a part of the 534 // trailing */. We also need to preserve whitespace, so that */ is 535 // correctly indented. 536 LastLineNeedsDecoration = false; 537 // Align the star in the last '*/' with the stars on the previous lines. 538 if (e >= 2 && !Decoration.empty()) 539 ContentColumn[i] = DecorationColumn; 540 } else if (Decoration.empty()) { 541 // For all other lines, set the start column to 0 if they're empty, so 542 // we do not insert trailing whitespace anywhere. 543 ContentColumn[i] = 0; 544 } 545 continue; 546 } 547 548 // The first line already excludes the star. 549 // The last line excludes the star if LastLineNeedsDecoration is false. 550 // For all other lines, adjust the line to exclude the star and 551 // (optionally) the first whitespace. 552 unsigned DecorationSize = Decoration.starts_with(Content[i]) 553 ? Content[i].size() 554 : Decoration.size(); 555 if (DecorationSize) 556 ContentColumn[i] = DecorationColumn + DecorationSize; 557 Content[i] = Content[i].substr(DecorationSize); 558 if (!Decoration.starts_with(Content[i])) { 559 IndentAtLineBreak = 560 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); 561 } 562 } 563 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 564 565 // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case. 566 if (Style.isJavaScript() || Style.isJava()) { 567 if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) { 568 // This is a multiline jsdoc comment. 569 DelimitersOnNewline = true; 570 } else if (Lines[0].starts_with("* ") && Lines.size() == 1) { 571 // Detect a long single-line comment, like: 572 // /** long long long */ 573 // Below, '2' is the width of '*/'. 574 unsigned EndColumn = 575 ContentColumn[0] + 576 encoding::columnWidthWithTabs(Lines[0], ContentColumn[0], 577 Style.TabWidth, Encoding) + 578 2; 579 DelimitersOnNewline = EndColumn > Style.ColumnLimit; 580 } 581 } 582 583 LLVM_DEBUG({ 584 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 585 llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; 586 for (size_t i = 0; i < Lines.size(); ++i) { 587 llvm::dbgs() << i << " |" << Content[i] << "| " 588 << "CC=" << ContentColumn[i] << "| " 589 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; 590 } 591 }); 592 } 593 594 BreakableToken::Split BreakableBlockComment::getSplit( 595 unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 596 unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 597 // Don't break lines matching the comment pragmas regex. 598 if (!AlwaysReflow || CommentPragmasRegex.match(Content[LineIndex])) 599 return Split(StringRef::npos, 0); 600 return getCommentSplit(Content[LineIndex].substr(TailOffset), 601 ContentStartColumn, ColumnLimit, Style.TabWidth, 602 Encoding, Style, Decoration.ends_with("*")); 603 } 604 605 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 606 int IndentDelta) { 607 // When in a preprocessor directive, the trailing backslash in a block comment 608 // is not needed, but can serve a purpose of uniformity with necessary escaped 609 // newlines outside the comment. In this case we remove it here before 610 // trimming the trailing whitespace. The backslash will be re-added later when 611 // inserting a line break. 612 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 613 if (InPPDirective && Lines[LineIndex - 1].ends_with("\\")) 614 --EndOfPreviousLine; 615 616 // Calculate the end of the non-whitespace text in the previous line. 617 EndOfPreviousLine = 618 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 619 if (EndOfPreviousLine == StringRef::npos) 620 EndOfPreviousLine = 0; 621 else 622 ++EndOfPreviousLine; 623 // Calculate the start of the non-whitespace text in the current line. 624 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 625 if (StartOfLine == StringRef::npos) 626 StartOfLine = Lines[LineIndex].size(); 627 628 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 629 // Adjust Lines to only contain relevant text. 630 size_t PreviousContentOffset = 631 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); 632 Content[LineIndex - 1] = Lines[LineIndex - 1].substr( 633 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); 634 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); 635 636 // Adjust the start column uniformly across all lines. 637 ContentColumn[LineIndex] = 638 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 639 IndentDelta; 640 } 641 642 unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, 643 unsigned Offset, 644 StringRef::size_type Length, 645 unsigned StartColumn) const { 646 return encoding::columnWidthWithTabs( 647 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, 648 Encoding); 649 } 650 651 unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, 652 unsigned Offset, 653 unsigned StartColumn) const { 654 unsigned LineLength = 655 UnbreakableTailLength + 656 getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); 657 if (LineIndex + 1 == Lines.size()) { 658 LineLength += 2; 659 // We never need a decoration when breaking just the trailing "*/" postfix. 660 bool HasRemainingText = Offset < Content[LineIndex].size(); 661 if (!HasRemainingText) { 662 bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration); 663 if (HasDecoration) 664 LineLength -= Decoration.size(); 665 } 666 } 667 return LineLength; 668 } 669 670 unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 671 bool Break) const { 672 if (Break) 673 return IndentAtLineBreak; 674 return std::max(0, ContentColumn[LineIndex]); 675 } 676 677 const llvm::StringSet<> 678 BreakableBlockComment::ContentIndentingJavadocAnnotations = { 679 "@param", "@return", "@returns", "@throws", "@type", "@template", 680 "@see", "@deprecated", "@define", "@exports", "@mods", "@private", 681 }; 682 683 unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { 684 if (!Style.isJava() && !Style.isJavaScript()) 685 return 0; 686 // The content at LineIndex 0 of a comment like: 687 // /** line 0 */ 688 // is "* line 0", so we need to skip over the decoration in that case. 689 StringRef ContentWithNoDecoration = Content[LineIndex]; 690 if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*")) 691 ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks); 692 StringRef FirstWord = ContentWithNoDecoration.substr( 693 0, ContentWithNoDecoration.find_first_of(Blanks)); 694 if (ContentIndentingJavadocAnnotations.contains(FirstWord)) 695 return Style.ContinuationIndentWidth; 696 return 0; 697 } 698 699 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 700 Split Split, unsigned ContentIndent, 701 WhitespaceManager &Whitespaces) const { 702 StringRef Text = Content[LineIndex].substr(TailOffset); 703 StringRef Prefix = Decoration; 704 // We need this to account for the case when we have a decoration "* " for all 705 // the lines except for the last one, where the star in "*/" acts as a 706 // decoration. 707 unsigned LocalIndentAtLineBreak = IndentAtLineBreak; 708 if (LineIndex + 1 == Lines.size() && 709 Text.size() == Split.first + Split.second) { 710 // For the last line we need to break before "*/", but not to add "* ". 711 Prefix = ""; 712 if (LocalIndentAtLineBreak >= 2) 713 LocalIndentAtLineBreak -= 2; 714 } 715 // The split offset is from the beginning of the line. Convert it to an offset 716 // from the beginning of the token text. 717 unsigned BreakOffsetInToken = 718 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 719 unsigned CharsToRemove = Split.second; 720 assert(LocalIndentAtLineBreak >= Prefix.size()); 721 std::string PrefixWithTrailingIndent = std::string(Prefix); 722 PrefixWithTrailingIndent.append(ContentIndent, ' '); 723 Whitespaces.replaceWhitespaceInToken( 724 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 725 PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, 726 /*Spaces=*/LocalIndentAtLineBreak + ContentIndent - 727 PrefixWithTrailingIndent.size()); 728 } 729 730 BreakableToken::Split BreakableBlockComment::getReflowSplit( 731 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 732 if (!mayReflow(LineIndex, CommentPragmasRegex)) 733 return Split(StringRef::npos, 0); 734 735 // If we're reflowing into a line with content indent, only reflow the next 736 // line if its starting whitespace matches the content indent. 737 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 738 if (LineIndex) { 739 unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); 740 if (PreviousContentIndent && Trimmed != StringRef::npos && 741 Trimmed != PreviousContentIndent) { 742 return Split(StringRef::npos, 0); 743 } 744 } 745 746 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 747 } 748 749 bool BreakableBlockComment::introducesBreakBeforeToken() const { 750 // A break is introduced when we want delimiters on newline. 751 return DelimitersOnNewline && 752 Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; 753 } 754 755 void BreakableBlockComment::reflow(unsigned LineIndex, 756 WhitespaceManager &Whitespaces) const { 757 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 758 // Here we need to reflow. 759 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && 760 "Reflowing whitespace within a token"); 761 // This is the offset of the end of the last line relative to the start of 762 // the token text in the token. 763 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 764 Content[LineIndex - 1].size() - 765 tokenAt(LineIndex).TokenText.data(); 766 unsigned WhitespaceLength = TrimmedContent.data() - 767 tokenAt(LineIndex).TokenText.data() - 768 WhitespaceOffsetInToken; 769 Whitespaces.replaceWhitespaceInToken( 770 tokenAt(LineIndex), WhitespaceOffsetInToken, 771 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", 772 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, 773 /*Spaces=*/0); 774 } 775 776 void BreakableBlockComment::adaptStartOfLine( 777 unsigned LineIndex, WhitespaceManager &Whitespaces) const { 778 if (LineIndex == 0) { 779 if (DelimitersOnNewline) { 780 // Since we're breaking at index 1 below, the break position and the 781 // break length are the same. 782 // Note: this works because getCommentSplit is careful never to split at 783 // the beginning of a line. 784 size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); 785 if (BreakLength != StringRef::npos) { 786 insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, 787 Whitespaces); 788 } 789 } 790 return; 791 } 792 // Here no reflow with the previous line will happen. 793 // Fix the decoration of the line at LineIndex. 794 StringRef Prefix = Decoration; 795 if (Content[LineIndex].empty()) { 796 if (LineIndex + 1 == Lines.size()) { 797 if (!LastLineNeedsDecoration) { 798 // If the last line was empty, we don't need a prefix, as the */ will 799 // line up with the decoration (if it exists). 800 Prefix = ""; 801 } 802 } else if (!Decoration.empty()) { 803 // For other empty lines, if we do have a decoration, adapt it to not 804 // contain a trailing whitespace. 805 Prefix = Prefix.substr(0, 1); 806 } 807 } else if (ContentColumn[LineIndex] == 1) { 808 // This line starts immediately after the decorating *. 809 Prefix = Prefix.substr(0, 1); 810 } 811 // This is the offset of the end of the last line relative to the start of the 812 // token text in the token. 813 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 814 Content[LineIndex - 1].size() - 815 tokenAt(LineIndex).TokenText.data(); 816 unsigned WhitespaceLength = Content[LineIndex].data() - 817 tokenAt(LineIndex).TokenText.data() - 818 WhitespaceOffsetInToken; 819 Whitespaces.replaceWhitespaceInToken( 820 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, 821 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); 822 } 823 824 BreakableToken::Split 825 BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { 826 if (DelimitersOnNewline) { 827 // Replace the trailing whitespace of the last line with a newline. 828 // In case the last line is empty, the ending '*/' is already on its own 829 // line. 830 StringRef Line = Content.back().substr(TailOffset); 831 StringRef TrimmedLine = Line.rtrim(Blanks); 832 if (!TrimmedLine.empty()) 833 return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size()); 834 } 835 return Split(StringRef::npos, 0); 836 } 837 838 bool BreakableBlockComment::mayReflow( 839 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 840 // Content[LineIndex] may exclude the indent after the '*' decoration. In that 841 // case, we compute the start of the comment pragma manually. 842 StringRef IndentContent = Content[LineIndex]; 843 if (Lines[LineIndex].ltrim(Blanks).starts_with("*")) 844 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); 845 return LineIndex > 0 && AlwaysReflow && 846 !CommentPragmasRegex.match(IndentContent) && 847 mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 848 !switchesFormatting(tokenAt(LineIndex)); 849 } 850 851 BreakableLineCommentSection::BreakableLineCommentSection( 852 const FormatToken &Token, unsigned StartColumn, bool InPPDirective, 853 encoding::Encoding Encoding, const FormatStyle &Style) 854 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { 855 assert(Tok.is(TT_LineComment) && 856 "line comment section must start with a line comment"); 857 FormatToken *LineTok = nullptr; 858 const int Minimum = Style.SpacesInLineCommentPrefix.Minimum; 859 // How many spaces we changed in the first line of the section, this will be 860 // applied in all following lines 861 int FirstLineSpaceChange = 0; 862 for (const FormatToken *CurrentTok = &Tok; 863 CurrentTok && CurrentTok->is(TT_LineComment); 864 CurrentTok = CurrentTok->Next) { 865 LastLineTok = LineTok; 866 StringRef TokenText(CurrentTok->TokenText); 867 assert((TokenText.starts_with("//") || TokenText.starts_with("#")) && 868 "unsupported line comment prefix, '//' and '#' are supported"); 869 size_t FirstLineIndex = Lines.size(); 870 TokenText.split(Lines, "\n"); 871 Content.resize(Lines.size()); 872 ContentColumn.resize(Lines.size()); 873 PrefixSpaceChange.resize(Lines.size()); 874 Tokens.resize(Lines.size()); 875 Prefix.resize(Lines.size()); 876 OriginalPrefix.resize(Lines.size()); 877 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { 878 Lines[i] = Lines[i].ltrim(Blanks); 879 StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style); 880 OriginalPrefix[i] = IndentPrefix; 881 const int SpacesInPrefix = llvm::count(IndentPrefix, ' '); 882 883 // This lambda also considers multibyte character that is not handled in 884 // functions like isPunctuation provided by CharInfo. 885 const auto NoSpaceBeforeFirstCommentChar = [&]() { 886 assert(Lines[i].size() > IndentPrefix.size()); 887 const char FirstCommentChar = Lines[i][IndentPrefix.size()]; 888 const unsigned FirstCharByteSize = 889 encoding::getCodePointNumBytes(FirstCommentChar, Encoding); 890 if (encoding::columnWidth( 891 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize), 892 Encoding) != 1) { 893 return false; 894 } 895 // In C-like comments, add a space before #. For example this is useful 896 // to preserve the relative indentation when commenting out code with 897 // #includes. 898 // 899 // In languages using # as the comment leader such as proto, don't 900 // add a space to support patterns like: 901 // ######### 902 // # section 903 // ######### 904 if (FirstCommentChar == '#' && !TokenText.starts_with("#")) 905 return false; 906 return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) || 907 isHorizontalWhitespace(FirstCommentChar); 908 }; 909 910 // On the first line of the comment section we calculate how many spaces 911 // are to be added or removed, all lines after that just get only the 912 // change and we will not look at the maximum anymore. Additionally to the 913 // actual first line, we calculate that when the non space Prefix changes, 914 // e.g. from "///" to "//". 915 if (i == 0 || OriginalPrefix[i].rtrim(Blanks) != 916 OriginalPrefix[i - 1].rtrim(Blanks)) { 917 if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() && 918 !NoSpaceBeforeFirstCommentChar()) { 919 FirstLineSpaceChange = Minimum - SpacesInPrefix; 920 } else if (static_cast<unsigned>(SpacesInPrefix) > 921 Style.SpacesInLineCommentPrefix.Maximum) { 922 FirstLineSpaceChange = 923 Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix; 924 } else { 925 FirstLineSpaceChange = 0; 926 } 927 } 928 929 if (Lines[i].size() != IndentPrefix.size()) { 930 assert(Lines[i].size() > IndentPrefix.size()); 931 932 PrefixSpaceChange[i] = SpacesInPrefix + FirstLineSpaceChange < Minimum 933 ? Minimum - SpacesInPrefix 934 : FirstLineSpaceChange; 935 936 const auto FirstNonSpace = Lines[i][IndentPrefix.size()]; 937 const bool IsFormatComment = LineTok && switchesFormatting(*LineTok); 938 const bool LineRequiresLeadingSpace = 939 !NoSpaceBeforeFirstCommentChar() || 940 (FirstNonSpace == '}' && FirstLineSpaceChange != 0); 941 const bool AllowsSpaceChange = 942 !IsFormatComment && 943 (SpacesInPrefix != 0 || LineRequiresLeadingSpace); 944 945 if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) { 946 Prefix[i] = IndentPrefix.str(); 947 Prefix[i].append(PrefixSpaceChange[i], ' '); 948 } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) { 949 Prefix[i] = IndentPrefix 950 .drop_back(std::min<std::size_t>( 951 -PrefixSpaceChange[i], SpacesInPrefix)) 952 .str(); 953 } else { 954 Prefix[i] = IndentPrefix.str(); 955 } 956 } else { 957 // If the IndentPrefix is the whole line, there is no content and we 958 // drop just all space 959 Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str(); 960 } 961 962 Tokens[i] = LineTok; 963 Content[i] = Lines[i].substr(IndentPrefix.size()); 964 ContentColumn[i] = 965 StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn, 966 Style.TabWidth, Encoding); 967 968 // Calculate the end of the non-whitespace text in this line. 969 size_t EndOfLine = Content[i].find_last_not_of(Blanks); 970 if (EndOfLine == StringRef::npos) 971 EndOfLine = Content[i].size(); 972 else 973 ++EndOfLine; 974 Content[i] = Content[i].substr(0, EndOfLine); 975 } 976 LineTok = CurrentTok->Next; 977 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { 978 // A line comment section needs to broken by a line comment that is 979 // preceded by at least two newlines. Note that we put this break here 980 // instead of breaking at a previous stage during parsing, since that 981 // would split the contents of the enum into two unwrapped lines in this 982 // example, which is undesirable: 983 // enum A { 984 // a, // comment about a 985 // 986 // // comment about b 987 // b 988 // }; 989 // 990 // FIXME: Consider putting separate line comment sections as children to 991 // the unwrapped line instead. 992 break; 993 } 994 } 995 } 996 997 unsigned 998 BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, 999 StringRef::size_type Length, 1000 unsigned StartColumn) const { 1001 return encoding::columnWidthWithTabs( 1002 Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, 1003 Encoding); 1004 } 1005 1006 unsigned 1007 BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, 1008 bool /*Break*/) const { 1009 return ContentColumn[LineIndex]; 1010 } 1011 1012 void BreakableLineCommentSection::insertBreak( 1013 unsigned LineIndex, unsigned TailOffset, Split Split, 1014 unsigned ContentIndent, WhitespaceManager &Whitespaces) const { 1015 StringRef Text = Content[LineIndex].substr(TailOffset); 1016 // Compute the offset of the split relative to the beginning of the token 1017 // text. 1018 unsigned BreakOffsetInToken = 1019 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 1020 unsigned CharsToRemove = Split.second; 1021 Whitespaces.replaceWhitespaceInToken( 1022 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 1023 Prefix[LineIndex], InPPDirective, /*Newlines=*/1, 1024 /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size()); 1025 } 1026 1027 BreakableComment::Split BreakableLineCommentSection::getReflowSplit( 1028 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 1029 if (!mayReflow(LineIndex, CommentPragmasRegex)) 1030 return Split(StringRef::npos, 0); 1031 1032 size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 1033 1034 // In a line comment section each line is a separate token; thus, after a 1035 // split we replace all whitespace before the current line comment token 1036 // (which does not need to be included in the split), plus the start of the 1037 // line up to where the content starts. 1038 return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 1039 } 1040 1041 void BreakableLineCommentSection::reflow(unsigned LineIndex, 1042 WhitespaceManager &Whitespaces) const { 1043 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 1044 // Reflow happens between tokens. Replace the whitespace between the 1045 // tokens by the empty string. 1046 Whitespaces.replaceWhitespace( 1047 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, 1048 /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true, 1049 /*InPPDirective=*/false); 1050 } else if (LineIndex > 0) { 1051 // In case we're reflowing after the '\' in: 1052 // 1053 // // line comment \ 1054 // // line 2 1055 // 1056 // the reflow happens inside the single comment token (it is a single line 1057 // comment with an unescaped newline). 1058 // Replace the whitespace between the '\' and '//' with the empty string. 1059 // 1060 // Offset points to after the '\' relative to start of the token. 1061 unsigned Offset = Lines[LineIndex - 1].data() + 1062 Lines[LineIndex - 1].size() - 1063 tokenAt(LineIndex - 1).TokenText.data(); 1064 // WhitespaceLength is the number of chars between the '\' and the '//' on 1065 // the next line. 1066 unsigned WhitespaceLength = 1067 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset; 1068 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 1069 /*ReplaceChars=*/WhitespaceLength, 1070 /*PreviousPostfix=*/"", 1071 /*CurrentPrefix=*/"", 1072 /*InPPDirective=*/false, 1073 /*Newlines=*/0, 1074 /*Spaces=*/0); 1075 } 1076 // Replace the indent and prefix of the token with the reflow prefix. 1077 unsigned Offset = 1078 Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); 1079 unsigned WhitespaceLength = 1080 Content[LineIndex].data() - Lines[LineIndex].data(); 1081 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 1082 /*ReplaceChars=*/WhitespaceLength, 1083 /*PreviousPostfix=*/"", 1084 /*CurrentPrefix=*/ReflowPrefix, 1085 /*InPPDirective=*/false, 1086 /*Newlines=*/0, 1087 /*Spaces=*/0); 1088 } 1089 1090 void BreakableLineCommentSection::adaptStartOfLine( 1091 unsigned LineIndex, WhitespaceManager &Whitespaces) const { 1092 // If this is the first line of a token, we need to inform Whitespace Manager 1093 // about it: either adapt the whitespace range preceding it, or mark it as an 1094 // untouchable token. 1095 // This happens for instance here: 1096 // // line 1 \ 1097 // // line 2 1098 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 1099 // This is the first line for the current token, but no reflow with the 1100 // previous token is necessary. However, we still may need to adjust the 1101 // start column. Note that ContentColumn[LineIndex] is the expected 1102 // content column after a possible update to the prefix, hence the prefix 1103 // length change is included. 1104 unsigned LineColumn = 1105 ContentColumn[LineIndex] - 1106 (Content[LineIndex].data() - Lines[LineIndex].data()) + 1107 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); 1108 1109 // We always want to create a replacement instead of adding an untouchable 1110 // token, even if LineColumn is the same as the original column of the 1111 // token. This is because WhitespaceManager doesn't align trailing 1112 // comments if they are untouchable. 1113 Whitespaces.replaceWhitespace(*Tokens[LineIndex], 1114 /*Newlines=*/1, 1115 /*Spaces=*/LineColumn, 1116 /*StartOfTokenColumn=*/LineColumn, 1117 /*IsAligned=*/true, 1118 /*InPPDirective=*/false); 1119 } 1120 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { 1121 // Adjust the prefix if necessary. 1122 const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0); 1123 const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0); 1124 Whitespaces.replaceWhitespaceInToken( 1125 tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove, 1126 /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false, 1127 /*Newlines=*/0, /*Spaces=*/SpacesToAdd); 1128 } 1129 } 1130 1131 void BreakableLineCommentSection::updateNextToken(LineState &State) const { 1132 if (LastLineTok) 1133 State.NextToken = LastLineTok->Next; 1134 } 1135 1136 bool BreakableLineCommentSection::mayReflow( 1137 unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 1138 // Line comments have the indent as part of the prefix, so we need to 1139 // recompute the start of the line. 1140 StringRef IndentContent = Content[LineIndex]; 1141 if (Lines[LineIndex].starts_with("//")) 1142 IndentContent = Lines[LineIndex].substr(2); 1143 // FIXME: Decide whether we want to reflow non-regular indents: 1144 // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the 1145 // OriginalPrefix[LineIndex-1]. That means we don't reflow 1146 // // text that protrudes 1147 // // into text with different indent 1148 // We do reflow in that case in block comments. 1149 return LineIndex > 0 && AlwaysReflow && 1150 !CommentPragmasRegex.match(IndentContent) && 1151 mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 1152 !switchesFormatting(tokenAt(LineIndex)) && 1153 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; 1154 } 1155 1156 } // namespace format 1157 } // namespace clang 1158