1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 21 namespace clang { 22 class SourceManager; 23 24 namespace format { 25 26 class AnnotatedLine; 27 class BreakableToken; 28 struct FormatToken; 29 struct LineState; 30 struct ParenState; 31 struct RawStringFormatStyleManager; 32 class WhitespaceManager; 33 34 struct RawStringFormatStyleManager { 35 llvm::StringMap<FormatStyle> DelimiterStyle; 36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 37 38 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 39 40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 41 42 std::optional<FormatStyle> 43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 44 }; 45 46 class ContinuationIndenter { 47 public: 48 /// Constructs a \c ContinuationIndenter to format \p Line starting in 49 /// column \p FirstIndent. 50 ContinuationIndenter(const FormatStyle &Style, 51 const AdditionalKeywords &Keywords, 52 const SourceManager &SourceMgr, 53 WhitespaceManager &Whitespaces, 54 encoding::Encoding Encoding, 55 bool BinPackInconclusiveFunctions); 56 57 /// Get the initial state, i.e. the state after placing \p Line's 58 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 59 /// the case of formatting inside raw string literals, \p FirstStartColumn is 60 /// the column at which the state of the parent formatter is. 61 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 62 const AnnotatedLine *Line, bool DryRun); 63 64 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 65 // better home. 66 /// Returns \c true, if a line break after \p State is allowed. 67 bool canBreak(const LineState &State); 68 69 /// Returns \c true, if a line break after \p State is mandatory. 70 bool mustBreak(const LineState &State); 71 72 /// Appends the next token to \p State and updates information 73 /// necessary for indentation. 74 /// 75 /// Puts the token on the current line if \p Newline is \c false and adds a 76 /// line break and necessary indentation otherwise. 77 /// 78 /// If \p DryRun is \c false, also creates and stores the required 79 /// \c Replacement. 80 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 81 unsigned ExtraSpaces = 0); 82 83 /// Get the column limit for this line. This is the style's column 84 /// limit, potentially reduced for preprocessor definitions. 85 unsigned getColumnLimit(const LineState &State) const; 86 87 private: 88 /// Mark the next token as consumed in \p State and modify its stacks 89 /// accordingly. 90 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 91 92 /// Update 'State' according to the next token's fake left parentheses. 93 void moveStatePastFakeLParens(LineState &State, bool Newline); 94 /// Update 'State' according to the next token's fake r_parens. 95 void moveStatePastFakeRParens(LineState &State); 96 97 /// Update 'State' according to the next token being one of "(<{[". 98 void moveStatePastScopeOpener(LineState &State, bool Newline); 99 /// Update 'State' according to the next token being one of ")>}]". 100 void moveStatePastScopeCloser(LineState &State); 101 /// Update 'State' with the next token opening a nested block. 102 void moveStateToNewBlock(LineState &State, bool NewLine); 103 104 /// Reformats a raw string literal. 105 /// 106 /// \returns An extra penalty induced by reformatting the token. 107 unsigned reformatRawStringLiteral(const FormatToken &Current, 108 LineState &State, 109 const FormatStyle &RawStringStyle, 110 bool DryRun, bool Newline); 111 112 /// If the current token is at the end of the current line, handle 113 /// the transition to the next line. 114 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 115 bool DryRun, bool AllowBreak, bool Newline); 116 117 /// If \p Current is a raw string that is configured to be reformatted, 118 /// return the style to be used. 119 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 120 const LineState &State); 121 122 /// If the current token sticks out over the end of the line, break 123 /// it if possible. 124 /// 125 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 126 /// when tokens are broken or lines exceed the column limit, and exceeded 127 /// indicates whether the algorithm purposefully left lines exceeding the 128 /// column limit. 129 /// 130 /// The returned penalty will cover the cost of the additional line breaks 131 /// and column limit violation in all lines except for the last one. The 132 /// penalty for the column limit violation in the last line (and in single 133 /// line tokens) is handled in \c addNextStateToQueue. 134 /// 135 /// \p Strict indicates whether reflowing is allowed to leave characters 136 /// protruding the column limit; if true, lines will be split strictly within 137 /// the column limit where possible; if false, words are allowed to protrude 138 /// over the column limit as long as the penalty is less than the penalty 139 /// of a break. 140 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 141 LineState &State, 142 bool AllowBreak, bool DryRun, 143 bool Strict); 144 145 /// Returns the \c BreakableToken starting at \p Current, or nullptr 146 /// if the current token cannot be broken. 147 std::unique_ptr<BreakableToken> 148 createBreakableToken(const FormatToken &Current, LineState &State, 149 bool AllowBreak); 150 151 /// Appends the next token to \p State and updates information 152 /// necessary for indentation. 153 /// 154 /// Puts the token on the current line. 155 /// 156 /// If \p DryRun is \c false, also creates and stores the required 157 /// \c Replacement. 158 void addTokenOnCurrentLine(LineState &State, bool DryRun, 159 unsigned ExtraSpaces); 160 161 /// Appends the next token to \p State and updates information 162 /// necessary for indentation. 163 /// 164 /// Adds a line break and necessary indentation. 165 /// 166 /// If \p DryRun is \c false, also creates and stores the required 167 /// \c Replacement. 168 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 169 170 /// Calculate the new column for a line wrap before the next token. 171 unsigned getNewLineColumn(const LineState &State); 172 173 /// Adds a multiline token to the \p State. 174 /// 175 /// \returns Extra penalty for the first line of the literal: last line is 176 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 177 /// matter, as we don't change them. 178 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 179 180 /// Returns \c true if the next token starts a multiline string 181 /// literal. 182 /// 183 /// This includes implicitly concatenated strings, strings that will be broken 184 /// by clang-format and string literals with escaped newlines. 185 bool nextIsMultilineString(const LineState &State); 186 187 FormatStyle Style; 188 const AdditionalKeywords &Keywords; 189 const SourceManager &SourceMgr; 190 WhitespaceManager &Whitespaces; 191 encoding::Encoding Encoding; 192 bool BinPackInconclusiveFunctions; 193 llvm::Regex CommentPragmasRegex; 194 const RawStringFormatStyleManager RawStringFormats; 195 }; 196 197 struct ParenState { ParenStateParenState198 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 199 bool AvoidBinPacking, bool NoLineBreak) 200 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 201 NestedBlockIndent(Indent), IsAligned(false), 202 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), 203 BreakBeforeClosingAngle(false), AvoidBinPacking(AvoidBinPacking), 204 BreakBeforeParameter(false), NoLineBreak(NoLineBreak), 205 NoLineBreakInOperand(false), LastOperatorWrapped(true), 206 ContainsLineBreak(false), ContainsUnwrappedBuilder(false), 207 AlignColons(true), ObjCSelectorNameFound(false), 208 HasMultipleNestedBlocks(false), NestedBlockInlined(false), 209 IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false), 210 IsChainedConditional(false), IsWrappedConditional(false), 211 UnindentOperator(false) {} 212 213 /// The token opening this parenthesis level, or nullptr if this level is 214 /// opened by fake parenthesis. 215 /// 216 /// Not considered for memoization as it will always have the same value at 217 /// the same token. 218 const FormatToken *Tok; 219 220 /// The position to which a specific parenthesis level needs to be 221 /// indented. 222 unsigned Indent; 223 224 /// The position of the last space on each level. 225 /// 226 /// Used e.g. to break like: 227 /// functionCall(Parameter, otherCall( 228 /// OtherParameter)); 229 unsigned LastSpace; 230 231 /// If a block relative to this parenthesis level gets wrapped, indent 232 /// it this much. 233 unsigned NestedBlockIndent; 234 235 /// The position the first "<<" operator encountered on each level. 236 /// 237 /// Used to align "<<" operators. 0 if no such operator has been encountered 238 /// on a level. 239 unsigned FirstLessLess = 0; 240 241 /// The column of a \c ? in a conditional expression; 242 unsigned QuestionColumn = 0; 243 244 /// The position of the colon in an ObjC method declaration/call. 245 unsigned ColonPos = 0; 246 247 /// The start of the most recent function in a builder-type call. 248 unsigned StartOfFunctionCall = 0; 249 250 /// Contains the start of array subscript expressions, so that they 251 /// can be aligned. 252 unsigned StartOfArraySubscripts = 0; 253 254 /// If a nested name specifier was broken over multiple lines, this 255 /// contains the start column of the second line. Otherwise 0. 256 unsigned NestedNameSpecifierContinuation = 0; 257 258 /// If a call expression was broken over multiple lines, this 259 /// contains the start column of the second line. Otherwise 0. 260 unsigned CallContinuation = 0; 261 262 /// The column of the first variable name in a variable declaration. 263 /// 264 /// Used to align further variables if necessary. 265 unsigned VariablePos = 0; 266 267 /// Whether this block's indentation is used for alignment. 268 bool IsAligned : 1; 269 270 /// Whether a newline needs to be inserted before the block's closing 271 /// brace. 272 /// 273 /// We only want to insert a newline before the closing brace if there also 274 /// was a newline after the beginning left brace. 275 bool BreakBeforeClosingBrace : 1; 276 277 /// Whether a newline needs to be inserted before the block's closing 278 /// paren. 279 /// 280 /// We only want to insert a newline before the closing paren if there also 281 /// was a newline after the beginning left paren. 282 bool BreakBeforeClosingParen : 1; 283 284 /// Whether a newline needs to be inserted before a closing angle `>`. 285 bool BreakBeforeClosingAngle : 1; 286 287 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 288 /// lines, in this context. 289 bool AvoidBinPacking : 1; 290 291 /// Break after the next comma (or all the commas in this context if 292 /// \c AvoidBinPacking is \c true). 293 bool BreakBeforeParameter : 1; 294 295 /// Line breaking in this context would break a formatting rule. 296 bool NoLineBreak : 1; 297 298 /// Same as \c NoLineBreak, but is restricted until the end of the 299 /// operand (including the next ","). 300 bool NoLineBreakInOperand : 1; 301 302 /// True if the last binary operator on this level was wrapped to the 303 /// next line. 304 bool LastOperatorWrapped : 1; 305 306 /// \c true if this \c ParenState already contains a line-break. 307 /// 308 /// The first line break in a certain \c ParenState causes extra penalty so 309 /// that clang-format prefers similar breaks, i.e. breaks in the same 310 /// parenthesis. 311 bool ContainsLineBreak : 1; 312 313 /// \c true if this \c ParenState contains multiple segments of a 314 /// builder-type call on one line. 315 bool ContainsUnwrappedBuilder : 1; 316 317 /// \c true if the colons of the curren ObjC method expression should 318 /// be aligned. 319 /// 320 /// Not considered for memoization as it will always have the same value at 321 /// the same token. 322 bool AlignColons : 1; 323 324 /// \c true if at least one selector name was found in the current 325 /// ObjC method expression. 326 /// 327 /// Not considered for memoization as it will always have the same value at 328 /// the same token. 329 bool ObjCSelectorNameFound : 1; 330 331 /// \c true if there are multiple nested blocks inside these parens. 332 /// 333 /// Not considered for memoization as it will always have the same value at 334 /// the same token. 335 bool HasMultipleNestedBlocks : 1; 336 337 /// The start of a nested block (e.g. lambda introducer in C++ or 338 /// "function" in JavaScript) is not wrapped to a new line. 339 bool NestedBlockInlined : 1; 340 341 /// \c true if the current \c ParenState represents an Objective-C 342 /// array literal. 343 bool IsInsideObjCArrayLiteral : 1; 344 345 bool IsCSharpGenericTypeConstraint : 1; 346 347 /// true if the current \c ParenState represents the false branch of a chained 348 /// conditional expression (e.g. else-if) 349 bool IsChainedConditional : 1; 350 351 /// true if there conditionnal was wrapped on the first operator (the question 352 /// mark) 353 bool IsWrappedConditional : 1; 354 355 /// Indicates the indent should be reduced by the length of the operator. 356 bool UnindentOperator : 1; 357 358 bool operator<(const ParenState &Other) const { 359 if (Indent != Other.Indent) 360 return Indent < Other.Indent; 361 if (LastSpace != Other.LastSpace) 362 return LastSpace < Other.LastSpace; 363 if (NestedBlockIndent != Other.NestedBlockIndent) 364 return NestedBlockIndent < Other.NestedBlockIndent; 365 if (FirstLessLess != Other.FirstLessLess) 366 return FirstLessLess < Other.FirstLessLess; 367 if (IsAligned != Other.IsAligned) 368 return IsAligned; 369 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 370 return BreakBeforeClosingBrace; 371 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) 372 return BreakBeforeClosingParen; 373 if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle) 374 return BreakBeforeClosingAngle; 375 if (QuestionColumn != Other.QuestionColumn) 376 return QuestionColumn < Other.QuestionColumn; 377 if (AvoidBinPacking != Other.AvoidBinPacking) 378 return AvoidBinPacking; 379 if (BreakBeforeParameter != Other.BreakBeforeParameter) 380 return BreakBeforeParameter; 381 if (NoLineBreak != Other.NoLineBreak) 382 return NoLineBreak; 383 if (LastOperatorWrapped != Other.LastOperatorWrapped) 384 return LastOperatorWrapped; 385 if (ColonPos != Other.ColonPos) 386 return ColonPos < Other.ColonPos; 387 if (StartOfFunctionCall != Other.StartOfFunctionCall) 388 return StartOfFunctionCall < Other.StartOfFunctionCall; 389 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 390 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 391 if (CallContinuation != Other.CallContinuation) 392 return CallContinuation < Other.CallContinuation; 393 if (VariablePos != Other.VariablePos) 394 return VariablePos < Other.VariablePos; 395 if (ContainsLineBreak != Other.ContainsLineBreak) 396 return ContainsLineBreak; 397 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 398 return ContainsUnwrappedBuilder; 399 if (NestedBlockInlined != Other.NestedBlockInlined) 400 return NestedBlockInlined; 401 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) 402 return IsCSharpGenericTypeConstraint; 403 if (IsChainedConditional != Other.IsChainedConditional) 404 return IsChainedConditional; 405 if (IsWrappedConditional != Other.IsWrappedConditional) 406 return IsWrappedConditional; 407 if (UnindentOperator != Other.UnindentOperator) 408 return UnindentOperator; 409 return false; 410 } 411 }; 412 413 /// The current state when indenting a unwrapped line. 414 /// 415 /// As the indenting tries different combinations this is copied by value. 416 struct LineState { 417 /// The number of used columns in the current line. 418 unsigned Column; 419 420 /// The token that needs to be next formatted. 421 FormatToken *NextToken; 422 423 /// \c true if \p NextToken should not continue this line. 424 bool NoContinuation; 425 426 /// The \c NestingLevel at the start of this line. 427 unsigned StartOfLineLevel; 428 429 /// The lowest \c NestingLevel on the current line. 430 unsigned LowestLevelOnLine; 431 432 /// The start column of the string literal, if we're in a string 433 /// literal sequence, 0 otherwise. 434 unsigned StartOfStringLiteral; 435 436 /// Disallow line breaks for this line. 437 bool NoLineBreak; 438 439 /// A stack keeping track of properties applying to parenthesis 440 /// levels. 441 SmallVector<ParenState> Stack; 442 443 /// Ignore the stack of \c ParenStates for state comparison. 444 /// 445 /// In long and deeply nested unwrapped lines, the current algorithm can 446 /// be insufficient for finding the best formatting with a reasonable amount 447 /// of time and memory. Setting this flag will effectively lead to the 448 /// algorithm not analyzing some combinations. However, these combinations 449 /// rarely contain the optimal solution: In short, accepting a higher 450 /// penalty early would need to lead to different values in the \c 451 /// ParenState stack (in an otherwise identical state) and these different 452 /// values would need to lead to a significant amount of avoided penalty 453 /// later. 454 /// 455 /// FIXME: Come up with a better algorithm instead. 456 bool IgnoreStackForComparison; 457 458 /// The indent of the first token. 459 unsigned FirstIndent; 460 461 /// The line that is being formatted. 462 /// 463 /// Does not need to be considered for memoization because it doesn't change. 464 const AnnotatedLine *Line; 465 466 /// Comparison operator to be able to used \c LineState in \c map. 467 bool operator<(const LineState &Other) const { 468 if (NextToken != Other.NextToken) 469 return NextToken < Other.NextToken; 470 if (Column != Other.Column) 471 return Column < Other.Column; 472 if (NoContinuation != Other.NoContinuation) 473 return NoContinuation; 474 if (StartOfLineLevel != Other.StartOfLineLevel) 475 return StartOfLineLevel < Other.StartOfLineLevel; 476 if (LowestLevelOnLine != Other.LowestLevelOnLine) 477 return LowestLevelOnLine < Other.LowestLevelOnLine; 478 if (StartOfStringLiteral != Other.StartOfStringLiteral) 479 return StartOfStringLiteral < Other.StartOfStringLiteral; 480 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 481 return false; 482 return Stack < Other.Stack; 483 } 484 }; 485 486 } // end namespace format 487 } // end namespace clang 488 489 #endif 490