1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 21 namespace clang { 22 class SourceManager; 23 24 namespace format { 25 26 class AnnotatedLine; 27 class BreakableToken; 28 struct FormatToken; 29 struct LineState; 30 struct ParenState; 31 struct RawStringFormatStyleManager; 32 class WhitespaceManager; 33 34 struct RawStringFormatStyleManager { 35 llvm::StringMap<FormatStyle> DelimiterStyle; 36 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 37 38 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 39 40 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 41 42 std::optional<FormatStyle> 43 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 44 }; 45 46 class ContinuationIndenter { 47 public: 48 /// Constructs a \c ContinuationIndenter to format \p Line starting in 49 /// column \p FirstIndent. 50 ContinuationIndenter(const FormatStyle &Style, 51 const AdditionalKeywords &Keywords, 52 const SourceManager &SourceMgr, 53 WhitespaceManager &Whitespaces, 54 encoding::Encoding Encoding, 55 bool BinPackInconclusiveFunctions); 56 57 /// Get the initial state, i.e. the state after placing \p Line's 58 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 59 /// the case of formatting inside raw string literals, \p FirstStartColumn is 60 /// the column at which the state of the parent formatter is. 61 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 62 const AnnotatedLine *Line, bool DryRun); 63 64 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 65 // better home. 66 /// Returns \c true, if a line break after \p State is allowed. 67 bool canBreak(const LineState &State); 68 69 /// Returns \c true, if a line break after \p State is mandatory. 70 bool mustBreak(const LineState &State); 71 72 /// Appends the next token to \p State and updates information 73 /// necessary for indentation. 74 /// 75 /// Puts the token on the current line if \p Newline is \c false and adds a 76 /// line break and necessary indentation otherwise. 77 /// 78 /// If \p DryRun is \c false, also creates and stores the required 79 /// \c Replacement. 80 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 81 unsigned ExtraSpaces = 0); 82 83 /// Get the column limit for this line. This is the style's column 84 /// limit, potentially reduced for preprocessor definitions. 85 unsigned getColumnLimit(const LineState &State) const; 86 87 private: 88 /// Mark the next token as consumed in \p State and modify its stacks 89 /// accordingly. 90 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 91 92 /// Update 'State' according to the next token's fake left parentheses. 93 void moveStatePastFakeLParens(LineState &State, bool Newline); 94 /// Update 'State' according to the next token's fake r_parens. 95 void moveStatePastFakeRParens(LineState &State); 96 97 /// Update 'State' according to the next token being one of "(<{[". 98 void moveStatePastScopeOpener(LineState &State, bool Newline); 99 /// Update 'State' according to the next token being one of ")>}]". 100 void moveStatePastScopeCloser(LineState &State); 101 /// Update 'State' with the next token opening a nested block. 102 void moveStateToNewBlock(LineState &State, bool NewLine); 103 104 /// Reformats a raw string literal. 105 /// 106 /// \returns An extra penalty induced by reformatting the token. 107 unsigned reformatRawStringLiteral(const FormatToken &Current, 108 LineState &State, 109 const FormatStyle &RawStringStyle, 110 bool DryRun, bool Newline); 111 112 /// If the current token is at the end of the current line, handle 113 /// the transition to the next line. 114 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 115 bool DryRun, bool AllowBreak, bool Newline); 116 117 /// If \p Current is a raw string that is configured to be reformatted, 118 /// return the style to be used. 119 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 120 const LineState &State); 121 122 /// If the current token sticks out over the end of the line, break 123 /// it if possible. 124 /// 125 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 126 /// when tokens are broken or lines exceed the column limit, and exceeded 127 /// indicates whether the algorithm purposefully left lines exceeding the 128 /// column limit. 129 /// 130 /// The returned penalty will cover the cost of the additional line breaks 131 /// and column limit violation in all lines except for the last one. The 132 /// penalty for the column limit violation in the last line (and in single 133 /// line tokens) is handled in \c addNextStateToQueue. 134 /// 135 /// \p Strict indicates whether reflowing is allowed to leave characters 136 /// protruding the column limit; if true, lines will be split strictly within 137 /// the column limit where possible; if false, words are allowed to protrude 138 /// over the column limit as long as the penalty is less than the penalty 139 /// of a break. 140 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 141 LineState &State, 142 bool AllowBreak, bool DryRun, 143 bool Strict); 144 145 /// Returns the \c BreakableToken starting at \p Current, or nullptr 146 /// if the current token cannot be broken. 147 std::unique_ptr<BreakableToken> 148 createBreakableToken(const FormatToken &Current, LineState &State, 149 bool AllowBreak); 150 151 /// Appends the next token to \p State and updates information 152 /// necessary for indentation. 153 /// 154 /// Puts the token on the current line. 155 /// 156 /// If \p DryRun is \c false, also creates and stores the required 157 /// \c Replacement. 158 void addTokenOnCurrentLine(LineState &State, bool DryRun, 159 unsigned ExtraSpaces); 160 161 /// Appends the next token to \p State and updates information 162 /// necessary for indentation. 163 /// 164 /// Adds a line break and necessary indentation. 165 /// 166 /// If \p DryRun is \c false, also creates and stores the required 167 /// \c Replacement. 168 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 169 170 /// Calculate the new column for a line wrap before the next token. 171 unsigned getNewLineColumn(const LineState &State); 172 173 /// Adds a multiline token to the \p State. 174 /// 175 /// \returns Extra penalty for the first line of the literal: last line is 176 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 177 /// matter, as we don't change them. 178 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 179 180 /// Returns \c true if the next token starts a multiline string 181 /// literal. 182 /// 183 /// This includes implicitly concatenated strings, strings that will be broken 184 /// by clang-format and string literals with escaped newlines. 185 bool nextIsMultilineString(const LineState &State); 186 187 FormatStyle Style; 188 const AdditionalKeywords &Keywords; 189 const SourceManager &SourceMgr; 190 WhitespaceManager &Whitespaces; 191 encoding::Encoding Encoding; 192 bool BinPackInconclusiveFunctions; 193 llvm::Regex CommentPragmasRegex; 194 const RawStringFormatStyleManager RawStringFormats; 195 }; 196 197 struct ParenState { ParenStateParenState198 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 199 bool AvoidBinPacking, bool NoLineBreak) 200 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 201 NestedBlockIndent(Indent), IsAligned(false), 202 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), 203 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 204 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), 205 LastOperatorWrapped(true), ContainsLineBreak(false), 206 ContainsUnwrappedBuilder(false), AlignColons(true), 207 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), 208 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false), 209 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false), 210 IsWrappedConditional(false), UnindentOperator(false) {} 211 212 /// \brief The token opening this parenthesis level, or nullptr if this level 213 /// is opened by fake parenthesis. 214 /// 215 /// Not considered for memoization as it will always have the same value at 216 /// the same token. 217 const FormatToken *Tok; 218 219 /// The position to which a specific parenthesis level needs to be 220 /// indented. 221 unsigned Indent; 222 223 /// The position of the last space on each level. 224 /// 225 /// Used e.g. to break like: 226 /// functionCall(Parameter, otherCall( 227 /// OtherParameter)); 228 unsigned LastSpace; 229 230 /// If a block relative to this parenthesis level gets wrapped, indent 231 /// it this much. 232 unsigned NestedBlockIndent; 233 234 /// The position the first "<<" operator encountered on each level. 235 /// 236 /// Used to align "<<" operators. 0 if no such operator has been encountered 237 /// on a level. 238 unsigned FirstLessLess = 0; 239 240 /// The column of a \c ? in a conditional expression; 241 unsigned QuestionColumn = 0; 242 243 /// The position of the colon in an ObjC method declaration/call. 244 unsigned ColonPos = 0; 245 246 /// The start of the most recent function in a builder-type call. 247 unsigned StartOfFunctionCall = 0; 248 249 /// Contains the start of array subscript expressions, so that they 250 /// can be aligned. 251 unsigned StartOfArraySubscripts = 0; 252 253 /// If a nested name specifier was broken over multiple lines, this 254 /// contains the start column of the second line. Otherwise 0. 255 unsigned NestedNameSpecifierContinuation = 0; 256 257 /// If a call expression was broken over multiple lines, this 258 /// contains the start column of the second line. Otherwise 0. 259 unsigned CallContinuation = 0; 260 261 /// The column of the first variable name in a variable declaration. 262 /// 263 /// Used to align further variables if necessary. 264 unsigned VariablePos = 0; 265 266 /// Whether this block's indentation is used for alignment. 267 bool IsAligned : 1; 268 269 /// Whether a newline needs to be inserted before the block's closing 270 /// brace. 271 /// 272 /// We only want to insert a newline before the closing brace if there also 273 /// was a newline after the beginning left brace. 274 bool BreakBeforeClosingBrace : 1; 275 276 /// Whether a newline needs to be inserted before the block's closing 277 /// paren. 278 /// 279 /// We only want to insert a newline before the closing paren if there also 280 /// was a newline after the beginning left paren. 281 bool BreakBeforeClosingParen : 1; 282 283 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 284 /// lines, in this context. 285 bool AvoidBinPacking : 1; 286 287 /// Break after the next comma (or all the commas in this context if 288 /// \c AvoidBinPacking is \c true). 289 bool BreakBeforeParameter : 1; 290 291 /// Line breaking in this context would break a formatting rule. 292 bool NoLineBreak : 1; 293 294 /// Same as \c NoLineBreak, but is restricted until the end of the 295 /// operand (including the next ","). 296 bool NoLineBreakInOperand : 1; 297 298 /// True if the last binary operator on this level was wrapped to the 299 /// next line. 300 bool LastOperatorWrapped : 1; 301 302 /// \c true if this \c ParenState already contains a line-break. 303 /// 304 /// The first line break in a certain \c ParenState causes extra penalty so 305 /// that clang-format prefers similar breaks, i.e. breaks in the same 306 /// parenthesis. 307 bool ContainsLineBreak : 1; 308 309 /// \c true if this \c ParenState contains multiple segments of a 310 /// builder-type call on one line. 311 bool ContainsUnwrappedBuilder : 1; 312 313 /// \c true if the colons of the curren ObjC method expression should 314 /// be aligned. 315 /// 316 /// Not considered for memoization as it will always have the same value at 317 /// the same token. 318 bool AlignColons : 1; 319 320 /// \c true if at least one selector name was found in the current 321 /// ObjC method expression. 322 /// 323 /// Not considered for memoization as it will always have the same value at 324 /// the same token. 325 bool ObjCSelectorNameFound : 1; 326 327 /// \c true if there are multiple nested blocks inside these parens. 328 /// 329 /// Not considered for memoization as it will always have the same value at 330 /// the same token. 331 bool HasMultipleNestedBlocks : 1; 332 333 /// The start of a nested block (e.g. lambda introducer in C++ or 334 /// "function" in JavaScript) is not wrapped to a new line. 335 bool NestedBlockInlined : 1; 336 337 /// \c true if the current \c ParenState represents an Objective-C 338 /// array literal. 339 bool IsInsideObjCArrayLiteral : 1; 340 341 bool IsCSharpGenericTypeConstraint : 1; 342 343 /// \brief true if the current \c ParenState represents the false branch of 344 /// a chained conditional expression (e.g. else-if) 345 bool IsChainedConditional : 1; 346 347 /// \brief true if there conditionnal was wrapped on the first operator (the 348 /// question mark) 349 bool IsWrappedConditional : 1; 350 351 /// \brief Indicates the indent should be reduced by the length of the 352 /// operator. 353 bool UnindentOperator : 1; 354 355 bool operator<(const ParenState &Other) const { 356 if (Indent != Other.Indent) 357 return Indent < Other.Indent; 358 if (LastSpace != Other.LastSpace) 359 return LastSpace < Other.LastSpace; 360 if (NestedBlockIndent != Other.NestedBlockIndent) 361 return NestedBlockIndent < Other.NestedBlockIndent; 362 if (FirstLessLess != Other.FirstLessLess) 363 return FirstLessLess < Other.FirstLessLess; 364 if (IsAligned != Other.IsAligned) 365 return IsAligned; 366 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 367 return BreakBeforeClosingBrace; 368 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) 369 return BreakBeforeClosingParen; 370 if (QuestionColumn != Other.QuestionColumn) 371 return QuestionColumn < Other.QuestionColumn; 372 if (AvoidBinPacking != Other.AvoidBinPacking) 373 return AvoidBinPacking; 374 if (BreakBeforeParameter != Other.BreakBeforeParameter) 375 return BreakBeforeParameter; 376 if (NoLineBreak != Other.NoLineBreak) 377 return NoLineBreak; 378 if (LastOperatorWrapped != Other.LastOperatorWrapped) 379 return LastOperatorWrapped; 380 if (ColonPos != Other.ColonPos) 381 return ColonPos < Other.ColonPos; 382 if (StartOfFunctionCall != Other.StartOfFunctionCall) 383 return StartOfFunctionCall < Other.StartOfFunctionCall; 384 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 385 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 386 if (CallContinuation != Other.CallContinuation) 387 return CallContinuation < Other.CallContinuation; 388 if (VariablePos != Other.VariablePos) 389 return VariablePos < Other.VariablePos; 390 if (ContainsLineBreak != Other.ContainsLineBreak) 391 return ContainsLineBreak; 392 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 393 return ContainsUnwrappedBuilder; 394 if (NestedBlockInlined != Other.NestedBlockInlined) 395 return NestedBlockInlined; 396 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) 397 return IsCSharpGenericTypeConstraint; 398 if (IsChainedConditional != Other.IsChainedConditional) 399 return IsChainedConditional; 400 if (IsWrappedConditional != Other.IsWrappedConditional) 401 return IsWrappedConditional; 402 if (UnindentOperator != Other.UnindentOperator) 403 return UnindentOperator; 404 return false; 405 } 406 }; 407 408 /// The current state when indenting a unwrapped line. 409 /// 410 /// As the indenting tries different combinations this is copied by value. 411 struct LineState { 412 /// The number of used columns in the current line. 413 unsigned Column; 414 415 /// The token that needs to be next formatted. 416 FormatToken *NextToken; 417 418 /// \c true if \p NextToken should not continue this line. 419 bool NoContinuation; 420 421 /// The \c NestingLevel at the start of this line. 422 unsigned StartOfLineLevel; 423 424 /// The lowest \c NestingLevel on the current line. 425 unsigned LowestLevelOnLine; 426 427 /// The start column of the string literal, if we're in a string 428 /// literal sequence, 0 otherwise. 429 unsigned StartOfStringLiteral; 430 431 /// Disallow line breaks for this line. 432 bool NoLineBreak; 433 434 /// A stack keeping track of properties applying to parenthesis 435 /// levels. 436 SmallVector<ParenState> Stack; 437 438 /// Ignore the stack of \c ParenStates for state comparison. 439 /// 440 /// In long and deeply nested unwrapped lines, the current algorithm can 441 /// be insufficient for finding the best formatting with a reasonable amount 442 /// of time and memory. Setting this flag will effectively lead to the 443 /// algorithm not analyzing some combinations. However, these combinations 444 /// rarely contain the optimal solution: In short, accepting a higher 445 /// penalty early would need to lead to different values in the \c 446 /// ParenState stack (in an otherwise identical state) and these different 447 /// values would need to lead to a significant amount of avoided penalty 448 /// later. 449 /// 450 /// FIXME: Come up with a better algorithm instead. 451 bool IgnoreStackForComparison; 452 453 /// The indent of the first token. 454 unsigned FirstIndent; 455 456 /// The line that is being formatted. 457 /// 458 /// Does not need to be considered for memoization because it doesn't change. 459 const AnnotatedLine *Line; 460 461 /// Comparison operator to be able to used \c LineState in \c map. 462 bool operator<(const LineState &Other) const { 463 if (NextToken != Other.NextToken) 464 return NextToken < Other.NextToken; 465 if (Column != Other.Column) 466 return Column < Other.Column; 467 if (NoContinuation != Other.NoContinuation) 468 return NoContinuation; 469 if (StartOfLineLevel != Other.StartOfLineLevel) 470 return StartOfLineLevel < Other.StartOfLineLevel; 471 if (LowestLevelOnLine != Other.LowestLevelOnLine) 472 return LowestLevelOnLine < Other.LowestLevelOnLine; 473 if (StartOfStringLiteral != Other.StartOfStringLiteral) 474 return StartOfStringLiteral < Other.StartOfStringLiteral; 475 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 476 return false; 477 return Stack < Other.Stack; 478 } 479 }; 480 481 } // end namespace format 482 } // end namespace clang 483 484 #endif 485