1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/Support/Regex.h" 22 #include <map> 23 #include <optional> 24 #include <tuple> 25 26 namespace clang { 27 class SourceManager; 28 29 namespace format { 30 31 class AnnotatedLine; 32 class BreakableToken; 33 struct FormatToken; 34 struct LineState; 35 struct ParenState; 36 struct RawStringFormatStyleManager; 37 class WhitespaceManager; 38 39 struct RawStringFormatStyleManager { 40 llvm::StringMap<FormatStyle> DelimiterStyle; 41 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 42 43 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 44 45 std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 46 47 std::optional<FormatStyle> 48 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 49 }; 50 51 class ContinuationIndenter { 52 public: 53 /// Constructs a \c ContinuationIndenter to format \p Line starting in 54 /// column \p FirstIndent. 55 ContinuationIndenter(const FormatStyle &Style, 56 const AdditionalKeywords &Keywords, 57 const SourceManager &SourceMgr, 58 WhitespaceManager &Whitespaces, 59 encoding::Encoding Encoding, 60 bool BinPackInconclusiveFunctions); 61 62 /// Get the initial state, i.e. the state after placing \p Line's 63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 64 /// the case of formatting inside raw string literals, \p FirstStartColumn is 65 /// the column at which the state of the parent formatter is. 66 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 67 const AnnotatedLine *Line, bool DryRun); 68 69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 70 // better home. 71 /// Returns \c true, if a line break after \p State is allowed. 72 bool canBreak(const LineState &State); 73 74 /// Returns \c true, if a line break after \p State is mandatory. 75 bool mustBreak(const LineState &State); 76 77 /// Appends the next token to \p State and updates information 78 /// necessary for indentation. 79 /// 80 /// Puts the token on the current line if \p Newline is \c false and adds a 81 /// line break and necessary indentation otherwise. 82 /// 83 /// If \p DryRun is \c false, also creates and stores the required 84 /// \c Replacement. 85 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 86 unsigned ExtraSpaces = 0); 87 88 /// Get the column limit for this line. This is the style's column 89 /// limit, potentially reduced for preprocessor definitions. 90 unsigned getColumnLimit(const LineState &State) const; 91 92 private: 93 /// Mark the next token as consumed in \p State and modify its stacks 94 /// accordingly. 95 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 96 97 /// Update 'State' according to the next token's fake left parentheses. 98 void moveStatePastFakeLParens(LineState &State, bool Newline); 99 /// Update 'State' according to the next token's fake r_parens. 100 void moveStatePastFakeRParens(LineState &State); 101 102 /// Update 'State' according to the next token being one of "(<{[". 103 void moveStatePastScopeOpener(LineState &State, bool Newline); 104 /// Update 'State' according to the next token being one of ")>}]". 105 void moveStatePastScopeCloser(LineState &State); 106 /// Update 'State' with the next token opening a nested block. 107 void moveStateToNewBlock(LineState &State); 108 109 /// Reformats a raw string literal. 110 /// 111 /// \returns An extra penalty induced by reformatting the token. 112 unsigned reformatRawStringLiteral(const FormatToken &Current, 113 LineState &State, 114 const FormatStyle &RawStringStyle, 115 bool DryRun, bool Newline); 116 117 /// If the current token is at the end of the current line, handle 118 /// the transition to the next line. 119 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 120 bool DryRun, bool AllowBreak, bool Newline); 121 122 /// If \p Current is a raw string that is configured to be reformatted, 123 /// return the style to be used. 124 std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 125 const LineState &State); 126 127 /// If the current token sticks out over the end of the line, break 128 /// it if possible. 129 /// 130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 131 /// when tokens are broken or lines exceed the column limit, and exceeded 132 /// indicates whether the algorithm purposefully left lines exceeding the 133 /// column limit. 134 /// 135 /// The returned penalty will cover the cost of the additional line breaks 136 /// and column limit violation in all lines except for the last one. The 137 /// penalty for the column limit violation in the last line (and in single 138 /// line tokens) is handled in \c addNextStateToQueue. 139 /// 140 /// \p Strict indicates whether reflowing is allowed to leave characters 141 /// protruding the column limit; if true, lines will be split strictly within 142 /// the column limit where possible; if false, words are allowed to protrude 143 /// over the column limit as long as the penalty is less than the penalty 144 /// of a break. 145 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 146 LineState &State, 147 bool AllowBreak, bool DryRun, 148 bool Strict); 149 150 /// Returns the \c BreakableToken starting at \p Current, or nullptr 151 /// if the current token cannot be broken. 152 std::unique_ptr<BreakableToken> 153 createBreakableToken(const FormatToken &Current, LineState &State, 154 bool AllowBreak); 155 156 /// Appends the next token to \p State and updates information 157 /// necessary for indentation. 158 /// 159 /// Puts the token on the current line. 160 /// 161 /// If \p DryRun is \c false, also creates and stores the required 162 /// \c Replacement. 163 void addTokenOnCurrentLine(LineState &State, bool DryRun, 164 unsigned ExtraSpaces); 165 166 /// Appends the next token to \p State and updates information 167 /// necessary for indentation. 168 /// 169 /// Adds a line break and necessary indentation. 170 /// 171 /// If \p DryRun is \c false, also creates and stores the required 172 /// \c Replacement. 173 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 174 175 /// Calculate the new column for a line wrap before the next token. 176 unsigned getNewLineColumn(const LineState &State); 177 178 /// Adds a multiline token to the \p State. 179 /// 180 /// \returns Extra penalty for the first line of the literal: last line is 181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 182 /// matter, as we don't change them. 183 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 184 185 /// Returns \c true if the next token starts a multiline string 186 /// literal. 187 /// 188 /// This includes implicitly concatenated strings, strings that will be broken 189 /// by clang-format and string literals with escaped newlines. 190 bool nextIsMultilineString(const LineState &State); 191 192 FormatStyle Style; 193 const AdditionalKeywords &Keywords; 194 const SourceManager &SourceMgr; 195 WhitespaceManager &Whitespaces; 196 encoding::Encoding Encoding; 197 bool BinPackInconclusiveFunctions; 198 llvm::Regex CommentPragmasRegex; 199 const RawStringFormatStyleManager RawStringFormats; 200 }; 201 202 struct ParenState { 203 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 204 bool AvoidBinPacking, bool NoLineBreak) 205 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 206 NestedBlockIndent(Indent), IsAligned(false), 207 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), 208 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 209 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), 210 LastOperatorWrapped(true), ContainsLineBreak(false), 211 ContainsUnwrappedBuilder(false), AlignColons(true), 212 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), 213 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false), 214 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false), 215 IsWrappedConditional(false), UnindentOperator(false) {} 216 217 /// \brief The token opening this parenthesis level, or nullptr if this level 218 /// is opened by fake parenthesis. 219 /// 220 /// Not considered for memoization as it will always have the same value at 221 /// the same token. 222 const FormatToken *Tok; 223 224 /// The position to which a specific parenthesis level needs to be 225 /// indented. 226 unsigned Indent; 227 228 /// The position of the last space on each level. 229 /// 230 /// Used e.g. to break like: 231 /// functionCall(Parameter, otherCall( 232 /// OtherParameter)); 233 unsigned LastSpace; 234 235 /// If a block relative to this parenthesis level gets wrapped, indent 236 /// it this much. 237 unsigned NestedBlockIndent; 238 239 /// The position the first "<<" operator encountered on each level. 240 /// 241 /// Used to align "<<" operators. 0 if no such operator has been encountered 242 /// on a level. 243 unsigned FirstLessLess = 0; 244 245 /// The column of a \c ? in a conditional expression; 246 unsigned QuestionColumn = 0; 247 248 /// The position of the colon in an ObjC method declaration/call. 249 unsigned ColonPos = 0; 250 251 /// The start of the most recent function in a builder-type call. 252 unsigned StartOfFunctionCall = 0; 253 254 /// Contains the start of array subscript expressions, so that they 255 /// can be aligned. 256 unsigned StartOfArraySubscripts = 0; 257 258 /// If a nested name specifier was broken over multiple lines, this 259 /// contains the start column of the second line. Otherwise 0. 260 unsigned NestedNameSpecifierContinuation = 0; 261 262 /// If a call expression was broken over multiple lines, this 263 /// contains the start column of the second line. Otherwise 0. 264 unsigned CallContinuation = 0; 265 266 /// The column of the first variable name in a variable declaration. 267 /// 268 /// Used to align further variables if necessary. 269 unsigned VariablePos = 0; 270 271 /// Whether this block's indentation is used for alignment. 272 bool IsAligned : 1; 273 274 /// Whether a newline needs to be inserted before the block's closing 275 /// brace. 276 /// 277 /// We only want to insert a newline before the closing brace if there also 278 /// was a newline after the beginning left brace. 279 bool BreakBeforeClosingBrace : 1; 280 281 /// Whether a newline needs to be inserted before the block's closing 282 /// paren. 283 /// 284 /// We only want to insert a newline before the closing paren if there also 285 /// was a newline after the beginning left paren. 286 bool BreakBeforeClosingParen : 1; 287 288 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 289 /// lines, in this context. 290 bool AvoidBinPacking : 1; 291 292 /// Break after the next comma (or all the commas in this context if 293 /// \c AvoidBinPacking is \c true). 294 bool BreakBeforeParameter : 1; 295 296 /// Line breaking in this context would break a formatting rule. 297 bool NoLineBreak : 1; 298 299 /// Same as \c NoLineBreak, but is restricted until the end of the 300 /// operand (including the next ","). 301 bool NoLineBreakInOperand : 1; 302 303 /// True if the last binary operator on this level was wrapped to the 304 /// next line. 305 bool LastOperatorWrapped : 1; 306 307 /// \c true if this \c ParenState already contains a line-break. 308 /// 309 /// The first line break in a certain \c ParenState causes extra penalty so 310 /// that clang-format prefers similar breaks, i.e. breaks in the same 311 /// parenthesis. 312 bool ContainsLineBreak : 1; 313 314 /// \c true if this \c ParenState contains multiple segments of a 315 /// builder-type call on one line. 316 bool ContainsUnwrappedBuilder : 1; 317 318 /// \c true if the colons of the curren ObjC method expression should 319 /// be aligned. 320 /// 321 /// Not considered for memoization as it will always have the same value at 322 /// the same token. 323 bool AlignColons : 1; 324 325 /// \c true if at least one selector name was found in the current 326 /// ObjC method expression. 327 /// 328 /// Not considered for memoization as it will always have the same value at 329 /// the same token. 330 bool ObjCSelectorNameFound : 1; 331 332 /// \c true if there are multiple nested blocks inside these parens. 333 /// 334 /// Not considered for memoization as it will always have the same value at 335 /// the same token. 336 bool HasMultipleNestedBlocks : 1; 337 338 /// The start of a nested block (e.g. lambda introducer in C++ or 339 /// "function" in JavaScript) is not wrapped to a new line. 340 bool NestedBlockInlined : 1; 341 342 /// \c true if the current \c ParenState represents an Objective-C 343 /// array literal. 344 bool IsInsideObjCArrayLiteral : 1; 345 346 bool IsCSharpGenericTypeConstraint : 1; 347 348 /// \brief true if the current \c ParenState represents the false branch of 349 /// a chained conditional expression (e.g. else-if) 350 bool IsChainedConditional : 1; 351 352 /// \brief true if there conditionnal was wrapped on the first operator (the 353 /// question mark) 354 bool IsWrappedConditional : 1; 355 356 /// \brief Indicates the indent should be reduced by the length of the 357 /// operator. 358 bool UnindentOperator : 1; 359 360 bool operator<(const ParenState &Other) const { 361 if (Indent != Other.Indent) 362 return Indent < Other.Indent; 363 if (LastSpace != Other.LastSpace) 364 return LastSpace < Other.LastSpace; 365 if (NestedBlockIndent != Other.NestedBlockIndent) 366 return NestedBlockIndent < Other.NestedBlockIndent; 367 if (FirstLessLess != Other.FirstLessLess) 368 return FirstLessLess < Other.FirstLessLess; 369 if (IsAligned != Other.IsAligned) 370 return IsAligned; 371 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 372 return BreakBeforeClosingBrace; 373 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) 374 return BreakBeforeClosingParen; 375 if (QuestionColumn != Other.QuestionColumn) 376 return QuestionColumn < Other.QuestionColumn; 377 if (AvoidBinPacking != Other.AvoidBinPacking) 378 return AvoidBinPacking; 379 if (BreakBeforeParameter != Other.BreakBeforeParameter) 380 return BreakBeforeParameter; 381 if (NoLineBreak != Other.NoLineBreak) 382 return NoLineBreak; 383 if (LastOperatorWrapped != Other.LastOperatorWrapped) 384 return LastOperatorWrapped; 385 if (ColonPos != Other.ColonPos) 386 return ColonPos < Other.ColonPos; 387 if (StartOfFunctionCall != Other.StartOfFunctionCall) 388 return StartOfFunctionCall < Other.StartOfFunctionCall; 389 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 390 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 391 if (CallContinuation != Other.CallContinuation) 392 return CallContinuation < Other.CallContinuation; 393 if (VariablePos != Other.VariablePos) 394 return VariablePos < Other.VariablePos; 395 if (ContainsLineBreak != Other.ContainsLineBreak) 396 return ContainsLineBreak; 397 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 398 return ContainsUnwrappedBuilder; 399 if (NestedBlockInlined != Other.NestedBlockInlined) 400 return NestedBlockInlined; 401 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) 402 return IsCSharpGenericTypeConstraint; 403 if (IsChainedConditional != Other.IsChainedConditional) 404 return IsChainedConditional; 405 if (IsWrappedConditional != Other.IsWrappedConditional) 406 return IsWrappedConditional; 407 if (UnindentOperator != Other.UnindentOperator) 408 return UnindentOperator; 409 return false; 410 } 411 }; 412 413 /// The current state when indenting a unwrapped line. 414 /// 415 /// As the indenting tries different combinations this is copied by value. 416 struct LineState { 417 /// The number of used columns in the current line. 418 unsigned Column; 419 420 /// The token that needs to be next formatted. 421 FormatToken *NextToken; 422 423 /// \c true if \p NextToken should not continue this line. 424 bool NoContinuation; 425 426 /// The \c NestingLevel at the start of this line. 427 unsigned StartOfLineLevel; 428 429 /// The lowest \c NestingLevel on the current line. 430 unsigned LowestLevelOnLine; 431 432 /// The start column of the string literal, if we're in a string 433 /// literal sequence, 0 otherwise. 434 unsigned StartOfStringLiteral; 435 436 /// Disallow line breaks for this line. 437 bool NoLineBreak; 438 439 /// A stack keeping track of properties applying to parenthesis 440 /// levels. 441 SmallVector<ParenState> Stack; 442 443 /// Ignore the stack of \c ParenStates for state comparison. 444 /// 445 /// In long and deeply nested unwrapped lines, the current algorithm can 446 /// be insufficient for finding the best formatting with a reasonable amount 447 /// of time and memory. Setting this flag will effectively lead to the 448 /// algorithm not analyzing some combinations. However, these combinations 449 /// rarely contain the optimal solution: In short, accepting a higher 450 /// penalty early would need to lead to different values in the \c 451 /// ParenState stack (in an otherwise identical state) and these different 452 /// values would need to lead to a significant amount of avoided penalty 453 /// later. 454 /// 455 /// FIXME: Come up with a better algorithm instead. 456 bool IgnoreStackForComparison; 457 458 /// The indent of the first token. 459 unsigned FirstIndent; 460 461 /// The line that is being formatted. 462 /// 463 /// Does not need to be considered for memoization because it doesn't change. 464 const AnnotatedLine *Line; 465 466 /// Comparison operator to be able to used \c LineState in \c map. 467 bool operator<(const LineState &Other) const { 468 if (NextToken != Other.NextToken) 469 return NextToken < Other.NextToken; 470 if (Column != Other.Column) 471 return Column < Other.Column; 472 if (NoContinuation != Other.NoContinuation) 473 return NoContinuation; 474 if (StartOfLineLevel != Other.StartOfLineLevel) 475 return StartOfLineLevel < Other.StartOfLineLevel; 476 if (LowestLevelOnLine != Other.LowestLevelOnLine) 477 return LowestLevelOnLine < Other.LowestLevelOnLine; 478 if (StartOfStringLiteral != Other.StartOfStringLiteral) 479 return StartOfStringLiteral < Other.StartOfStringLiteral; 480 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 481 return false; 482 return Stack < Other.Stack; 483 } 484 }; 485 486 } // end namespace format 487 } // end namespace clang 488 489 #endif 490