1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/Support/Regex.h" 22 #include <map> 23 #include <tuple> 24 25 namespace clang { 26 class SourceManager; 27 28 namespace format { 29 30 class AnnotatedLine; 31 class BreakableToken; 32 struct FormatToken; 33 struct LineState; 34 struct ParenState; 35 struct RawStringFormatStyleManager; 36 class WhitespaceManager; 37 38 struct RawStringFormatStyleManager { 39 llvm::StringMap<FormatStyle> DelimiterStyle; 40 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 41 42 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 43 44 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 45 46 llvm::Optional<FormatStyle> 47 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 48 }; 49 50 class ContinuationIndenter { 51 public: 52 /// Constructs a \c ContinuationIndenter to format \p Line starting in 53 /// column \p FirstIndent. 54 ContinuationIndenter(const FormatStyle &Style, 55 const AdditionalKeywords &Keywords, 56 const SourceManager &SourceMgr, 57 WhitespaceManager &Whitespaces, 58 encoding::Encoding Encoding, 59 bool BinPackInconclusiveFunctions); 60 61 /// Get the initial state, i.e. the state after placing \p Line's 62 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 63 /// the case of formatting inside raw string literals, \p FirstStartColumn is 64 /// the column at which the state of the parent formatter is. 65 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 66 const AnnotatedLine *Line, bool DryRun); 67 68 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 69 // better home. 70 /// Returns \c true, if a line break after \p State is allowed. 71 bool canBreak(const LineState &State); 72 73 /// Returns \c true, if a line break after \p State is mandatory. 74 bool mustBreak(const LineState &State); 75 76 /// Appends the next token to \p State and updates information 77 /// necessary for indentation. 78 /// 79 /// Puts the token on the current line if \p Newline is \c false and adds a 80 /// line break and necessary indentation otherwise. 81 /// 82 /// If \p DryRun is \c false, also creates and stores the required 83 /// \c Replacement. 84 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 85 unsigned ExtraSpaces = 0); 86 87 /// Get the column limit for this line. This is the style's column 88 /// limit, potentially reduced for preprocessor definitions. 89 unsigned getColumnLimit(const LineState &State) const; 90 91 private: 92 /// Mark the next token as consumed in \p State and modify its stacks 93 /// accordingly. 94 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 95 96 /// Update 'State' according to the next token's fake left parentheses. 97 void moveStatePastFakeLParens(LineState &State, bool Newline); 98 /// Update 'State' according to the next token's fake r_parens. 99 void moveStatePastFakeRParens(LineState &State); 100 101 /// Update 'State' according to the next token being one of "(<{[". 102 void moveStatePastScopeOpener(LineState &State, bool Newline); 103 /// Update 'State' according to the next token being one of ")>}]". 104 void moveStatePastScopeCloser(LineState &State); 105 /// Update 'State' with the next token opening a nested block. 106 void moveStateToNewBlock(LineState &State); 107 108 /// Reformats a raw string literal. 109 /// 110 /// \returns An extra penalty induced by reformatting the token. 111 unsigned reformatRawStringLiteral(const FormatToken &Current, 112 LineState &State, 113 const FormatStyle &RawStringStyle, 114 bool DryRun, bool Newline); 115 116 /// If the current token is at the end of the current line, handle 117 /// the transition to the next line. 118 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 119 bool DryRun, bool AllowBreak, bool Newline); 120 121 /// If \p Current is a raw string that is configured to be reformatted, 122 /// return the style to be used. 123 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 124 const LineState &State); 125 126 /// If the current token sticks out over the end of the line, break 127 /// it if possible. 128 /// 129 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 130 /// when tokens are broken or lines exceed the column limit, and exceeded 131 /// indicates whether the algorithm purposefully left lines exceeding the 132 /// column limit. 133 /// 134 /// The returned penalty will cover the cost of the additional line breaks 135 /// and column limit violation in all lines except for the last one. The 136 /// penalty for the column limit violation in the last line (and in single 137 /// line tokens) is handled in \c addNextStateToQueue. 138 /// 139 /// \p Strict indicates whether reflowing is allowed to leave characters 140 /// protruding the column limit; if true, lines will be split strictly within 141 /// the column limit where possible; if false, words are allowed to protrude 142 /// over the column limit as long as the penalty is less than the penalty 143 /// of a break. 144 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 145 LineState &State, 146 bool AllowBreak, bool DryRun, 147 bool Strict); 148 149 /// Returns the \c BreakableToken starting at \p Current, or nullptr 150 /// if the current token cannot be broken. 151 std::unique_ptr<BreakableToken> 152 createBreakableToken(const FormatToken &Current, LineState &State, 153 bool AllowBreak); 154 155 /// Appends the next token to \p State and updates information 156 /// necessary for indentation. 157 /// 158 /// Puts the token on the current line. 159 /// 160 /// If \p DryRun is \c false, also creates and stores the required 161 /// \c Replacement. 162 void addTokenOnCurrentLine(LineState &State, bool DryRun, 163 unsigned ExtraSpaces); 164 165 /// Appends the next token to \p State and updates information 166 /// necessary for indentation. 167 /// 168 /// Adds a line break and necessary indentation. 169 /// 170 /// If \p DryRun is \c false, also creates and stores the required 171 /// \c Replacement. 172 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 173 174 /// Calculate the new column for a line wrap before the next token. 175 unsigned getNewLineColumn(const LineState &State); 176 177 /// Adds a multiline token to the \p State. 178 /// 179 /// \returns Extra penalty for the first line of the literal: last line is 180 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 181 /// matter, as we don't change them. 182 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 183 184 /// Returns \c true if the next token starts a multiline string 185 /// literal. 186 /// 187 /// This includes implicitly concatenated strings, strings that will be broken 188 /// by clang-format and string literals with escaped newlines. 189 bool nextIsMultilineString(const LineState &State); 190 191 FormatStyle Style; 192 const AdditionalKeywords &Keywords; 193 const SourceManager &SourceMgr; 194 WhitespaceManager &Whitespaces; 195 encoding::Encoding Encoding; 196 bool BinPackInconclusiveFunctions; 197 llvm::Regex CommentPragmasRegex; 198 const RawStringFormatStyleManager RawStringFormats; 199 }; 200 201 struct ParenState { 202 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 203 bool AvoidBinPacking, bool NoLineBreak) 204 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 205 NestedBlockIndent(Indent), IsAligned(false), 206 BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false), 207 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 208 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), 209 LastOperatorWrapped(true), ContainsLineBreak(false), 210 ContainsUnwrappedBuilder(false), AlignColons(true), 211 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), 212 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false), 213 IsCSharpGenericTypeConstraint(false), IsChainedConditional(false), 214 IsWrappedConditional(false), UnindentOperator(false) {} 215 216 /// \brief The token opening this parenthesis level, or nullptr if this level 217 /// is opened by fake parenthesis. 218 /// 219 /// Not considered for memoization as it will always have the same value at 220 /// the same token. 221 const FormatToken *Tok; 222 223 /// The position to which a specific parenthesis level needs to be 224 /// indented. 225 unsigned Indent; 226 227 /// The position of the last space on each level. 228 /// 229 /// Used e.g. to break like: 230 /// functionCall(Parameter, otherCall( 231 /// OtherParameter)); 232 unsigned LastSpace; 233 234 /// If a block relative to this parenthesis level gets wrapped, indent 235 /// it this much. 236 unsigned NestedBlockIndent; 237 238 /// The position the first "<<" operator encountered on each level. 239 /// 240 /// Used to align "<<" operators. 0 if no such operator has been encountered 241 /// on a level. 242 unsigned FirstLessLess = 0; 243 244 /// The column of a \c ? in a conditional expression; 245 unsigned QuestionColumn = 0; 246 247 /// The position of the colon in an ObjC method declaration/call. 248 unsigned ColonPos = 0; 249 250 /// The start of the most recent function in a builder-type call. 251 unsigned StartOfFunctionCall = 0; 252 253 /// Contains the start of array subscript expressions, so that they 254 /// can be aligned. 255 unsigned StartOfArraySubscripts = 0; 256 257 /// If a nested name specifier was broken over multiple lines, this 258 /// contains the start column of the second line. Otherwise 0. 259 unsigned NestedNameSpecifierContinuation = 0; 260 261 /// If a call expression was broken over multiple lines, this 262 /// contains the start column of the second line. Otherwise 0. 263 unsigned CallContinuation = 0; 264 265 /// The column of the first variable name in a variable declaration. 266 /// 267 /// Used to align further variables if necessary. 268 unsigned VariablePos = 0; 269 270 /// Whether this block's indentation is used for alignment. 271 bool IsAligned : 1; 272 273 /// Whether a newline needs to be inserted before the block's closing 274 /// brace. 275 /// 276 /// We only want to insert a newline before the closing brace if there also 277 /// was a newline after the beginning left brace. 278 bool BreakBeforeClosingBrace : 1; 279 280 /// Whether a newline needs to be inserted before the block's closing 281 /// paren. 282 /// 283 /// We only want to insert a newline before the closing paren if there also 284 /// was a newline after the beginning left paren. 285 bool BreakBeforeClosingParen : 1; 286 287 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 288 /// lines, in this context. 289 bool AvoidBinPacking : 1; 290 291 /// Break after the next comma (or all the commas in this context if 292 /// \c AvoidBinPacking is \c true). 293 bool BreakBeforeParameter : 1; 294 295 /// Line breaking in this context would break a formatting rule. 296 bool NoLineBreak : 1; 297 298 /// Same as \c NoLineBreak, but is restricted until the end of the 299 /// operand (including the next ","). 300 bool NoLineBreakInOperand : 1; 301 302 /// True if the last binary operator on this level was wrapped to the 303 /// next line. 304 bool LastOperatorWrapped : 1; 305 306 /// \c true if this \c ParenState already contains a line-break. 307 /// 308 /// The first line break in a certain \c ParenState causes extra penalty so 309 /// that clang-format prefers similar breaks, i.e. breaks in the same 310 /// parenthesis. 311 bool ContainsLineBreak : 1; 312 313 /// \c true if this \c ParenState contains multiple segments of a 314 /// builder-type call on one line. 315 bool ContainsUnwrappedBuilder : 1; 316 317 /// \c true if the colons of the curren ObjC method expression should 318 /// be aligned. 319 /// 320 /// Not considered for memoization as it will always have the same value at 321 /// the same token. 322 bool AlignColons : 1; 323 324 /// \c true if at least one selector name was found in the current 325 /// ObjC method expression. 326 /// 327 /// Not considered for memoization as it will always have the same value at 328 /// the same token. 329 bool ObjCSelectorNameFound : 1; 330 331 /// \c true if there are multiple nested blocks inside these parens. 332 /// 333 /// Not considered for memoization as it will always have the same value at 334 /// the same token. 335 bool HasMultipleNestedBlocks : 1; 336 337 /// The start of a nested block (e.g. lambda introducer in C++ or 338 /// "function" in JavaScript) is not wrapped to a new line. 339 bool NestedBlockInlined : 1; 340 341 /// \c true if the current \c ParenState represents an Objective-C 342 /// array literal. 343 bool IsInsideObjCArrayLiteral : 1; 344 345 bool IsCSharpGenericTypeConstraint : 1; 346 347 /// \brief true if the current \c ParenState represents the false branch of 348 /// a chained conditional expression (e.g. else-if) 349 bool IsChainedConditional : 1; 350 351 /// \brief true if there conditionnal was wrapped on the first operator (the 352 /// question mark) 353 bool IsWrappedConditional : 1; 354 355 /// \brief Indicates the indent should be reduced by the length of the 356 /// operator. 357 bool UnindentOperator : 1; 358 359 bool operator<(const ParenState &Other) const { 360 if (Indent != Other.Indent) 361 return Indent < Other.Indent; 362 if (LastSpace != Other.LastSpace) 363 return LastSpace < Other.LastSpace; 364 if (NestedBlockIndent != Other.NestedBlockIndent) 365 return NestedBlockIndent < Other.NestedBlockIndent; 366 if (FirstLessLess != Other.FirstLessLess) 367 return FirstLessLess < Other.FirstLessLess; 368 if (IsAligned != Other.IsAligned) 369 return IsAligned; 370 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 371 return BreakBeforeClosingBrace; 372 if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen) 373 return BreakBeforeClosingParen; 374 if (QuestionColumn != Other.QuestionColumn) 375 return QuestionColumn < Other.QuestionColumn; 376 if (AvoidBinPacking != Other.AvoidBinPacking) 377 return AvoidBinPacking; 378 if (BreakBeforeParameter != Other.BreakBeforeParameter) 379 return BreakBeforeParameter; 380 if (NoLineBreak != Other.NoLineBreak) 381 return NoLineBreak; 382 if (LastOperatorWrapped != Other.LastOperatorWrapped) 383 return LastOperatorWrapped; 384 if (ColonPos != Other.ColonPos) 385 return ColonPos < Other.ColonPos; 386 if (StartOfFunctionCall != Other.StartOfFunctionCall) 387 return StartOfFunctionCall < Other.StartOfFunctionCall; 388 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 389 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 390 if (CallContinuation != Other.CallContinuation) 391 return CallContinuation < Other.CallContinuation; 392 if (VariablePos != Other.VariablePos) 393 return VariablePos < Other.VariablePos; 394 if (ContainsLineBreak != Other.ContainsLineBreak) 395 return ContainsLineBreak; 396 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 397 return ContainsUnwrappedBuilder; 398 if (NestedBlockInlined != Other.NestedBlockInlined) 399 return NestedBlockInlined; 400 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) 401 return IsCSharpGenericTypeConstraint; 402 if (IsChainedConditional != Other.IsChainedConditional) 403 return IsChainedConditional; 404 if (IsWrappedConditional != Other.IsWrappedConditional) 405 return IsWrappedConditional; 406 if (UnindentOperator != Other.UnindentOperator) 407 return UnindentOperator; 408 return false; 409 } 410 }; 411 412 /// The current state when indenting a unwrapped line. 413 /// 414 /// As the indenting tries different combinations this is copied by value. 415 struct LineState { 416 /// The number of used columns in the current line. 417 unsigned Column; 418 419 /// The token that needs to be next formatted. 420 FormatToken *NextToken; 421 422 /// \c true if this line contains a continued for-loop section. 423 bool LineContainsContinuedForLoopSection; 424 425 /// \c true if \p NextToken should not continue this line. 426 bool NoContinuation; 427 428 /// The \c NestingLevel at the start of this line. 429 unsigned StartOfLineLevel; 430 431 /// The lowest \c NestingLevel on the current line. 432 unsigned LowestLevelOnLine; 433 434 /// The start column of the string literal, if we're in a string 435 /// literal sequence, 0 otherwise. 436 unsigned StartOfStringLiteral; 437 438 /// A stack keeping track of properties applying to parenthesis 439 /// levels. 440 std::vector<ParenState> Stack; 441 442 /// Ignore the stack of \c ParenStates for state comparison. 443 /// 444 /// In long and deeply nested unwrapped lines, the current algorithm can 445 /// be insufficient for finding the best formatting with a reasonable amount 446 /// of time and memory. Setting this flag will effectively lead to the 447 /// algorithm not analyzing some combinations. However, these combinations 448 /// rarely contain the optimal solution: In short, accepting a higher 449 /// penalty early would need to lead to different values in the \c 450 /// ParenState stack (in an otherwise identical state) and these different 451 /// values would need to lead to a significant amount of avoided penalty 452 /// later. 453 /// 454 /// FIXME: Come up with a better algorithm instead. 455 bool IgnoreStackForComparison; 456 457 /// The indent of the first token. 458 unsigned FirstIndent; 459 460 /// The line that is being formatted. 461 /// 462 /// Does not need to be considered for memoization because it doesn't change. 463 const AnnotatedLine *Line; 464 465 /// Comparison operator to be able to used \c LineState in \c map. 466 bool operator<(const LineState &Other) const { 467 if (NextToken != Other.NextToken) 468 return NextToken < Other.NextToken; 469 if (Column != Other.Column) 470 return Column < Other.Column; 471 if (LineContainsContinuedForLoopSection != 472 Other.LineContainsContinuedForLoopSection) 473 return LineContainsContinuedForLoopSection; 474 if (NoContinuation != Other.NoContinuation) 475 return NoContinuation; 476 if (StartOfLineLevel != Other.StartOfLineLevel) 477 return StartOfLineLevel < Other.StartOfLineLevel; 478 if (LowestLevelOnLine != Other.LowestLevelOnLine) 479 return LowestLevelOnLine < Other.LowestLevelOnLine; 480 if (StartOfStringLiteral != Other.StartOfStringLiteral) 481 return StartOfStringLiteral < Other.StartOfStringLiteral; 482 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 483 return false; 484 return Stack < Other.Stack; 485 } 486 }; 487 488 } // end namespace format 489 } // end namespace clang 490 491 #endif 492