1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/Support/Regex.h" 22 #include <map> 23 #include <tuple> 24 25 namespace clang { 26 class SourceManager; 27 28 namespace format { 29 30 class AnnotatedLine; 31 class BreakableToken; 32 struct FormatToken; 33 struct LineState; 34 struct ParenState; 35 struct RawStringFormatStyleManager; 36 class WhitespaceManager; 37 38 struct RawStringFormatStyleManager { 39 llvm::StringMap<FormatStyle> DelimiterStyle; 40 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 41 42 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 43 44 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 45 46 llvm::Optional<FormatStyle> 47 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 48 }; 49 50 class ContinuationIndenter { 51 public: 52 /// Constructs a \c ContinuationIndenter to format \p Line starting in 53 /// column \p FirstIndent. 54 ContinuationIndenter(const FormatStyle &Style, 55 const AdditionalKeywords &Keywords, 56 const SourceManager &SourceMgr, 57 WhitespaceManager &Whitespaces, 58 encoding::Encoding Encoding, 59 bool BinPackInconclusiveFunctions); 60 61 /// Get the initial state, i.e. the state after placing \p Line's 62 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 63 /// the case of formatting inside raw string literals, \p FirstStartColumn is 64 /// the column at which the state of the parent formatter is. 65 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 66 const AnnotatedLine *Line, bool DryRun); 67 68 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 69 // better home. 70 /// Returns \c true, if a line break after \p State is allowed. 71 bool canBreak(const LineState &State); 72 73 /// Returns \c true, if a line break after \p State is mandatory. 74 bool mustBreak(const LineState &State); 75 76 /// Appends the next token to \p State and updates information 77 /// necessary for indentation. 78 /// 79 /// Puts the token on the current line if \p Newline is \c false and adds a 80 /// line break and necessary indentation otherwise. 81 /// 82 /// If \p DryRun is \c false, also creates and stores the required 83 /// \c Replacement. 84 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 85 unsigned ExtraSpaces = 0); 86 87 /// Get the column limit for this line. This is the style's column 88 /// limit, potentially reduced for preprocessor definitions. 89 unsigned getColumnLimit(const LineState &State) const; 90 91 private: 92 /// Mark the next token as consumed in \p State and modify its stacks 93 /// accordingly. 94 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 95 96 /// Update 'State' according to the next token's fake left parentheses. 97 void moveStatePastFakeLParens(LineState &State, bool Newline); 98 /// Update 'State' according to the next token's fake r_parens. 99 void moveStatePastFakeRParens(LineState &State); 100 101 /// Update 'State' according to the next token being one of "(<{[". 102 void moveStatePastScopeOpener(LineState &State, bool Newline); 103 /// Update 'State' according to the next token being one of ")>}]". 104 void moveStatePastScopeCloser(LineState &State); 105 /// Update 'State' with the next token opening a nested block. 106 void moveStateToNewBlock(LineState &State); 107 108 /// Reformats a raw string literal. 109 /// 110 /// \returns An extra penalty induced by reformatting the token. 111 unsigned reformatRawStringLiteral(const FormatToken &Current, 112 LineState &State, 113 const FormatStyle &RawStringStyle, 114 bool DryRun, bool Newline); 115 116 /// If the current token is at the end of the current line, handle 117 /// the transition to the next line. 118 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 119 bool DryRun, bool AllowBreak, bool Newline); 120 121 /// If \p Current is a raw string that is configured to be reformatted, 122 /// return the style to be used. 123 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 124 const LineState &State); 125 126 /// If the current token sticks out over the end of the line, break 127 /// it if possible. 128 /// 129 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 130 /// when tokens are broken or lines exceed the column limit, and exceeded 131 /// indicates whether the algorithm purposefully left lines exceeding the 132 /// column limit. 133 /// 134 /// The returned penalty will cover the cost of the additional line breaks 135 /// and column limit violation in all lines except for the last one. The 136 /// penalty for the column limit violation in the last line (and in single 137 /// line tokens) is handled in \c addNextStateToQueue. 138 /// 139 /// \p Strict indicates whether reflowing is allowed to leave characters 140 /// protruding the column limit; if true, lines will be split strictly within 141 /// the column limit where possible; if false, words are allowed to protrude 142 /// over the column limit as long as the penalty is less than the penalty 143 /// of a break. 144 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 145 LineState &State, 146 bool AllowBreak, bool DryRun, 147 bool Strict); 148 149 /// Returns the \c BreakableToken starting at \p Current, or nullptr 150 /// if the current token cannot be broken. 151 std::unique_ptr<BreakableToken> 152 createBreakableToken(const FormatToken &Current, LineState &State, 153 bool AllowBreak); 154 155 /// Appends the next token to \p State and updates information 156 /// necessary for indentation. 157 /// 158 /// Puts the token on the current line. 159 /// 160 /// If \p DryRun is \c false, also creates and stores the required 161 /// \c Replacement. 162 void addTokenOnCurrentLine(LineState &State, bool DryRun, 163 unsigned ExtraSpaces); 164 165 /// Appends the next token to \p State and updates information 166 /// necessary for indentation. 167 /// 168 /// Adds a line break and necessary indentation. 169 /// 170 /// If \p DryRun is \c false, also creates and stores the required 171 /// \c Replacement. 172 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 173 174 /// Calculate the new column for a line wrap before the next token. 175 unsigned getNewLineColumn(const LineState &State); 176 177 /// Adds a multiline token to the \p State. 178 /// 179 /// \returns Extra penalty for the first line of the literal: last line is 180 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 181 /// matter, as we don't change them. 182 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 183 184 /// Returns \c true if the next token starts a multiline string 185 /// literal. 186 /// 187 /// This includes implicitly concatenated strings, strings that will be broken 188 /// by clang-format and string literals with escaped newlines. 189 bool nextIsMultilineString(const LineState &State); 190 191 FormatStyle Style; 192 const AdditionalKeywords &Keywords; 193 const SourceManager &SourceMgr; 194 WhitespaceManager &Whitespaces; 195 encoding::Encoding Encoding; 196 bool BinPackInconclusiveFunctions; 197 llvm::Regex CommentPragmasRegex; 198 const RawStringFormatStyleManager RawStringFormats; 199 }; 200 201 struct ParenState { 202 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 203 bool AvoidBinPacking, bool NoLineBreak) 204 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 205 NestedBlockIndent(Indent), IsAligned(false), 206 BreakBeforeClosingBrace(false), AvoidBinPacking(AvoidBinPacking), 207 BreakBeforeParameter(false), NoLineBreak(NoLineBreak), 208 NoLineBreakInOperand(false), LastOperatorWrapped(true), 209 ContainsLineBreak(false), ContainsUnwrappedBuilder(false), 210 AlignColons(true), ObjCSelectorNameFound(false), 211 HasMultipleNestedBlocks(false), NestedBlockInlined(false), 212 IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false), 213 IsChainedConditional(false), IsWrappedConditional(false), 214 UnindentOperator(false) {} 215 216 /// \brief The token opening this parenthesis level, or nullptr if this level 217 /// is opened by fake parenthesis. 218 /// 219 /// Not considered for memoization as it will always have the same value at 220 /// the same token. 221 const FormatToken *Tok; 222 223 /// The position to which a specific parenthesis level needs to be 224 /// indented. 225 unsigned Indent; 226 227 /// The position of the last space on each level. 228 /// 229 /// Used e.g. to break like: 230 /// functionCall(Parameter, otherCall( 231 /// OtherParameter)); 232 unsigned LastSpace; 233 234 /// If a block relative to this parenthesis level gets wrapped, indent 235 /// it this much. 236 unsigned NestedBlockIndent; 237 238 /// The position the first "<<" operator encountered on each level. 239 /// 240 /// Used to align "<<" operators. 0 if no such operator has been encountered 241 /// on a level. 242 unsigned FirstLessLess = 0; 243 244 /// The column of a \c ? in a conditional expression; 245 unsigned QuestionColumn = 0; 246 247 /// The position of the colon in an ObjC method declaration/call. 248 unsigned ColonPos = 0; 249 250 /// The start of the most recent function in a builder-type call. 251 unsigned StartOfFunctionCall = 0; 252 253 /// Contains the start of array subscript expressions, so that they 254 /// can be aligned. 255 unsigned StartOfArraySubscripts = 0; 256 257 /// If a nested name specifier was broken over multiple lines, this 258 /// contains the start column of the second line. Otherwise 0. 259 unsigned NestedNameSpecifierContinuation = 0; 260 261 /// If a call expression was broken over multiple lines, this 262 /// contains the start column of the second line. Otherwise 0. 263 unsigned CallContinuation = 0; 264 265 /// The column of the first variable name in a variable declaration. 266 /// 267 /// Used to align further variables if necessary. 268 unsigned VariablePos = 0; 269 270 /// Whether this block's indentation is used for alignment. 271 bool IsAligned : 1; 272 273 /// Whether a newline needs to be inserted before the block's closing 274 /// brace. 275 /// 276 /// We only want to insert a newline before the closing brace if there also 277 /// was a newline after the beginning left brace. 278 bool BreakBeforeClosingBrace : 1; 279 280 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 281 /// lines, in this context. 282 bool AvoidBinPacking : 1; 283 284 /// Break after the next comma (or all the commas in this context if 285 /// \c AvoidBinPacking is \c true). 286 bool BreakBeforeParameter : 1; 287 288 /// Line breaking in this context would break a formatting rule. 289 bool NoLineBreak : 1; 290 291 /// Same as \c NoLineBreak, but is restricted until the end of the 292 /// operand (including the next ","). 293 bool NoLineBreakInOperand : 1; 294 295 /// True if the last binary operator on this level was wrapped to the 296 /// next line. 297 bool LastOperatorWrapped : 1; 298 299 /// \c true if this \c ParenState already contains a line-break. 300 /// 301 /// The first line break in a certain \c ParenState causes extra penalty so 302 /// that clang-format prefers similar breaks, i.e. breaks in the same 303 /// parenthesis. 304 bool ContainsLineBreak : 1; 305 306 /// \c true if this \c ParenState contains multiple segments of a 307 /// builder-type call on one line. 308 bool ContainsUnwrappedBuilder : 1; 309 310 /// \c true if the colons of the curren ObjC method expression should 311 /// be aligned. 312 /// 313 /// Not considered for memoization as it will always have the same value at 314 /// the same token. 315 bool AlignColons : 1; 316 317 /// \c true if at least one selector name was found in the current 318 /// ObjC method expression. 319 /// 320 /// Not considered for memoization as it will always have the same value at 321 /// the same token. 322 bool ObjCSelectorNameFound : 1; 323 324 /// \c true if there are multiple nested blocks inside these parens. 325 /// 326 /// Not considered for memoization as it will always have the same value at 327 /// the same token. 328 bool HasMultipleNestedBlocks : 1; 329 330 /// The start of a nested block (e.g. lambda introducer in C++ or 331 /// "function" in JavaScript) is not wrapped to a new line. 332 bool NestedBlockInlined : 1; 333 334 /// \c true if the current \c ParenState represents an Objective-C 335 /// array literal. 336 bool IsInsideObjCArrayLiteral : 1; 337 338 bool IsCSharpGenericTypeConstraint : 1; 339 340 /// \brief true if the current \c ParenState represents the false branch of 341 /// a chained conditional expression (e.g. else-if) 342 bool IsChainedConditional : 1; 343 344 /// \brief true if there conditionnal was wrapped on the first operator (the 345 /// question mark) 346 bool IsWrappedConditional : 1; 347 348 /// \brief Indicates the indent should be reduced by the length of the 349 /// operator. 350 bool UnindentOperator : 1; 351 352 bool operator<(const ParenState &Other) const { 353 if (Indent != Other.Indent) 354 return Indent < Other.Indent; 355 if (LastSpace != Other.LastSpace) 356 return LastSpace < Other.LastSpace; 357 if (NestedBlockIndent != Other.NestedBlockIndent) 358 return NestedBlockIndent < Other.NestedBlockIndent; 359 if (FirstLessLess != Other.FirstLessLess) 360 return FirstLessLess < Other.FirstLessLess; 361 if (IsAligned != Other.IsAligned) 362 return IsAligned; 363 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 364 return BreakBeforeClosingBrace; 365 if (QuestionColumn != Other.QuestionColumn) 366 return QuestionColumn < Other.QuestionColumn; 367 if (AvoidBinPacking != Other.AvoidBinPacking) 368 return AvoidBinPacking; 369 if (BreakBeforeParameter != Other.BreakBeforeParameter) 370 return BreakBeforeParameter; 371 if (NoLineBreak != Other.NoLineBreak) 372 return NoLineBreak; 373 if (LastOperatorWrapped != Other.LastOperatorWrapped) 374 return LastOperatorWrapped; 375 if (ColonPos != Other.ColonPos) 376 return ColonPos < Other.ColonPos; 377 if (StartOfFunctionCall != Other.StartOfFunctionCall) 378 return StartOfFunctionCall < Other.StartOfFunctionCall; 379 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 380 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 381 if (CallContinuation != Other.CallContinuation) 382 return CallContinuation < Other.CallContinuation; 383 if (VariablePos != Other.VariablePos) 384 return VariablePos < Other.VariablePos; 385 if (ContainsLineBreak != Other.ContainsLineBreak) 386 return ContainsLineBreak; 387 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 388 return ContainsUnwrappedBuilder; 389 if (NestedBlockInlined != Other.NestedBlockInlined) 390 return NestedBlockInlined; 391 if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint) 392 return IsCSharpGenericTypeConstraint; 393 if (IsChainedConditional != Other.IsChainedConditional) 394 return IsChainedConditional; 395 if (IsWrappedConditional != Other.IsWrappedConditional) 396 return IsWrappedConditional; 397 if (UnindentOperator != Other.UnindentOperator) 398 return UnindentOperator; 399 return false; 400 } 401 }; 402 403 /// The current state when indenting a unwrapped line. 404 /// 405 /// As the indenting tries different combinations this is copied by value. 406 struct LineState { 407 /// The number of used columns in the current line. 408 unsigned Column; 409 410 /// The token that needs to be next formatted. 411 FormatToken *NextToken; 412 413 /// \c true if this line contains a continued for-loop section. 414 bool LineContainsContinuedForLoopSection; 415 416 /// \c true if \p NextToken should not continue this line. 417 bool NoContinuation; 418 419 /// The \c NestingLevel at the start of this line. 420 unsigned StartOfLineLevel; 421 422 /// The lowest \c NestingLevel on the current line. 423 unsigned LowestLevelOnLine; 424 425 /// The start column of the string literal, if we're in a string 426 /// literal sequence, 0 otherwise. 427 unsigned StartOfStringLiteral; 428 429 /// A stack keeping track of properties applying to parenthesis 430 /// levels. 431 std::vector<ParenState> Stack; 432 433 /// Ignore the stack of \c ParenStates for state comparison. 434 /// 435 /// In long and deeply nested unwrapped lines, the current algorithm can 436 /// be insufficient for finding the best formatting with a reasonable amount 437 /// of time and memory. Setting this flag will effectively lead to the 438 /// algorithm not analyzing some combinations. However, these combinations 439 /// rarely contain the optimal solution: In short, accepting a higher 440 /// penalty early would need to lead to different values in the \c 441 /// ParenState stack (in an otherwise identical state) and these different 442 /// values would need to lead to a significant amount of avoided penalty 443 /// later. 444 /// 445 /// FIXME: Come up with a better algorithm instead. 446 bool IgnoreStackForComparison; 447 448 /// The indent of the first token. 449 unsigned FirstIndent; 450 451 /// The line that is being formatted. 452 /// 453 /// Does not need to be considered for memoization because it doesn't change. 454 const AnnotatedLine *Line; 455 456 /// Comparison operator to be able to used \c LineState in \c map. 457 bool operator<(const LineState &Other) const { 458 if (NextToken != Other.NextToken) 459 return NextToken < Other.NextToken; 460 if (Column != Other.Column) 461 return Column < Other.Column; 462 if (LineContainsContinuedForLoopSection != 463 Other.LineContainsContinuedForLoopSection) 464 return LineContainsContinuedForLoopSection; 465 if (NoContinuation != Other.NoContinuation) 466 return NoContinuation; 467 if (StartOfLineLevel != Other.StartOfLineLevel) 468 return StartOfLineLevel < Other.StartOfLineLevel; 469 if (LowestLevelOnLine != Other.LowestLevelOnLine) 470 return LowestLevelOnLine < Other.LowestLevelOnLine; 471 if (StartOfStringLiteral != Other.StartOfStringLiteral) 472 return StartOfStringLiteral < Other.StartOfStringLiteral; 473 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 474 return false; 475 return Stack < Other.Stack; 476 } 477 }; 478 479 } // end namespace format 480 } // end namespace clang 481 482 #endif 483