1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements an indenter that manages the indentation of 11 /// continuations. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 18 #include "Encoding.h" 19 #include "FormatToken.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/Support/Regex.h" 22 #include <map> 23 #include <tuple> 24 25 namespace clang { 26 class SourceManager; 27 28 namespace format { 29 30 class AnnotatedLine; 31 class BreakableToken; 32 struct FormatToken; 33 struct LineState; 34 struct ParenState; 35 struct RawStringFormatStyleManager; 36 class WhitespaceManager; 37 38 struct RawStringFormatStyleManager { 39 llvm::StringMap<FormatStyle> DelimiterStyle; 40 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 41 42 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 43 44 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 45 46 llvm::Optional<FormatStyle> 47 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 48 }; 49 50 class ContinuationIndenter { 51 public: 52 /// Constructs a \c ContinuationIndenter to format \p Line starting in 53 /// column \p FirstIndent. 54 ContinuationIndenter(const FormatStyle &Style, 55 const AdditionalKeywords &Keywords, 56 const SourceManager &SourceMgr, 57 WhitespaceManager &Whitespaces, 58 encoding::Encoding Encoding, 59 bool BinPackInconclusiveFunctions); 60 61 /// Get the initial state, i.e. the state after placing \p Line's 62 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 63 /// the case of formatting inside raw string literals, \p FirstStartColumn is 64 /// the column at which the state of the parent formatter is. 65 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 66 const AnnotatedLine *Line, bool DryRun); 67 68 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 69 // better home. 70 /// Returns \c true, if a line break after \p State is allowed. 71 bool canBreak(const LineState &State); 72 73 /// Returns \c true, if a line break after \p State is mandatory. 74 bool mustBreak(const LineState &State); 75 76 /// Appends the next token to \p State and updates information 77 /// necessary for indentation. 78 /// 79 /// Puts the token on the current line if \p Newline is \c false and adds a 80 /// line break and necessary indentation otherwise. 81 /// 82 /// If \p DryRun is \c false, also creates and stores the required 83 /// \c Replacement. 84 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 85 unsigned ExtraSpaces = 0); 86 87 /// Get the column limit for this line. This is the style's column 88 /// limit, potentially reduced for preprocessor definitions. 89 unsigned getColumnLimit(const LineState &State) const; 90 91 private: 92 /// Mark the next token as consumed in \p State and modify its stacks 93 /// accordingly. 94 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 95 96 /// Update 'State' according to the next token's fake left parentheses. 97 void moveStatePastFakeLParens(LineState &State, bool Newline); 98 /// Update 'State' according to the next token's fake r_parens. 99 void moveStatePastFakeRParens(LineState &State); 100 101 /// Update 'State' according to the next token being one of "(<{[". 102 void moveStatePastScopeOpener(LineState &State, bool Newline); 103 /// Update 'State' according to the next token being one of ")>}]". 104 void moveStatePastScopeCloser(LineState &State); 105 /// Update 'State' with the next token opening a nested block. 106 void moveStateToNewBlock(LineState &State); 107 108 /// Reformats a raw string literal. 109 /// 110 /// \returns An extra penalty induced by reformatting the token. 111 unsigned reformatRawStringLiteral(const FormatToken &Current, 112 LineState &State, 113 const FormatStyle &RawStringStyle, 114 bool DryRun, bool Newline); 115 116 /// If the current token is at the end of the current line, handle 117 /// the transition to the next line. 118 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 119 bool DryRun, bool AllowBreak, bool Newline); 120 121 /// If \p Current is a raw string that is configured to be reformatted, 122 /// return the style to be used. 123 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 124 const LineState &State); 125 126 /// If the current token sticks out over the end of the line, break 127 /// it if possible. 128 /// 129 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 130 /// when tokens are broken or lines exceed the column limit, and exceeded 131 /// indicates whether the algorithm purposefully left lines exceeding the 132 /// column limit. 133 /// 134 /// The returned penalty will cover the cost of the additional line breaks 135 /// and column limit violation in all lines except for the last one. The 136 /// penalty for the column limit violation in the last line (and in single 137 /// line tokens) is handled in \c addNextStateToQueue. 138 /// 139 /// \p Strict indicates whether reflowing is allowed to leave characters 140 /// protruding the column limit; if true, lines will be split strictly within 141 /// the column limit where possible; if false, words are allowed to protrude 142 /// over the column limit as long as the penalty is less than the penalty 143 /// of a break. 144 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 145 LineState &State, 146 bool AllowBreak, bool DryRun, 147 bool Strict); 148 149 /// Returns the \c BreakableToken starting at \p Current, or nullptr 150 /// if the current token cannot be broken. 151 std::unique_ptr<BreakableToken> 152 createBreakableToken(const FormatToken &Current, LineState &State, 153 bool AllowBreak); 154 155 /// Appends the next token to \p State and updates information 156 /// necessary for indentation. 157 /// 158 /// Puts the token on the current line. 159 /// 160 /// If \p DryRun is \c false, also creates and stores the required 161 /// \c Replacement. 162 void addTokenOnCurrentLine(LineState &State, bool DryRun, 163 unsigned ExtraSpaces); 164 165 /// Appends the next token to \p State and updates information 166 /// necessary for indentation. 167 /// 168 /// Adds a line break and necessary indentation. 169 /// 170 /// If \p DryRun is \c false, also creates and stores the required 171 /// \c Replacement. 172 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 173 174 /// Calculate the new column for a line wrap before the next token. 175 unsigned getNewLineColumn(const LineState &State); 176 177 /// Adds a multiline token to the \p State. 178 /// 179 /// \returns Extra penalty for the first line of the literal: last line is 180 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 181 /// matter, as we don't change them. 182 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 183 184 /// Returns \c true if the next token starts a multiline string 185 /// literal. 186 /// 187 /// This includes implicitly concatenated strings, strings that will be broken 188 /// by clang-format and string literals with escaped newlines. 189 bool nextIsMultilineString(const LineState &State); 190 191 FormatStyle Style; 192 const AdditionalKeywords &Keywords; 193 const SourceManager &SourceMgr; 194 WhitespaceManager &Whitespaces; 195 encoding::Encoding Encoding; 196 bool BinPackInconclusiveFunctions; 197 llvm::Regex CommentPragmasRegex; 198 const RawStringFormatStyleManager RawStringFormats; 199 }; 200 201 struct ParenState { 202 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 203 bool AvoidBinPacking, bool NoLineBreak) 204 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 205 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), 206 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 207 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), 208 LastOperatorWrapped(true), ContainsLineBreak(false), 209 ContainsUnwrappedBuilder(false), AlignColons(true), 210 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), 211 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} 212 213 /// \brief The token opening this parenthesis level, or nullptr if this level 214 /// is opened by fake parenthesis. 215 /// 216 /// Not considered for memoization as it will always have the same value at 217 /// the same token. 218 const FormatToken *Tok; 219 220 /// The position to which a specific parenthesis level needs to be 221 /// indented. 222 unsigned Indent; 223 224 /// The position of the last space on each level. 225 /// 226 /// Used e.g. to break like: 227 /// functionCall(Parameter, otherCall( 228 /// OtherParameter)); 229 unsigned LastSpace; 230 231 /// If a block relative to this parenthesis level gets wrapped, indent 232 /// it this much. 233 unsigned NestedBlockIndent; 234 235 /// The position the first "<<" operator encountered on each level. 236 /// 237 /// Used to align "<<" operators. 0 if no such operator has been encountered 238 /// on a level. 239 unsigned FirstLessLess = 0; 240 241 /// The column of a \c ? in a conditional expression; 242 unsigned QuestionColumn = 0; 243 244 /// The position of the colon in an ObjC method declaration/call. 245 unsigned ColonPos = 0; 246 247 /// The start of the most recent function in a builder-type call. 248 unsigned StartOfFunctionCall = 0; 249 250 /// Contains the start of array subscript expressions, so that they 251 /// can be aligned. 252 unsigned StartOfArraySubscripts = 0; 253 254 /// If a nested name specifier was broken over multiple lines, this 255 /// contains the start column of the second line. Otherwise 0. 256 unsigned NestedNameSpecifierContinuation = 0; 257 258 /// If a call expression was broken over multiple lines, this 259 /// contains the start column of the second line. Otherwise 0. 260 unsigned CallContinuation = 0; 261 262 /// The column of the first variable name in a variable declaration. 263 /// 264 /// Used to align further variables if necessary. 265 unsigned VariablePos = 0; 266 267 /// Whether a newline needs to be inserted before the block's closing 268 /// brace. 269 /// 270 /// We only want to insert a newline before the closing brace if there also 271 /// was a newline after the beginning left brace. 272 bool BreakBeforeClosingBrace : 1; 273 274 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 275 /// lines, in this context. 276 bool AvoidBinPacking : 1; 277 278 /// Break after the next comma (or all the commas in this context if 279 /// \c AvoidBinPacking is \c true). 280 bool BreakBeforeParameter : 1; 281 282 /// Line breaking in this context would break a formatting rule. 283 bool NoLineBreak : 1; 284 285 /// Same as \c NoLineBreak, but is restricted until the end of the 286 /// operand (including the next ","). 287 bool NoLineBreakInOperand : 1; 288 289 /// True if the last binary operator on this level was wrapped to the 290 /// next line. 291 bool LastOperatorWrapped : 1; 292 293 /// \c true if this \c ParenState already contains a line-break. 294 /// 295 /// The first line break in a certain \c ParenState causes extra penalty so 296 /// that clang-format prefers similar breaks, i.e. breaks in the same 297 /// parenthesis. 298 bool ContainsLineBreak : 1; 299 300 /// \c true if this \c ParenState contains multiple segments of a 301 /// builder-type call on one line. 302 bool ContainsUnwrappedBuilder : 1; 303 304 /// \c true if the colons of the curren ObjC method expression should 305 /// be aligned. 306 /// 307 /// Not considered for memoization as it will always have the same value at 308 /// the same token. 309 bool AlignColons : 1; 310 311 /// \c true if at least one selector name was found in the current 312 /// ObjC method expression. 313 /// 314 /// Not considered for memoization as it will always have the same value at 315 /// the same token. 316 bool ObjCSelectorNameFound : 1; 317 318 /// \c true if there are multiple nested blocks inside these parens. 319 /// 320 /// Not considered for memoization as it will always have the same value at 321 /// the same token. 322 bool HasMultipleNestedBlocks : 1; 323 324 /// The start of a nested block (e.g. lambda introducer in C++ or 325 /// "function" in JavaScript) is not wrapped to a new line. 326 bool NestedBlockInlined : 1; 327 328 /// \c true if the current \c ParenState represents an Objective-C 329 /// array literal. 330 bool IsInsideObjCArrayLiteral : 1; 331 332 bool operator<(const ParenState &Other) const { 333 if (Indent != Other.Indent) 334 return Indent < Other.Indent; 335 if (LastSpace != Other.LastSpace) 336 return LastSpace < Other.LastSpace; 337 if (NestedBlockIndent != Other.NestedBlockIndent) 338 return NestedBlockIndent < Other.NestedBlockIndent; 339 if (FirstLessLess != Other.FirstLessLess) 340 return FirstLessLess < Other.FirstLessLess; 341 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 342 return BreakBeforeClosingBrace; 343 if (QuestionColumn != Other.QuestionColumn) 344 return QuestionColumn < Other.QuestionColumn; 345 if (AvoidBinPacking != Other.AvoidBinPacking) 346 return AvoidBinPacking; 347 if (BreakBeforeParameter != Other.BreakBeforeParameter) 348 return BreakBeforeParameter; 349 if (NoLineBreak != Other.NoLineBreak) 350 return NoLineBreak; 351 if (LastOperatorWrapped != Other.LastOperatorWrapped) 352 return LastOperatorWrapped; 353 if (ColonPos != Other.ColonPos) 354 return ColonPos < Other.ColonPos; 355 if (StartOfFunctionCall != Other.StartOfFunctionCall) 356 return StartOfFunctionCall < Other.StartOfFunctionCall; 357 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 358 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 359 if (CallContinuation != Other.CallContinuation) 360 return CallContinuation < Other.CallContinuation; 361 if (VariablePos != Other.VariablePos) 362 return VariablePos < Other.VariablePos; 363 if (ContainsLineBreak != Other.ContainsLineBreak) 364 return ContainsLineBreak; 365 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 366 return ContainsUnwrappedBuilder; 367 if (NestedBlockInlined != Other.NestedBlockInlined) 368 return NestedBlockInlined; 369 return false; 370 } 371 }; 372 373 /// The current state when indenting a unwrapped line. 374 /// 375 /// As the indenting tries different combinations this is copied by value. 376 struct LineState { 377 /// The number of used columns in the current line. 378 unsigned Column; 379 380 /// The token that needs to be next formatted. 381 FormatToken *NextToken; 382 383 /// \c true if this line contains a continued for-loop section. 384 bool LineContainsContinuedForLoopSection; 385 386 /// \c true if \p NextToken should not continue this line. 387 bool NoContinuation; 388 389 /// The \c NestingLevel at the start of this line. 390 unsigned StartOfLineLevel; 391 392 /// The lowest \c NestingLevel on the current line. 393 unsigned LowestLevelOnLine; 394 395 /// The start column of the string literal, if we're in a string 396 /// literal sequence, 0 otherwise. 397 unsigned StartOfStringLiteral; 398 399 /// A stack keeping track of properties applying to parenthesis 400 /// levels. 401 std::vector<ParenState> Stack; 402 403 /// Ignore the stack of \c ParenStates for state comparison. 404 /// 405 /// In long and deeply nested unwrapped lines, the current algorithm can 406 /// be insufficient for finding the best formatting with a reasonable amount 407 /// of time and memory. Setting this flag will effectively lead to the 408 /// algorithm not analyzing some combinations. However, these combinations 409 /// rarely contain the optimal solution: In short, accepting a higher 410 /// penalty early would need to lead to different values in the \c 411 /// ParenState stack (in an otherwise identical state) and these different 412 /// values would need to lead to a significant amount of avoided penalty 413 /// later. 414 /// 415 /// FIXME: Come up with a better algorithm instead. 416 bool IgnoreStackForComparison; 417 418 /// The indent of the first token. 419 unsigned FirstIndent; 420 421 /// The line that is being formatted. 422 /// 423 /// Does not need to be considered for memoization because it doesn't change. 424 const AnnotatedLine *Line; 425 426 /// Comparison operator to be able to used \c LineState in \c map. 427 bool operator<(const LineState &Other) const { 428 if (NextToken != Other.NextToken) 429 return NextToken < Other.NextToken; 430 if (Column != Other.Column) 431 return Column < Other.Column; 432 if (LineContainsContinuedForLoopSection != 433 Other.LineContainsContinuedForLoopSection) 434 return LineContainsContinuedForLoopSection; 435 if (NoContinuation != Other.NoContinuation) 436 return NoContinuation; 437 if (StartOfLineLevel != Other.StartOfLineLevel) 438 return StartOfLineLevel < Other.StartOfLineLevel; 439 if (LowestLevelOnLine != Other.LowestLevelOnLine) 440 return LowestLevelOnLine < Other.LowestLevelOnLine; 441 if (StartOfStringLiteral != Other.StartOfStringLiteral) 442 return StartOfStringLiteral < Other.StartOfStringLiteral; 443 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 444 return false; 445 return Stack < Other.Stack; 446 } 447 }; 448 449 } // end namespace format 450 } // end namespace clang 451 452 #endif 453