xref: /freebsd/contrib/llvm-project/clang/lib/Format/ContinuationIndenter.cpp (revision 6c4b055cfb6bf549e9145dde6454cc6b178c35e4)
1 //===--- ContinuationIndenter.cpp - Format C++ code -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the continuation indenter.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "ContinuationIndenter.h"
15 #include "BreakableToken.h"
16 #include "FormatInternal.h"
17 #include "FormatToken.h"
18 #include "WhitespaceManager.h"
19 #include "clang/Basic/OperatorPrecedence.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Format/Format.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/Support/Debug.h"
25 #include <optional>
26 
27 #define DEBUG_TYPE "format-indenter"
28 
29 namespace clang {
30 namespace format {
31 
32 // Returns true if a TT_SelectorName should be indented when wrapped,
33 // false otherwise.
shouldIndentWrappedSelectorName(const FormatStyle & Style,LineType LineType)34 static bool shouldIndentWrappedSelectorName(const FormatStyle &Style,
35                                             LineType LineType) {
36   return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl;
37 }
38 
39 // Returns true if a binary operator following \p Tok should be unindented when
40 // the style permits it.
shouldUnindentNextOperator(const FormatToken & Tok)41 static bool shouldUnindentNextOperator(const FormatToken &Tok) {
42   const FormatToken *Previous = Tok.getPreviousNonComment();
43   return Previous && (Previous->getPrecedence() == prec::Assignment ||
44                       Previous->isOneOf(tok::kw_return, TT_RequiresClause));
45 }
46 
47 // Returns the length of everything up to the first possible line break after
48 // the ), ], } or > matching \c Tok.
getLengthToMatchingParen(const FormatToken & Tok,ArrayRef<ParenState> Stack)49 static unsigned getLengthToMatchingParen(const FormatToken &Tok,
50                                          ArrayRef<ParenState> Stack) {
51   // Normally whether or not a break before T is possible is calculated and
52   // stored in T.CanBreakBefore. Braces, array initializers and text proto
53   // messages like `key: < ... >` are an exception: a break is possible
54   // before a closing brace R if a break was inserted after the corresponding
55   // opening brace. The information about whether or not a break is needed
56   // before a closing brace R is stored in the ParenState field
57   // S.BreakBeforeClosingBrace where S is the state that R closes.
58   //
59   // In order to decide whether there can be a break before encountered right
60   // braces, this implementation iterates over the sequence of tokens and over
61   // the paren stack in lockstep, keeping track of the stack level which visited
62   // right braces correspond to in MatchingStackIndex.
63   //
64   // For example, consider:
65   // L. <- line number
66   // 1. {
67   // 2. {1},
68   // 3. {2},
69   // 4. {{3}}}
70   //     ^ where we call this method with this token.
71   // The paren stack at this point contains 3 brace levels:
72   //  0. { at line 1, BreakBeforeClosingBrace: true
73   //  1. first { at line 4, BreakBeforeClosingBrace: false
74   //  2. second { at line 4, BreakBeforeClosingBrace: false,
75   //  where there might be fake parens levels in-between these levels.
76   // The algorithm will start at the first } on line 4, which is the matching
77   // brace of the initial left brace and at level 2 of the stack. Then,
78   // examining BreakBeforeClosingBrace: false at level 2, it will continue to
79   // the second } on line 4, and will traverse the stack downwards until it
80   // finds the matching { on level 1. Then, examining BreakBeforeClosingBrace:
81   // false at level 1, it will continue to the third } on line 4 and will
82   // traverse the stack downwards until it finds the matching { on level 0.
83   // Then, examining BreakBeforeClosingBrace: true at level 0, the algorithm
84   // will stop and will use the second } on line 4 to determine the length to
85   // return, as in this example the range will include the tokens: {3}}
86   //
87   // The algorithm will only traverse the stack if it encounters braces, array
88   // initializer squares or text proto angle brackets.
89   if (!Tok.MatchingParen)
90     return 0;
91   FormatToken *End = Tok.MatchingParen;
92   // Maintains a stack level corresponding to the current End token.
93   int MatchingStackIndex = Stack.size() - 1;
94   // Traverses the stack downwards, looking for the level to which LBrace
95   // corresponds. Returns either a pointer to the matching level or nullptr if
96   // LParen is not found in the initial portion of the stack up to
97   // MatchingStackIndex.
98   auto FindParenState = [&](const FormatToken *LBrace) -> const ParenState * {
99     while (MatchingStackIndex >= 0 && Stack[MatchingStackIndex].Tok != LBrace)
100       --MatchingStackIndex;
101     return MatchingStackIndex >= 0 ? &Stack[MatchingStackIndex] : nullptr;
102   };
103   for (; End->Next; End = End->Next) {
104     if (End->Next->CanBreakBefore)
105       break;
106     if (!End->Next->closesScope())
107       continue;
108     if (End->Next->MatchingParen &&
109         End->Next->MatchingParen->isOneOf(
110             tok::l_brace, TT_ArrayInitializerLSquare, tok::less)) {
111       const ParenState *State = FindParenState(End->Next->MatchingParen);
112       if (State && State->BreakBeforeClosingBrace)
113         break;
114     }
115   }
116   return End->TotalLength - Tok.TotalLength + 1;
117 }
118 
getLengthToNextOperator(const FormatToken & Tok)119 static unsigned getLengthToNextOperator(const FormatToken &Tok) {
120   if (!Tok.NextOperator)
121     return 0;
122   return Tok.NextOperator->TotalLength - Tok.TotalLength;
123 }
124 
125 // Returns \c true if \c Tok is the "." or "->" of a call and starts the next
126 // segment of a builder type call.
startsSegmentOfBuilderTypeCall(const FormatToken & Tok)127 static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) {
128   return Tok.isMemberAccess() && Tok.Previous && Tok.Previous->closesScope();
129 }
130 
131 // Returns \c true if \c Current starts a new parameter.
startsNextParameter(const FormatToken & Current,const FormatStyle & Style)132 static bool startsNextParameter(const FormatToken &Current,
133                                 const FormatStyle &Style) {
134   const FormatToken &Previous = *Current.Previous;
135   if (Current.is(TT_CtorInitializerComma) &&
136       Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
137     return true;
138   }
139   if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName))
140     return true;
141   return Previous.is(tok::comma) && !Current.isTrailingComment() &&
142          ((Previous.isNot(TT_CtorInitializerComma) ||
143            Style.BreakConstructorInitializers !=
144                FormatStyle::BCIS_BeforeComma) &&
145           (Previous.isNot(TT_InheritanceComma) ||
146            Style.BreakInheritanceList != FormatStyle::BILS_BeforeComma));
147 }
148 
opensProtoMessageField(const FormatToken & LessTok,const FormatStyle & Style)149 static bool opensProtoMessageField(const FormatToken &LessTok,
150                                    const FormatStyle &Style) {
151   if (LessTok.isNot(tok::less))
152     return false;
153   return Style.Language == FormatStyle::LK_TextProto ||
154          (Style.Language == FormatStyle::LK_Proto &&
155           (LessTok.NestingLevel > 0 ||
156            (LessTok.Previous && LessTok.Previous->is(tok::equal))));
157 }
158 
159 // Returns the delimiter of a raw string literal, or std::nullopt if TokenText
160 // is not the text of a raw string literal. The delimiter could be the empty
161 // string.  For example, the delimiter of R"deli(cont)deli" is deli.
getRawStringDelimiter(StringRef TokenText)162 static std::optional<StringRef> getRawStringDelimiter(StringRef TokenText) {
163   if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'.
164       || !TokenText.starts_with("R\"") || !TokenText.ends_with("\"")) {
165     return std::nullopt;
166   }
167 
168   // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has
169   // size at most 16 by the standard, so the first '(' must be among the first
170   // 19 bytes.
171   size_t LParenPos = TokenText.substr(0, 19).find_first_of('(');
172   if (LParenPos == StringRef::npos)
173     return std::nullopt;
174   StringRef Delimiter = TokenText.substr(2, LParenPos - 2);
175 
176   // Check that the string ends in ')Delimiter"'.
177   size_t RParenPos = TokenText.size() - Delimiter.size() - 2;
178   if (TokenText[RParenPos] != ')')
179     return std::nullopt;
180   if (!TokenText.substr(RParenPos + 1).starts_with(Delimiter))
181     return std::nullopt;
182   return Delimiter;
183 }
184 
185 // Returns the canonical delimiter for \p Language, or the empty string if no
186 // canonical delimiter is specified.
187 static StringRef
getCanonicalRawStringDelimiter(const FormatStyle & Style,FormatStyle::LanguageKind Language)188 getCanonicalRawStringDelimiter(const FormatStyle &Style,
189                                FormatStyle::LanguageKind Language) {
190   for (const auto &Format : Style.RawStringFormats)
191     if (Format.Language == Language)
192       return StringRef(Format.CanonicalDelimiter);
193   return "";
194 }
195 
RawStringFormatStyleManager(const FormatStyle & CodeStyle)196 RawStringFormatStyleManager::RawStringFormatStyleManager(
197     const FormatStyle &CodeStyle) {
198   for (const auto &RawStringFormat : CodeStyle.RawStringFormats) {
199     std::optional<FormatStyle> LanguageStyle =
200         CodeStyle.GetLanguageStyle(RawStringFormat.Language);
201     if (!LanguageStyle) {
202       FormatStyle PredefinedStyle;
203       if (!getPredefinedStyle(RawStringFormat.BasedOnStyle,
204                               RawStringFormat.Language, &PredefinedStyle)) {
205         PredefinedStyle = getLLVMStyle();
206         PredefinedStyle.Language = RawStringFormat.Language;
207       }
208       LanguageStyle = PredefinedStyle;
209     }
210     LanguageStyle->ColumnLimit = CodeStyle.ColumnLimit;
211     for (StringRef Delimiter : RawStringFormat.Delimiters)
212       DelimiterStyle.insert({Delimiter, *LanguageStyle});
213     for (StringRef EnclosingFunction : RawStringFormat.EnclosingFunctions)
214       EnclosingFunctionStyle.insert({EnclosingFunction, *LanguageStyle});
215   }
216 }
217 
218 std::optional<FormatStyle>
getDelimiterStyle(StringRef Delimiter) const219 RawStringFormatStyleManager::getDelimiterStyle(StringRef Delimiter) const {
220   auto It = DelimiterStyle.find(Delimiter);
221   if (It == DelimiterStyle.end())
222     return std::nullopt;
223   return It->second;
224 }
225 
226 std::optional<FormatStyle>
getEnclosingFunctionStyle(StringRef EnclosingFunction) const227 RawStringFormatStyleManager::getEnclosingFunctionStyle(
228     StringRef EnclosingFunction) const {
229   auto It = EnclosingFunctionStyle.find(EnclosingFunction);
230   if (It == EnclosingFunctionStyle.end())
231     return std::nullopt;
232   return It->second;
233 }
234 
ContinuationIndenter(const FormatStyle & Style,const AdditionalKeywords & Keywords,const SourceManager & SourceMgr,WhitespaceManager & Whitespaces,encoding::Encoding Encoding,bool BinPackInconclusiveFunctions)235 ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style,
236                                            const AdditionalKeywords &Keywords,
237                                            const SourceManager &SourceMgr,
238                                            WhitespaceManager &Whitespaces,
239                                            encoding::Encoding Encoding,
240                                            bool BinPackInconclusiveFunctions)
241     : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr),
242       Whitespaces(Whitespaces), Encoding(Encoding),
243       BinPackInconclusiveFunctions(BinPackInconclusiveFunctions),
244       CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {}
245 
getInitialState(unsigned FirstIndent,unsigned FirstStartColumn,const AnnotatedLine * Line,bool DryRun)246 LineState ContinuationIndenter::getInitialState(unsigned FirstIndent,
247                                                 unsigned FirstStartColumn,
248                                                 const AnnotatedLine *Line,
249                                                 bool DryRun) {
250   LineState State;
251   State.FirstIndent = FirstIndent;
252   if (FirstStartColumn && Line->First->NewlinesBefore == 0)
253     State.Column = FirstStartColumn;
254   else
255     State.Column = FirstIndent;
256   // With preprocessor directive indentation, the line starts on column 0
257   // since it's indented after the hash, but FirstIndent is set to the
258   // preprocessor indent.
259   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
260       (Line->Type == LT_PreprocessorDirective ||
261        Line->Type == LT_ImportStatement)) {
262     State.Column = 0;
263   }
264   State.Line = Line;
265   State.NextToken = Line->First;
266   State.Stack.push_back(ParenState(/*Tok=*/nullptr, FirstIndent, FirstIndent,
267                                    /*AvoidBinPacking=*/false,
268                                    /*NoLineBreak=*/false));
269   State.NoContinuation = false;
270   State.StartOfStringLiteral = 0;
271   State.NoLineBreak = false;
272   State.StartOfLineLevel = 0;
273   State.LowestLevelOnLine = 0;
274   State.IgnoreStackForComparison = false;
275 
276   if (Style.Language == FormatStyle::LK_TextProto) {
277     // We need this in order to deal with the bin packing of text fields at
278     // global scope.
279     auto &CurrentState = State.Stack.back();
280     CurrentState.AvoidBinPacking = true;
281     CurrentState.BreakBeforeParameter = true;
282     CurrentState.AlignColons = false;
283   }
284 
285   // The first token has already been indented and thus consumed.
286   moveStateToNextToken(State, DryRun, /*Newline=*/false);
287   return State;
288 }
289 
canBreak(const LineState & State)290 bool ContinuationIndenter::canBreak(const LineState &State) {
291   const FormatToken &Current = *State.NextToken;
292   const FormatToken &Previous = *Current.Previous;
293   const auto &CurrentState = State.Stack.back();
294   assert(&Previous == Current.Previous);
295   if (!Current.CanBreakBefore && !(CurrentState.BreakBeforeClosingBrace &&
296                                    Current.closesBlockOrBlockTypeList(Style))) {
297     return false;
298   }
299   // The opening "{" of a braced list has to be on the same line as the first
300   // element if it is nested in another braced init list or function call.
301   if (!Current.MustBreakBefore && Previous.is(tok::l_brace) &&
302       Previous.isNot(TT_DictLiteral) && Previous.is(BK_BracedInit) &&
303       Previous.Previous &&
304       Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) {
305     return false;
306   }
307   // This prevents breaks like:
308   //   ...
309   //   SomeParameter, OtherParameter).DoSomething(
310   //   ...
311   // As they hide "DoSomething" and are generally bad for readability.
312   if (Previous.opensScope() && Previous.isNot(tok::l_brace) &&
313       State.LowestLevelOnLine < State.StartOfLineLevel &&
314       State.LowestLevelOnLine < Current.NestingLevel) {
315     return false;
316   }
317   if (Current.isMemberAccess() && CurrentState.ContainsUnwrappedBuilder)
318     return false;
319 
320   // Don't create a 'hanging' indent if there are multiple blocks in a single
321   // statement and we are aligning lambda blocks to their signatures.
322   if (Previous.is(tok::l_brace) && State.Stack.size() > 1 &&
323       State.Stack[State.Stack.size() - 2].NestedBlockInlined &&
324       State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks &&
325       Style.LambdaBodyIndentation == FormatStyle::LBI_Signature) {
326     return false;
327   }
328 
329   // Don't break after very short return types (e.g. "void") as that is often
330   // unexpected.
331   if (Current.is(TT_FunctionDeclarationName)) {
332     if (Style.BreakAfterReturnType == FormatStyle::RTBS_None &&
333         State.Column < 6) {
334       return false;
335     }
336 
337     if (Style.BreakAfterReturnType == FormatStyle::RTBS_ExceptShortType) {
338       assert(State.Column >= State.FirstIndent);
339       if (State.Column - State.FirstIndent < 6)
340         return false;
341     }
342   }
343 
344   // If binary operators are moved to the next line (including commas for some
345   // styles of constructor initializers), that's always ok.
346   if (!Current.isOneOf(TT_BinaryOperator, tok::comma) &&
347       // Allow breaking opening brace of lambdas (when passed as function
348       // arguments) to a new line when BeforeLambdaBody brace wrapping is
349       // enabled.
350       (!Style.BraceWrapping.BeforeLambdaBody ||
351        Current.isNot(TT_LambdaLBrace)) &&
352       CurrentState.NoLineBreakInOperand) {
353     return false;
354   }
355 
356   if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr))
357     return false;
358 
359   if (Current.is(TT_ConditionalExpr) && Previous.is(tok::r_paren) &&
360       Previous.MatchingParen && Previous.MatchingParen->Previous &&
361       Previous.MatchingParen->Previous->MatchingParen &&
362       Previous.MatchingParen->Previous->MatchingParen->is(TT_LambdaLBrace)) {
363     // We have a lambda within a conditional expression, allow breaking here.
364     assert(Previous.MatchingParen->Previous->is(tok::r_brace));
365     return true;
366   }
367 
368   return !State.NoLineBreak && !CurrentState.NoLineBreak;
369 }
370 
mustBreak(const LineState & State)371 bool ContinuationIndenter::mustBreak(const LineState &State) {
372   const FormatToken &Current = *State.NextToken;
373   const FormatToken &Previous = *Current.Previous;
374   const auto &CurrentState = State.Stack.back();
375   if (Style.BraceWrapping.BeforeLambdaBody && Current.CanBreakBefore &&
376       Current.is(TT_LambdaLBrace) && Previous.isNot(TT_LineComment)) {
377     auto LambdaBodyLength = getLengthToMatchingParen(Current, State.Stack);
378     return LambdaBodyLength > getColumnLimit(State);
379   }
380   if (Current.MustBreakBefore ||
381       (Current.is(TT_InlineASMColon) &&
382        (Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always ||
383         (Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_OnlyMultiline &&
384          Style.ColumnLimit > 0)))) {
385     return true;
386   }
387   if (CurrentState.BreakBeforeClosingBrace &&
388       (Current.closesBlockOrBlockTypeList(Style) ||
389        (Current.is(tok::r_brace) &&
390         Current.isBlockIndentedInitRBrace(Style)))) {
391     return true;
392   }
393   if (CurrentState.BreakBeforeClosingParen && Current.is(tok::r_paren))
394     return true;
395   if (Style.Language == FormatStyle::LK_ObjC &&
396       Style.ObjCBreakBeforeNestedBlockParam &&
397       Current.ObjCSelectorNameParts > 1 &&
398       Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) {
399     return true;
400   }
401   // Avoid producing inconsistent states by requiring breaks where they are not
402   // permitted for C# generic type constraints.
403   if (CurrentState.IsCSharpGenericTypeConstraint &&
404       Previous.isNot(TT_CSharpGenericTypeConstraintComma)) {
405     return false;
406   }
407   if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) ||
408        (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) &&
409         State.Line->First->isNot(TT_AttributeSquare) && Style.isCpp() &&
410         // FIXME: This is a temporary workaround for the case where clang-format
411         // sets BreakBeforeParameter to avoid bin packing and this creates a
412         // completely unnecessary line break after a template type that isn't
413         // line-wrapped.
414         (Previous.NestingLevel == 1 || Style.BinPackParameters)) ||
415        (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
416         Previous.isNot(tok::question)) ||
417        (!Style.BreakBeforeTernaryOperators &&
418         Previous.is(TT_ConditionalExpr))) &&
419       CurrentState.BreakBeforeParameter && !Current.isTrailingComment() &&
420       !Current.isOneOf(tok::r_paren, tok::r_brace)) {
421     return true;
422   }
423   if (CurrentState.IsChainedConditional &&
424       ((Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) &&
425         Current.is(tok::colon)) ||
426        (!Style.BreakBeforeTernaryOperators && Previous.is(TT_ConditionalExpr) &&
427         Previous.is(tok::colon)))) {
428     return true;
429   }
430   if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) ||
431        (Previous.is(TT_ArrayInitializerLSquare) &&
432         Previous.ParameterCount > 1) ||
433        opensProtoMessageField(Previous, Style)) &&
434       Style.ColumnLimit > 0 &&
435       getLengthToMatchingParen(Previous, State.Stack) + State.Column - 1 >
436           getColumnLimit(State)) {
437     return true;
438   }
439 
440   const FormatToken &BreakConstructorInitializersToken =
441       Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon
442           ? Previous
443           : Current;
444   if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) &&
445       (State.Column + State.Line->Last->TotalLength - Previous.TotalLength >
446            getColumnLimit(State) ||
447        CurrentState.BreakBeforeParameter) &&
448       (!Current.isTrailingComment() || Current.NewlinesBefore > 0) &&
449       (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All ||
450        Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon ||
451        Style.ColumnLimit != 0)) {
452     return true;
453   }
454 
455   if (Current.is(TT_ObjCMethodExpr) && Previous.isNot(TT_SelectorName) &&
456       State.Line->startsWith(TT_ObjCMethodSpecifier)) {
457     return true;
458   }
459   if (Current.is(TT_SelectorName) && Previous.isNot(tok::at) &&
460       CurrentState.ObjCSelectorNameFound && CurrentState.BreakBeforeParameter &&
461       (Style.ObjCBreakBeforeNestedBlockParam ||
462        !Current.startsSequence(TT_SelectorName, tok::colon, tok::caret))) {
463     return true;
464   }
465 
466   unsigned NewLineColumn = getNewLineColumn(State);
467   if (Current.isMemberAccess() && Style.ColumnLimit != 0 &&
468       State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit &&
469       (State.Column > NewLineColumn ||
470        Current.NestingLevel < State.StartOfLineLevel)) {
471     return true;
472   }
473 
474   if (startsSegmentOfBuilderTypeCall(Current) &&
475       (CurrentState.CallContinuation != 0 ||
476        CurrentState.BreakBeforeParameter) &&
477       // JavaScript is treated different here as there is a frequent pattern:
478       //   SomeFunction(function() {
479       //     ...
480       //   }.bind(...));
481       // FIXME: We should find a more generic solution to this problem.
482       !(State.Column <= NewLineColumn && Style.isJavaScript()) &&
483       !(Previous.closesScopeAfterBlock() && State.Column <= NewLineColumn)) {
484     return true;
485   }
486 
487   // If the template declaration spans multiple lines, force wrap before the
488   // function/class declaration.
489   if (Previous.ClosesTemplateDeclaration && CurrentState.BreakBeforeParameter &&
490       Current.CanBreakBefore) {
491     return true;
492   }
493 
494   if (State.Line->First->isNot(tok::kw_enum) && State.Column <= NewLineColumn)
495     return false;
496 
497   if (Style.AlwaysBreakBeforeMultilineStrings &&
498       (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
499        Previous.is(tok::comma) || Current.NestingLevel < 2) &&
500       !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at,
501                         Keywords.kw_dollar) &&
502       !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
503       nextIsMultilineString(State)) {
504     return true;
505   }
506 
507   // Using CanBreakBefore here and below takes care of the decision whether the
508   // current style uses wrapping before or after operators for the given
509   // operator.
510   if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) {
511     const auto PreviousPrecedence = Previous.getPrecedence();
512     if (PreviousPrecedence != prec::Assignment &&
513         CurrentState.BreakBeforeParameter && !Current.isTrailingComment()) {
514       const bool LHSIsBinaryExpr =
515           Previous.Previous && Previous.Previous->EndsBinaryExpression;
516       if (LHSIsBinaryExpr)
517         return true;
518       // If we need to break somewhere inside the LHS of a binary expression, we
519       // should also break after the operator. Otherwise, the formatting would
520       // hide the operator precedence, e.g. in:
521       //   if (aaaaaaaaaaaaaa ==
522       //           bbbbbbbbbbbbbb && c) {..
523       // For comparisons, we only apply this rule, if the LHS is a binary
524       // expression itself as otherwise, the line breaks seem superfluous.
525       // We need special cases for ">>" which we have split into two ">" while
526       // lexing in order to make template parsing easier.
527       const bool IsComparison =
528           (PreviousPrecedence == prec::Relational ||
529            PreviousPrecedence == prec::Equality ||
530            PreviousPrecedence == prec::Spaceship) &&
531           Previous.Previous &&
532           Previous.Previous->isNot(TT_BinaryOperator); // For >>.
533       if (!IsComparison)
534         return true;
535     }
536   } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
537              CurrentState.BreakBeforeParameter) {
538     return true;
539   }
540 
541   // Same as above, but for the first "<<" operator.
542   if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) &&
543       CurrentState.BreakBeforeParameter && CurrentState.FirstLessLess == 0) {
544     return true;
545   }
546 
547   if (Current.NestingLevel == 0 && !Current.isTrailingComment()) {
548     // Always break after "template <...>"(*) and leading annotations. This is
549     // only for cases where the entire line does not fit on a single line as a
550     // different LineFormatter would be used otherwise.
551     // *: Except when another option interferes with that, like concepts.
552     if (Previous.ClosesTemplateDeclaration) {
553       if (Current.is(tok::kw_concept)) {
554         switch (Style.BreakBeforeConceptDeclarations) {
555         case FormatStyle::BBCDS_Allowed:
556           break;
557         case FormatStyle::BBCDS_Always:
558           return true;
559         case FormatStyle::BBCDS_Never:
560           return false;
561         }
562       }
563       if (Current.is(TT_RequiresClause)) {
564         switch (Style.RequiresClausePosition) {
565         case FormatStyle::RCPS_SingleLine:
566         case FormatStyle::RCPS_WithPreceding:
567           return false;
568         default:
569           return true;
570         }
571       }
572       return Style.BreakTemplateDeclarations != FormatStyle::BTDS_No &&
573              (Style.BreakTemplateDeclarations != FormatStyle::BTDS_Leave ||
574               Current.NewlinesBefore > 0);
575     }
576     if (Previous.is(TT_FunctionAnnotationRParen) &&
577         State.Line->Type != LT_PreprocessorDirective) {
578       return true;
579     }
580     if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) &&
581         Current.isNot(TT_LeadingJavaAnnotation)) {
582       return true;
583     }
584   }
585 
586   if (Style.isJavaScript() && Previous.is(tok::r_paren) &&
587       Previous.is(TT_JavaAnnotation)) {
588     // Break after the closing parenthesis of TypeScript decorators before
589     // functions, getters and setters.
590     static const llvm::StringSet<> BreakBeforeDecoratedTokens = {"get", "set",
591                                                                  "function"};
592     if (BreakBeforeDecoratedTokens.contains(Current.TokenText))
593       return true;
594   }
595 
596   if (Current.is(TT_FunctionDeclarationName) &&
597       !State.Line->ReturnTypeWrapped &&
598       // Don't break before a C# function when no break after return type.
599       (!Style.isCSharp() ||
600        Style.BreakAfterReturnType > FormatStyle::RTBS_ExceptShortType) &&
601       // Don't always break between a JavaScript `function` and the function
602       // name.
603       !Style.isJavaScript() && Previous.isNot(tok::kw_template) &&
604       CurrentState.BreakBeforeParameter) {
605     return true;
606   }
607 
608   // The following could be precomputed as they do not depend on the state.
609   // However, as they should take effect only if the UnwrappedLine does not fit
610   // into the ColumnLimit, they are checked here in the ContinuationIndenter.
611   if (Style.ColumnLimit != 0 && Previous.is(BK_Block) &&
612       Previous.is(tok::l_brace) &&
613       !Current.isOneOf(tok::r_brace, tok::comment)) {
614     return true;
615   }
616 
617   if (Current.is(tok::lessless) &&
618       ((Previous.is(tok::identifier) && Previous.TokenText == "endl") ||
619        (Previous.Tok.isLiteral() && (Previous.TokenText.ends_with("\\n\"") ||
620                                      Previous.TokenText == "\'\\n\'")))) {
621     return true;
622   }
623 
624   if (Previous.is(TT_BlockComment) && Previous.IsMultiline)
625     return true;
626 
627   if (State.NoContinuation)
628     return true;
629 
630   return false;
631 }
632 
addTokenToState(LineState & State,bool Newline,bool DryRun,unsigned ExtraSpaces)633 unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
634                                                bool DryRun,
635                                                unsigned ExtraSpaces) {
636   const FormatToken &Current = *State.NextToken;
637   assert(State.NextToken->Previous);
638   const FormatToken &Previous = *State.NextToken->Previous;
639 
640   assert(!State.Stack.empty());
641   State.NoContinuation = false;
642 
643   if (Current.is(TT_ImplicitStringLiteral) &&
644       (!Previous.Tok.getIdentifierInfo() ||
645        Previous.Tok.getIdentifierInfo()->getPPKeywordID() ==
646            tok::pp_not_keyword)) {
647     unsigned EndColumn =
648         SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd());
649     if (Current.LastNewlineOffset != 0) {
650       // If there is a newline within this token, the final column will solely
651       // determined by the current end column.
652       State.Column = EndColumn;
653     } else {
654       unsigned StartColumn =
655           SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin());
656       assert(EndColumn >= StartColumn);
657       State.Column += EndColumn - StartColumn;
658     }
659     moveStateToNextToken(State, DryRun, /*Newline=*/false);
660     return 0;
661   }
662 
663   unsigned Penalty = 0;
664   if (Newline)
665     Penalty = addTokenOnNewLine(State, DryRun);
666   else
667     addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
668 
669   return moveStateToNextToken(State, DryRun, Newline) + Penalty;
670 }
671 
// Places the token at State.NextToken on the current line, preceded by
// Current.SpacesRequiredBefore + \p ExtraSpaces spaces (plus State.FirstIndent
// for a preprocessor directive that is indented after the hash). Updates the
// top-of-stack paren state (NoLineBreak, NoLineBreakInOperand, Indent,
// LastSpace, ColonPos, VariablePos, ...) and advances State.Column by the
// inserted spaces. The whitespace replacement is only emitted when \p DryRun
// is false.
addTokenOnCurrentLine(LineState & State,bool DryRun,unsigned ExtraSpaces)672 void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
673                                                  unsigned ExtraSpaces) {
674   FormatToken &Current = *State.NextToken;
675   assert(State.NextToken->Previous);
676   const FormatToken &Previous = *State.NextToken->Previous;
677   auto &CurrentState = State.Stack.back();
678 
679   bool DisallowLineBreaksOnThisLine =
680       Style.LambdaBodyIndentation == FormatStyle::LBI_Signature &&
681       Style.isCpp() && [&Current] {
682         // Deal with lambda arguments in C++. The aim here is to ensure that we
683         // don't over-indent lambda function bodies when lambdas are passed as
684         // arguments to function calls. We do this by ensuring that either all
685         // arguments (including any lambdas) go on the same line as the function
686         // call, or we break before the first argument.
687         const auto *Prev = Current.Previous;
688         if (!Prev)
689           return false;
690         // For example, `/*Newline=*/false`.
691         if (Prev->is(TT_BlockComment) && Current.SpacesRequiredBefore == 0)
692           return false;
693         const auto *PrevNonComment = Current.getPreviousNonComment();
694         if (!PrevNonComment || PrevNonComment->isNot(tok::l_paren))
695           return false;
696         if (Current.isOneOf(tok::comment, tok::l_paren, TT_LambdaLSquare))
697           return false;
698         auto BlockParameterCount = PrevNonComment->BlockParameterCount;
699         if (BlockParameterCount == 0)
700           return false;
701 
702         // Multiple lambdas in the same function call.
703         if (BlockParameterCount > 1)
704           return true;
705 
706         // A lambda followed by another arg.
707         if (!PrevNonComment->Role)
708           return false;
709         auto Comma = PrevNonComment->Role->lastComma();
710         if (!Comma)
711           return false;
712         auto Next = Comma->getNextNonComment();
713         return Next &&
714                !Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret);
715       }();
716 
717   if (DisallowLineBreaksOnThisLine)
718     State.NoLineBreak = true;
719 
      // On the first '=' seen in this paren state (at nesting level 0, or anywhere
      // in a line that starts with `for`), record in VariablePos the start column
      // of the token run directly preceding the '='. Skipped when the token two
      // positions back is a designated-initializer period.
720   if (Current.is(tok::equal) &&
721       (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) &&
722       CurrentState.VariablePos == 0 &&
723       (!Previous.Previous ||
724        Previous.Previous->isNot(TT_DesignatedInitializerPeriod))) {
725     CurrentState.VariablePos = State.Column;
726     // Move over * and & if they are bound to the variable name.
727     const FormatToken *Tok = &Previous;
728     while (Tok && CurrentState.VariablePos >= Tok->ColumnWidth) {
729       CurrentState.VariablePos -= Tok->ColumnWidth;
730       if (Tok->SpacesRequiredBefore != 0)
731         break;
732       Tok = Tok->Previous;
733     }
734     if (Previous.PartOfMultiVariableDeclStmt)
735       CurrentState.LastSpace = CurrentState.VariablePos;
736   }
737 
738   unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
739 
740   // Indent preprocessor directives after the hash if required.
741   int PPColumnCorrection = 0;
742   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash &&
743       Previous.is(tok::hash) && State.FirstIndent > 0 &&
744       &Previous == State.Line->First &&
745       (State.Line->Type == LT_PreprocessorDirective ||
746        State.Line->Type == LT_ImportStatement)) {
747     Spaces += State.FirstIndent;
748 
749     // For preprocessor indent with tabs, State.Column will be 1 because of the
750     // hash. This causes second-level indents onward to have an extra space
751     // after the tabs. We avoid this misalignment by subtracting 1 from the
752     // column value passed to replaceWhitespace().
753     if (Style.UseTab != FormatStyle::UT_Never)
754       PPColumnCorrection = -1;
755   }
756 
757   if (!DryRun) {
758     Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces,
759                                   State.Column + Spaces + PPColumnCorrection,
760                                   /*IsAligned=*/false, State.Line->InMacroBody);
761   }
762 
763   // In "BreakBeforeInheritanceComma" mode, don't break within the inheritance
764   // declaration unless there is multiple inheritance.
765   if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
766       Current.is(TT_InheritanceColon)) {
767     CurrentState.NoLineBreak = true;
768   }
769   if (Style.BreakInheritanceList == FormatStyle::BILS_AfterColon &&
770       Previous.is(TT_InheritanceColon)) {
771     CurrentState.NoLineBreak = true;
772   }
773 
      // For the first selector name seen in this paren state, either disable
      // colon alignment (no longest selector name recorded) or set ColonPos to the
      // larger of the minimum-indent-based position and the position of this
      // selector's own colon.
774   if (Current.is(TT_SelectorName) && !CurrentState.ObjCSelectorNameFound) {
775     unsigned MinIndent = std::max(
776         State.FirstIndent + Style.ContinuationIndentWidth, CurrentState.Indent);
777     unsigned FirstColonPos = State.Column + Spaces + Current.ColumnWidth;
778     if (Current.LongestObjCSelectorName == 0)
779       CurrentState.AlignColons = false;
780     else if (MinIndent + Current.LongestObjCSelectorName > FirstColonPos)
781       CurrentState.ColonPos = MinIndent + Current.LongestObjCSelectorName;
782     else
783       CurrentState.ColonPos = FirstColonPos;
784   }
785 
786   // In "AlwaysBreak" or "BlockIndent" mode, enforce wrapping directly after the
787   // parenthesis by disallowing any further line breaks if there is no line
788   // break after the opening parenthesis. Don't break if it doesn't conserve
789   // columns.
790   auto IsOpeningBracket = [&](const FormatToken &Tok) {
791     auto IsStartOfBracedList = [&]() {
792       return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) &&
793              Style.Cpp11BracedListStyle;
794     };
795     if (!Tok.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) &&
796         !IsStartOfBracedList()) {
797       return false;
798     }
799     if (!Tok.Previous)
800       return true;
801     if (Tok.Previous->isIf())
802       return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak;
803     return !Tok.Previous->isOneOf(TT_CastRParen, tok::kw_for, tok::kw_while,
804                                   tok::kw_switch);
805   };
806   if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak ||
807        Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) &&
808       IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) &&
809       // Don't do this for simple (no expressions) one-argument function calls
810       // as that feels like needlessly wasting whitespace, e.g.:
811       //
812       //   caaaaaaaaaaaall(
813       //       caaaaaaaaaaaall(
814       //           caaaaaaaaaaaall(
815       //               caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa))));
816       Current.FakeLParens.size() > 0 &&
817       Current.FakeLParens.back() > prec::Unknown) {
818     CurrentState.NoLineBreak = true;
819   }
820   if (Previous.is(TT_TemplateString) && Previous.opensScope())
821     CurrentState.NoLineBreak = true;
822 
823   // Align following lines within parentheses / brackets if configured.
824   // Note: This doesn't apply to macro expansion lines, which are MACRO( , , )
825   // with args as children of the '(' and ',' tokens. It does not make sense to
826   // align the commas with the opening paren.
827   if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign &&
828       !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() &&
829       Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) &&
830       Previous.isNot(TT_TableGenDAGArgOpener) &&
831       Previous.isNot(TT_TableGenDAGArgOpenerToBreak) &&
832       !(Current.MacroParent && Previous.MacroParent) &&
833       (Current.isNot(TT_LineComment) ||
834        Previous.isOneOf(BK_BracedInit, TT_VerilogMultiLineListLParen))) {
835     CurrentState.Indent = State.Column + Spaces;
836     CurrentState.IsAligned = true;
837   }
838   if (CurrentState.AvoidBinPacking && startsNextParameter(Current, Style))
839     CurrentState.NoLineBreak = true;
840   if (startsSegmentOfBuilderTypeCall(Current) &&
841       State.Column > getNewLineColumn(State)) {
842     CurrentState.ContainsUnwrappedBuilder = true;
843   }
844 
845   if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java)
846     CurrentState.NoLineBreak = true;
847   if (Current.isMemberAccess() && Previous.is(tok::r_paren) &&
848       (Previous.MatchingParen &&
849        (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) {
850     // If there is a function call with long parameters, break before trailing
851     // calls. This prevents things like:
852     //   EXPECT_CALL(SomeLongParameter).Times(
853     //       2);
854     // We don't want to do this for short parameters as they can just be
855     // indexes.
856     CurrentState.NoLineBreak = true;
857   }
858 
859   // Don't allow the RHS of an operator to be split over multiple lines unless
860   // there is a line-break right after the operator.
861   // Exclude relational operators, as there, it is always more desirable to
862   // have the LHS 'left' of the RHS.
863   const FormatToken *P = Current.getPreviousNonComment();
864   if (Current.isNot(tok::comment) && P &&
865       (P->isOneOf(TT_BinaryOperator, tok::comma) ||
866        (P->is(TT_ConditionalExpr) && P->is(tok::colon))) &&
867       !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) &&
868       P->getPrecedence() != prec::Assignment &&
869       P->getPrecedence() != prec::Relational &&
870       P->getPrecedence() != prec::Spaceship) {
871     bool BreakBeforeOperator =
872         P->MustBreakBefore || P->is(tok::lessless) ||
873         (P->is(TT_BinaryOperator) &&
874          Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) ||
875         (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators);
876     // Don't do this if there are only two operands. In these cases, there is
877     // always a nice vertical separation between them and the extra line break
878     // does not help.
879     bool HasTwoOperands = P->OperatorIndex == 0 && !P->NextOperator &&
880                           P->isNot(TT_ConditionalExpr);
881     if ((!BreakBeforeOperator &&
882          !(HasTwoOperands &&
883            Style.AlignOperands != FormatStyle::OAS_DontAlign)) ||
884         (!CurrentState.LastOperatorWrapped && BreakBeforeOperator)) {
885       CurrentState.NoLineBreakInOperand = true;
886     }
887   }
888 
      // Account for the inserted whitespace, then update LastSpace / Indent /
      // NestedBlockIndent / ColonPos depending on the kind of token that precedes
      // Current. Only the first matching branch of the chain below applies.
889   State.Column += Spaces;
890   if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) &&
891       Previous.Previous &&
892       (Previous.Previous->is(tok::kw_for) || Previous.Previous->isIf())) {
893     // Treat the condition inside an if as if it was a second function
894     // parameter, i.e. let nested calls have a continuation indent.
895     CurrentState.LastSpace = State.Column;
896     CurrentState.NestedBlockIndent = State.Column;
897   } else if (!Current.isOneOf(tok::comment, tok::caret) &&
898              ((Previous.is(tok::comma) &&
899                Previous.isNot(TT_OverloadedOperator)) ||
900               (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) {
901     CurrentState.LastSpace = State.Column;
902   } else if (Previous.is(TT_CtorInitializerColon) &&
903              (!Current.isTrailingComment() || Current.NewlinesBefore > 0) &&
904              Style.BreakConstructorInitializers ==
905                  FormatStyle::BCIS_AfterColon) {
906     CurrentState.Indent = State.Column;
907     CurrentState.LastSpace = State.Column;
908   } else if (Previous.isOneOf(TT_ConditionalExpr, TT_CtorInitializerColon)) {
909     CurrentState.LastSpace = State.Column;
910   } else if (Previous.is(TT_BinaryOperator) &&
911              ((Previous.getPrecedence() != prec::Assignment &&
912                (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 ||
913                 Previous.NextOperator)) ||
914               Current.StartsBinaryExpression)) {
915     // Indent relative to the RHS of the expression unless this is a simple
916     // assignment without binary expression on the RHS.
917     if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None)
918       CurrentState.LastSpace = State.Column;
919   } else if (Previous.is(TT_InheritanceColon)) {
920     CurrentState.Indent = State.Column;
921     CurrentState.LastSpace = State.Column;
922   } else if (Current.is(TT_CSharpGenericTypeConstraintColon)) {
923     CurrentState.ColonPos = State.Column;
924   } else if (Previous.opensScope()) {
925     // If a function has a trailing call, indent all parameters from the
926     // opening parenthesis. This avoids confusing indents like:
927     //   OuterFunction(InnerFunctionCall( // break
928     //       ParameterToInnerFunction))   // break
929     //       .SecondInnerFunctionCall();
930     if (Previous.MatchingParen) {
931       const FormatToken *Next = Previous.MatchingParen->getNextNonComment();
932       if (Next && Next->isMemberAccess() && State.Stack.size() > 1 &&
933           State.Stack[State.Stack.size() - 2].CallContinuation == 0) {
934         CurrentState.LastSpace = State.Column;
935       }
936     }
937   }
938 }
939 
// Places the token at State.NextToken on a new line at the column computed by
// getNewLineColumn(), updates the paren-state stack to reflect the break, and,
// when \p DryRun is false, emits the newline/indentation replacement.
//
// Returns the extra penalty attributed to this break: 15 for the first break
// in the current paren state, the token's SplitPenalty, optionally
// Style.PenaltyBreakFirstLessLess, and Style.PenaltyIndentedWhitespace for
// every column the new line is indented beyond State.FirstIndent.
addTokenOnNewLine(LineState & State,bool DryRun)940 unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
941                                                  bool DryRun) {
942   FormatToken &Current = *State.NextToken;
943   assert(State.NextToken->Previous);
944   const FormatToken &Previous = *State.NextToken->Previous;
945   auto &CurrentState = State.Stack.back();
946 
947   // Extra penalty that needs to be added because of the way certain line
948   // breaks are chosen.
949   unsigned Penalty = 0;
950 
951   const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
952   const FormatToken *NextNonComment = Previous.getNextNonComment();
953   if (!NextNonComment)
954     NextNonComment = &Current;
955   // The first line break on any NestingLevel causes an extra penalty in order
956   // to prefer similar line breaks.
957   if (!CurrentState.ContainsLineBreak)
958     Penalty += 15;
959   CurrentState.ContainsLineBreak = true;
960 
961   Penalty += State.NextToken->SplitPenalty;
962 
963   // Breaking before the first "<<" is generally not desirable if the LHS is
964   // short. Also always add the penalty if the LHS is split over multiple lines
965   // to avoid unnecessary line breaks that just work around this penalty.
966   if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess == 0 &&
967       (State.Column <= Style.ColumnLimit / 3 ||
968        CurrentState.BreakBeforeParameter)) {
969     Penalty += Style.PenaltyBreakFirstLessLess;
970   }
971 
972   State.Column = getNewLineColumn(State);
973 
974   // Add Penalty proportional to amount of whitespace away from FirstColumn
975   // This tends to penalize several lines that are far-right indented,
976   // and prefers a line-break prior to such a block, e.g:
977   //
978   // Constructor() :
979   //   member(value), looooooooooooooooong_member(
980   //                      looooooooooong_call(param_1, param_2, param_3))
981   // would then become
982   // Constructor() :
983   //   member(value),
984   //   looooooooooooooooong_member(
985   //       looooooooooong_call(param_1, param_2, param_3))
986   if (State.Column > State.FirstIndent) {
987     Penalty +=
988         Style.PenaltyIndentedWhitespace * (State.Column - State.FirstIndent);
989   }
990 
991   // Indent nested blocks relative to this column, unless in a very specific
992   // JavaScript special case where:
993   //
994   //   var loooooong_name =
995   //       function() {
996   //     // code
997   //   }
998   //
999   // is common and should be formatted like a free-standing function. The same
1000   // goes for wrapping before the lambda return type arrow.
1001   if (Current.isNot(TT_LambdaArrow) &&
1002       (!Style.isJavaScript() || Current.NestingLevel != 0 ||
1003        !PreviousNonComment || PreviousNonComment->isNot(tok::equal) ||
1004        !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) {
1005     CurrentState.NestedBlockIndent = State.Column;
1006   }
1007 
1008   if (NextNonComment->isMemberAccess()) {
1009     if (CurrentState.CallContinuation == 0)
1010       CurrentState.CallContinuation = State.Column;
1011   } else if (NextNonComment->is(TT_SelectorName)) {
1012     if (!CurrentState.ObjCSelectorNameFound) {
1013       if (NextNonComment->LongestObjCSelectorName == 0) {
1014         CurrentState.AlignColons = false;
1015       } else {
1016         CurrentState.ColonPos =
1017             (shouldIndentWrappedSelectorName(Style, State.Line->Type)
1018                  ? std::max(CurrentState.Indent,
1019                             State.FirstIndent + Style.ContinuationIndentWidth)
1020                  : CurrentState.Indent) +
1021             std::max(NextNonComment->LongestObjCSelectorName,
1022                      NextNonComment->ColumnWidth);
1023       }
1024     } else if (CurrentState.AlignColons &&
1025                CurrentState.ColonPos <= NextNonComment->ColumnWidth) {
1026       CurrentState.ColonPos = State.Column + NextNonComment->ColumnWidth;
1027     }
1028   } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
1029              PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) {
1030     // FIXME: This is hacky, find a better way. The problem is that in an ObjC
1031     // method expression, the block should be aligned to the line starting it,
1032     // e.g.:
1033     //   [aaaaaaaaaaaaaaa aaaaaaaaa: \\ break for some reason
1034     //                        ^(int *i) {
1035     //                            // ...
1036     //                        }];
1037     // Thus, we set LastSpace of the next higher NestingLevel, to which we move
1038     // when we consume all of the "}"'s FakeRParens at the "{".
1039     if (State.Stack.size() > 1) {
1040       State.Stack[State.Stack.size() - 2].LastSpace =
1041           std::max(CurrentState.LastSpace, CurrentState.Indent) +
1042           Style.ContinuationIndentWidth;
1043     }
1044   }
1045 
       // The next four statements adjust BreakBeforeParameter in sequence; a later
       // statement may overwrite the value set by an earlier one.
1046   if ((PreviousNonComment &&
1047        PreviousNonComment->isOneOf(tok::comma, tok::semi) &&
1048        !CurrentState.AvoidBinPacking) ||
1049       Previous.is(TT_BinaryOperator)) {
1050     CurrentState.BreakBeforeParameter = false;
1051   }
1052   if (PreviousNonComment &&
1053       (PreviousNonComment->isOneOf(TT_TemplateCloser, TT_JavaAnnotation) ||
1054        PreviousNonComment->ClosesRequiresClause) &&
1055       Current.NestingLevel == 0) {
1056     CurrentState.BreakBeforeParameter = false;
1057   }
1058   if (NextNonComment->is(tok::question) ||
1059       (PreviousNonComment && PreviousNonComment->is(tok::question))) {
1060     CurrentState.BreakBeforeParameter = true;
1061   }
1062   if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore)
1063     CurrentState.BreakBeforeParameter = false;
1064 
1065   if (!DryRun) {
1066     unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1;
1067     if (Current.is(tok::r_brace) && Current.MatchingParen &&
1068         // Only strip trailing empty lines for l_braces that have children, i.e.
1069         // for function expressions (lambdas, arrows, etc).
1070         !Current.MatchingParen->Children.empty()) {
1071       // lambdas and arrow functions are expressions, thus their r_brace is not
1072       // on its own line, and thus not covered by UnwrappedLineFormatter's logic
1073       // about removing empty lines on closing blocks. Special case them here.
1074       MaxEmptyLinesToKeep = 1;
1075     }
         // Emit at least one newline and at most MaxEmptyLinesToKeep of the
         // newlines that preceded the token in the input.
1076     unsigned Newlines =
1077         std::max(1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep));
1078     bool ContinuePPDirective =
1079         State.Line->InPPDirective && State.Line->Type != LT_ImportStatement;
1080     Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column,
1081                                   CurrentState.IsAligned, ContinuePPDirective);
1082   }
1083 
1084   if (!Current.isTrailingComment())
1085     CurrentState.LastSpace = State.Column;
1086   if (Current.is(tok::lessless)) {
1087     // If we are breaking before a "<<", we always want to indent relative to
1088     // RHS. This is necessary only for "<<", as we special-case it and don't
1089     // always indent relative to the RHS.
1090     CurrentState.LastSpace += 3; // 3 -> width of "<< ".
1091   }
1092 
1093   State.StartOfLineLevel = Current.NestingLevel;
1094   State.LowestLevelOnLine = Current.NestingLevel;
1095 
1096   // Any break on this level means that the parent level has been broken
1097   // and we need to avoid bin packing there.
1098   bool NestedBlockSpecialCase =
1099       (!Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 &&
1100        State.Stack[State.Stack.size() - 2].NestedBlockInlined) ||
1101       (Style.Language == FormatStyle::LK_ObjC && Current.is(tok::r_brace) &&
1102        State.Stack.size() > 1 && !Style.ObjCBreakBeforeNestedBlockParam);
1103   // Do not force parameter break for statements with requires expressions.
1104   NestedBlockSpecialCase =
1105       NestedBlockSpecialCase ||
1106       (Current.MatchingParen &&
1107        Current.MatchingParen->is(TT_RequiresExpressionLBrace));
1108   if (!NestedBlockSpecialCase) {
1109     auto ParentLevelIt = std::next(State.Stack.rbegin());
1110     if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope &&
1111         Current.MatchingParen && Current.MatchingParen->is(TT_LambdaLBrace)) {
1112       // If the first character on the new line is a lambda's closing brace, the
1113       // stack still contains that lambda's parenthesis. As such, we need to
1114       // recurse further down the stack than usual to find the parenthesis level
1115       // containing the lambda, which is where we want to set
1116       // BreakBeforeParameter.
1117       //
1118       // We specifically special case "OuterScope"-formatted lambdas here
1119       // because, when using that setting, breaking before the parameter
1120       // directly following the lambda is particularly unsightly. However, when
1121       // "OuterScope" is not set, the logic to find the parent parenthesis level
1122       // still appears to be sometimes incorrect. It has not been fixed yet
1123       // because it would lead to significant changes in existing behaviour.
1124       //
1125       // TODO: fix the non-"OuterScope" case too.
1126       auto FindCurrentLevel = [&](const auto &It) {
1127         return std::find_if(It, State.Stack.rend(), [](const auto &PState) {
1128           return PState.Tok != nullptr; // Ignore fake parens.
1129         });
1130       };
1131       auto MaybeIncrement = [&](const auto &It) {
1132         return It != State.Stack.rend() ? std::next(It) : It;
1133       };
1134       auto LambdaLevelIt = FindCurrentLevel(State.Stack.rbegin());
1135       auto LevelContainingLambdaIt =
1136           FindCurrentLevel(MaybeIncrement(LambdaLevelIt));
1137       ParentLevelIt = MaybeIncrement(LevelContainingLambdaIt);
1138     }
1139     for (auto I = ParentLevelIt, E = State.Stack.rend(); I != E; ++I)
1140       I->BreakBeforeParameter = true;
1141   }
1142 
1143   if (PreviousNonComment &&
1144       !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) &&
1145       ((PreviousNonComment->isNot(TT_TemplateCloser) &&
1146         !PreviousNonComment->ClosesRequiresClause) ||
1147        Current.NestingLevel != 0) &&
1148       !PreviousNonComment->isOneOf(
1149           TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation,
1150           TT_LeadingJavaAnnotation) &&
1151       Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope() &&
1152       // We don't want to enforce line breaks for subsequent arguments just
1153       // because we have been forced to break before a lambda body.
1154       (!Style.BraceWrapping.BeforeLambdaBody ||
1155        Current.isNot(TT_LambdaLBrace))) {
1156     CurrentState.BreakBeforeParameter = true;
1157   }
1158 
1159   // If we break after { or the [ of an array initializer, we should also break
1160   // before the corresponding } or ].
1161   if (PreviousNonComment &&
1162       (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
1163        opensProtoMessageField(*PreviousNonComment, Style))) {
1164     CurrentState.BreakBeforeClosingBrace = true;
1165   }
1166 
1167   if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) {
1168     CurrentState.BreakBeforeClosingParen =
1169         Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent;
1170   }
1171 
1172   if (CurrentState.AvoidBinPacking) {
1173     // If we are breaking after '(', '{', '<', or this is the break after a ':'
1174     // to start a member initializer list in a constructor, this should not
1175     // be considered bin packing unless the relevant AllowAll option is false or
1176     // this is a dict/object literal.
1177     bool PreviousIsBreakingCtorInitializerColon =
1178         PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
1179         Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon;
1180     bool AllowAllConstructorInitializersOnNextLine =
1181         Style.PackConstructorInitializers == FormatStyle::PCIS_NextLine ||
1182         Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly;
1183     if (!(Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) ||
1184           PreviousIsBreakingCtorInitializerColon) ||
1185         (!Style.AllowAllParametersOfDeclarationOnNextLine &&
1186          State.Line->MustBeDeclaration) ||
1187         (!Style.AllowAllArgumentsOnNextLine &&
1188          !State.Line->MustBeDeclaration) ||
1189         (!AllowAllConstructorInitializersOnNextLine &&
1190          PreviousIsBreakingCtorInitializerColon) ||
1191         Previous.is(TT_DictLiteral)) {
1192       CurrentState.BreakBeforeParameter = true;
1193     }
1194 
1195     // If we are breaking after a ':' to start a member initializer list,
1196     // and we allow all arguments on the next line, we should not break
1197     // before the next parameter.
1198     if (PreviousIsBreakingCtorInitializerColon &&
1199         AllowAllConstructorInitializersOnNextLine) {
1200       CurrentState.BreakBeforeParameter = false;
1201     }
1202   }
1203 
1204   return Penalty;
1205 }
1206 
getNewLineColumn(const LineState & State)1207 unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) {
1208   if (!State.NextToken || !State.NextToken->Previous)
1209     return 0;
1210 
1211   FormatToken &Current = *State.NextToken;
1212   const auto &CurrentState = State.Stack.back();
1213 
1214   if (CurrentState.IsCSharpGenericTypeConstraint &&
1215       Current.isNot(TT_CSharpGenericTypeConstraint)) {
1216     return CurrentState.ColonPos + 2;
1217   }
1218 
1219   const FormatToken &Previous = *Current.Previous;
1220   // If we are continuing an expression, we want to use the continuation indent.
1221   unsigned ContinuationIndent =
1222       std::max(CurrentState.LastSpace, CurrentState.Indent) +
1223       Style.ContinuationIndentWidth;
1224   const FormatToken *PreviousNonComment = Current.getPreviousNonComment();
1225   const FormatToken *NextNonComment = Previous.getNextNonComment();
1226   if (!NextNonComment)
1227     NextNonComment = &Current;
1228 
1229   // Java specific bits.
1230   if (Style.Language == FormatStyle::LK_Java &&
1231       Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) {
1232     return std::max(CurrentState.LastSpace,
1233                     CurrentState.Indent + Style.ContinuationIndentWidth);
1234   }
1235 
1236   // Indentation of the statement following a Verilog case label is taken care
1237   // of in moveStateToNextToken.
1238   if (Style.isVerilog() && PreviousNonComment &&
1239       Keywords.isVerilogEndOfLabel(*PreviousNonComment)) {
1240     return State.FirstIndent;
1241   }
1242 
1243   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths &&
1244       State.Line->First->is(tok::kw_enum)) {
1245     return (Style.IndentWidth * State.Line->First->IndentLevel) +
1246            Style.IndentWidth;
1247   }
1248 
1249   if ((NextNonComment->is(tok::l_brace) && NextNonComment->is(BK_Block)) ||
1250       (Style.isVerilog() && Keywords.isVerilogBegin(*NextNonComment))) {
1251     if (Current.NestingLevel == 0 ||
1252         (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope &&
1253          State.NextToken->is(TT_LambdaLBrace))) {
1254       return State.FirstIndent;
1255     }
1256     return CurrentState.Indent;
1257   }
1258   if (Current.is(TT_LambdaArrow) &&
1259       Previous.isOneOf(tok::kw_noexcept, tok::kw_mutable, tok::kw_constexpr,
1260                        tok::kw_consteval, tok::kw_static, TT_AttributeSquare)) {
1261     return ContinuationIndent;
1262   }
1263   if ((Current.isOneOf(tok::r_brace, tok::r_square) ||
1264        (Current.is(tok::greater) && (Style.isProto() || Style.isTableGen()))) &&
1265       State.Stack.size() > 1) {
1266     if (Current.closesBlockOrBlockTypeList(Style))
1267       return State.Stack[State.Stack.size() - 2].NestedBlockIndent;
1268     if (Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit))
1269       return State.Stack[State.Stack.size() - 2].LastSpace;
1270     return State.FirstIndent;
1271   }
1272   // Indent a closing parenthesis at the previous level if followed by a semi,
1273   // const, or opening brace. This allows indentations such as:
1274   //     foo(
1275   //       a,
1276   //     );
1277   //     int Foo::getter(
1278   //         //
1279   //     ) const {
1280   //       return foo;
1281   //     }
1282   //     function foo(
1283   //       a,
1284   //     ) {
1285   //       code(); //
1286   //     }
1287   if (Current.is(tok::r_paren) && State.Stack.size() > 1 &&
1288       (!Current.Next ||
1289        Current.Next->isOneOf(tok::semi, tok::kw_const, tok::l_brace))) {
1290     return State.Stack[State.Stack.size() - 2].LastSpace;
1291   }
1292   // When DAGArg closer exists top of line, it should be aligned in the similar
1293   // way as function call above.
1294   if (Style.isTableGen() && Current.is(TT_TableGenDAGArgCloser) &&
1295       State.Stack.size() > 1) {
1296     return State.Stack[State.Stack.size() - 2].LastSpace;
1297   }
1298   if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent &&
1299       (Current.is(tok::r_paren) ||
1300        (Current.is(tok::r_brace) && Current.MatchingParen &&
1301         Current.MatchingParen->is(BK_BracedInit))) &&
1302       State.Stack.size() > 1) {
1303     return State.Stack[State.Stack.size() - 2].LastSpace;
1304   }
1305   if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope())
1306     return State.Stack[State.Stack.size() - 2].LastSpace;
1307   // Field labels in a nested type should be aligned to the brace. For example
1308   // in ProtoBuf:
1309   //   optional int32 b = 2 [(foo_options) = {aaaaaaaaaaaaaaaaaaa: 123,
1310   //                                          bbbbbbbbbbbbbbbbbbbbbbbb:"baz"}];
1311   // For Verilog, a quote following a brace is treated as an identifier.  And
1312   // Both braces and colons get annotated as TT_DictLiteral.  So we have to
1313   // check.
1314   if (Current.is(tok::identifier) && Current.Next &&
1315       (!Style.isVerilog() || Current.Next->is(tok::colon)) &&
1316       (Current.Next->is(TT_DictLiteral) ||
1317        (Style.isProto() && Current.Next->isOneOf(tok::less, tok::l_brace)))) {
1318     return CurrentState.Indent;
1319   }
1320   if (NextNonComment->is(TT_ObjCStringLiteral) &&
1321       State.StartOfStringLiteral != 0) {
1322     return State.StartOfStringLiteral - 1;
1323   }
1324   if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0)
1325     return State.StartOfStringLiteral;
1326   if (NextNonComment->is(tok::lessless) && CurrentState.FirstLessLess != 0)
1327     return CurrentState.FirstLessLess;
1328   if (NextNonComment->isMemberAccess()) {
1329     if (CurrentState.CallContinuation == 0)
1330       return ContinuationIndent;
1331     return CurrentState.CallContinuation;
1332   }
1333   if (CurrentState.QuestionColumn != 0 &&
1334       ((NextNonComment->is(tok::colon) &&
1335         NextNonComment->is(TT_ConditionalExpr)) ||
1336        Previous.is(TT_ConditionalExpr))) {
1337     if (((NextNonComment->is(tok::colon) && NextNonComment->Next &&
1338           !NextNonComment->Next->FakeLParens.empty() &&
1339           NextNonComment->Next->FakeLParens.back() == prec::Conditional) ||
1340          (Previous.is(tok::colon) && !Current.FakeLParens.empty() &&
1341           Current.FakeLParens.back() == prec::Conditional)) &&
1342         !CurrentState.IsWrappedConditional) {
1343       // NOTE: we may tweak this slightly:
1344       //    * not remove the 'lead' ContinuationIndentWidth
1345       //    * always un-indent by the operator when
1346       //    BreakBeforeTernaryOperators=true
1347       unsigned Indent = CurrentState.Indent;
1348       if (Style.AlignOperands != FormatStyle::OAS_DontAlign)
1349         Indent -= Style.ContinuationIndentWidth;
1350       if (Style.BreakBeforeTernaryOperators && CurrentState.UnindentOperator)
1351         Indent -= 2;
1352       return Indent;
1353     }
1354     return CurrentState.QuestionColumn;
1355   }
1356   if (Previous.is(tok::comma) && CurrentState.VariablePos != 0)
1357     return CurrentState.VariablePos;
1358   if (Current.is(TT_RequiresClause)) {
1359     if (Style.IndentRequiresClause)
1360       return CurrentState.Indent + Style.IndentWidth;
1361     switch (Style.RequiresClausePosition) {
1362     case FormatStyle::RCPS_OwnLine:
1363     case FormatStyle::RCPS_WithFollowing:
1364       return CurrentState.Indent;
1365     default:
1366       break;
1367     }
1368   }
1369   if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon,
1370                               TT_InheritanceComma)) {
1371     return State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1372   }
1373   if ((PreviousNonComment &&
1374        (PreviousNonComment->ClosesTemplateDeclaration ||
1375         PreviousNonComment->ClosesRequiresClause ||
1376         (PreviousNonComment->is(TT_AttributeMacro) &&
1377          Current.isNot(tok::l_paren)) ||
1378         PreviousNonComment->isOneOf(
1379             TT_AttributeRParen, TT_AttributeSquare, TT_FunctionAnnotationRParen,
1380             TT_JavaAnnotation, TT_LeadingJavaAnnotation))) ||
1381       (!Style.IndentWrappedFunctionNames &&
1382        NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) {
1383     return std::max(CurrentState.LastSpace, CurrentState.Indent);
1384   }
1385   if (NextNonComment->is(TT_SelectorName)) {
1386     if (!CurrentState.ObjCSelectorNameFound) {
1387       unsigned MinIndent = CurrentState.Indent;
1388       if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) {
1389         MinIndent = std::max(MinIndent,
1390                              State.FirstIndent + Style.ContinuationIndentWidth);
1391       }
1392       // If LongestObjCSelectorName is 0, we are indenting the first
1393       // part of an ObjC selector (or a selector component which is
1394       // not colon-aligned due to block formatting).
1395       //
1396       // Otherwise, we are indenting a subsequent part of an ObjC
1397       // selector which should be colon-aligned to the longest
1398       // component of the ObjC selector.
1399       //
1400       // In either case, we want to respect Style.IndentWrappedFunctionNames.
1401       return MinIndent +
1402              std::max(NextNonComment->LongestObjCSelectorName,
1403                       NextNonComment->ColumnWidth) -
1404              NextNonComment->ColumnWidth;
1405     }
1406     if (!CurrentState.AlignColons)
1407       return CurrentState.Indent;
1408     if (CurrentState.ColonPos > NextNonComment->ColumnWidth)
1409       return CurrentState.ColonPos - NextNonComment->ColumnWidth;
1410     return CurrentState.Indent;
1411   }
1412   if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr))
1413     return CurrentState.ColonPos;
1414   if (NextNonComment->is(TT_ArraySubscriptLSquare)) {
1415     if (CurrentState.StartOfArraySubscripts != 0) {
1416       return CurrentState.StartOfArraySubscripts;
1417     } else if (Style.isCSharp()) { // C# allows `["key"] = value` inside object
1418                                    // initializers.
1419       return CurrentState.Indent;
1420     }
1421     return ContinuationIndent;
1422   }
1423 
1424   // OpenMP clauses want to get additional indentation when they are pushed onto
1425   // the next line.
1426   if (State.Line->InPragmaDirective) {
1427     FormatToken *PragmaType = State.Line->First->Next->Next;
1428     if (PragmaType && PragmaType->TokenText == "omp")
1429       return CurrentState.Indent + Style.ContinuationIndentWidth;
1430   }
1431 
1432   // This ensure that we correctly format ObjC methods calls without inputs,
1433   // i.e. where the last element isn't selector like: [callee method];
1434   if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 &&
1435       NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) {
1436     return CurrentState.Indent;
1437   }
1438 
1439   if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) ||
1440       Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon)) {
1441     return ContinuationIndent;
1442   }
1443   if (PreviousNonComment && PreviousNonComment->is(tok::colon) &&
1444       PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) {
1445     return ContinuationIndent;
1446   }
1447   if (NextNonComment->is(TT_CtorInitializerComma))
1448     return CurrentState.Indent;
1449   if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) &&
1450       Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) {
1451     return CurrentState.Indent;
1452   }
1453   if (PreviousNonComment && PreviousNonComment->is(TT_InheritanceColon) &&
1454       Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) {
1455     return CurrentState.Indent;
1456   }
1457   if (Previous.is(tok::r_paren) &&
1458       Previous.isNot(TT_TableGenDAGArgOperatorToBreak) &&
1459       !Current.isBinaryOperator() &&
1460       !Current.isOneOf(tok::colon, tok::comment)) {
1461     return ContinuationIndent;
1462   }
1463   if (Current.is(TT_ProtoExtensionLSquare))
1464     return CurrentState.Indent;
1465   if (Current.isBinaryOperator() && CurrentState.UnindentOperator) {
1466     return CurrentState.Indent - Current.Tok.getLength() -
1467            Current.SpacesRequiredBefore;
1468   }
1469   if (Current.is(tok::comment) && NextNonComment->isBinaryOperator() &&
1470       CurrentState.UnindentOperator) {
1471     return CurrentState.Indent - NextNonComment->Tok.getLength() -
1472            NextNonComment->SpacesRequiredBefore;
1473   }
1474   if (CurrentState.Indent == State.FirstIndent && PreviousNonComment &&
1475       !PreviousNonComment->isOneOf(tok::r_brace, TT_CtorInitializerComma)) {
1476     // Ensure that we fall back to the continuation indent width instead of
1477     // just flushing continuations left.
1478     return CurrentState.Indent + Style.ContinuationIndentWidth;
1479   }
1480   return CurrentState.Indent;
1481 }
1482 
hasNestedBlockInlined(const FormatToken * Previous,const FormatToken & Current,const FormatStyle & Style)1483 static bool hasNestedBlockInlined(const FormatToken *Previous,
1484                                   const FormatToken &Current,
1485                                   const FormatStyle &Style) {
1486   if (Previous->isNot(tok::l_paren))
1487     return true;
1488   if (Previous->ParameterCount > 1)
1489     return true;
1490 
1491   // Also a nested block if contains a lambda inside function with 1 parameter.
1492   return Style.BraceWrapping.BeforeLambdaBody && Current.is(TT_LambdaLSquare);
1493 }
1494 
/// Consumes State.NextToken and updates \p State accordingly.
///
/// In order, this (1) records per-token bookkeeping on the innermost
/// ParenState (operator wrapping, ternary columns, constructor-initializer
/// indents, ...), (2) runs the fake-paren / scope-closer / scope-opener
/// helpers, which may push or pop entries of State.Stack, (3) tracks the start
/// column of string literals, (4) adds the token's ColumnWidth to State.Column
/// and advances State.NextToken, and (5) lets handleEndOfLine and any token
/// Role contribute.
///
/// \param Newline forwarded to the helpers and stored as LastOperatorWrapped
///        for operators; presumably "a line break was placed before this
///        token" -- confirm against callers.
/// \param DryRun forwarded unchanged to handleEndOfLine and the Role hooks.
/// \returns the penalty from handleEndOfLine plus, if the previous non-comment
///          token has a Role, the result of its formatAfterToken.
moveStateToNextToken(LineState & State,bool DryRun,bool Newline)1495 unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
1496                                                     bool DryRun, bool Newline) {
1497   assert(State.Stack.size());
1498   const FormatToken &Current = *State.NextToken;
1499   auto &CurrentState = State.Stack.back();
1500 
1501   if (Current.is(TT_CSharpGenericTypeConstraint))
1502     CurrentState.IsCSharpGenericTypeConstraint = true;
1503   if (Current.isOneOf(tok::comma, TT_BinaryOperator))
1504     CurrentState.NoLineBreakInOperand = false;
1505   if (Current.isOneOf(TT_InheritanceColon, TT_CSharpGenericTypeConstraintColon))
1506     CurrentState.AvoidBinPacking = true;
  // The first non-overloaded "<<" only records its column; later ones record
  // whether they were wrapped.
1507   if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) {
1508     if (CurrentState.FirstLessLess == 0)
1509       CurrentState.FirstLessLess = State.Column;
1510     else
1511       CurrentState.LastOperatorWrapped = Newline;
1512   }
1513   if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless))
1514     CurrentState.LastOperatorWrapped = Newline;
  // Only the first token of a run of TT_ConditionalExpr tokens updates
  // LastOperatorWrapped.
1515   if (Current.is(TT_ConditionalExpr) && Current.Previous &&
1516       Current.Previous->isNot(TT_ConditionalExpr)) {
1517     CurrentState.LastOperatorWrapped = Newline;
1518   }
1519   if (Current.is(TT_ArraySubscriptLSquare) &&
1520       CurrentState.StartOfArraySubscripts == 0) {
1521     CurrentState.StartOfArraySubscripts = State.Column;
1522   }
1523 
  // True for a ternary "?" that must be preceded by a break, or whose next
  // non-comment token must be.
1524   auto IsWrappedConditional = [](const FormatToken &Tok) {
1525     if (!(Tok.is(TT_ConditionalExpr) && Tok.is(tok::question)))
1526       return false;
1527     if (Tok.MustBreakBefore)
1528       return true;
1529 
1530     const FormatToken *Next = Tok.getNextNonComment();
1531     return Next && Next->MustBreakBefore;
1532   };
1533   if (IsWrappedConditional(Current))
1534     CurrentState.IsWrappedConditional = true;
  // QuestionColumn is taken at the "?" itself when ternary operators are
  // broken before, and otherwise at the first non-":" token whose predecessor
  // (skipping trailing comments) is a "?".
1535   if (Style.BreakBeforeTernaryOperators && Current.is(tok::question))
1536     CurrentState.QuestionColumn = State.Column;
1537   if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) {
1538     const FormatToken *Previous = Current.Previous;
1539     while (Previous && Previous->isTrailingComment())
1540       Previous = Previous->Previous;
1541     if (Previous && Previous->is(tok::question))
1542       CurrentState.QuestionColumn = State.Column;
1543   }
1544   if (!Current.opensScope() && !Current.closesScope() &&
1545       Current.isNot(TT_PointerOrReference)) {
1546     State.LowestLevelOnLine =
1547         std::min(State.LowestLevelOnLine, Current.NestingLevel);
1548   }
1549   if (Current.isMemberAccess())
1550     CurrentState.StartOfFunctionCall = !Current.NextOperator ? 0 : State.Column;
1551   if (Current.is(TT_SelectorName))
1552     CurrentState.ObjCSelectorNameFound = true;
1553   if (Current.is(TT_CtorInitializerColon) &&
1554       Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) {
1555     // Indent 2 from the column, so:
1556     // SomeClass::SomeClass()
1557     //     : First(...), ...
1558     //       Next(...)
1559     //       ^ line up here.
1560     CurrentState.Indent = State.Column + (Style.BreakConstructorInitializers ==
1561                                                   FormatStyle::BCIS_BeforeComma
1562                                               ? 0
1563                                               : 2);
1564     CurrentState.NestedBlockIndent = CurrentState.Indent;
1565     if (Style.PackConstructorInitializers > FormatStyle::PCIS_BinPack) {
1566       CurrentState.AvoidBinPacking = true;
1567       CurrentState.BreakBeforeParameter =
1568           Style.ColumnLimit > 0 &&
1569           Style.PackConstructorInitializers != FormatStyle::PCIS_NextLine &&
1570           Style.PackConstructorInitializers != FormatStyle::PCIS_NextLineOnly;
1571     } else {
1572       CurrentState.BreakBeforeParameter = false;
1573     }
1574   }
  // With BCIS_AfterColon the initializers are indented relative to the line's
  // first indent rather than to the colon's column.
1575   if (Current.is(TT_CtorInitializerColon) &&
1576       Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) {
1577     CurrentState.Indent =
1578         State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1579     CurrentState.NestedBlockIndent = CurrentState.Indent;
1580     if (Style.PackConstructorInitializers > FormatStyle::PCIS_BinPack)
1581       CurrentState.AvoidBinPacking = true;
1582     else
1583       CurrentState.BreakBeforeParameter = false;
1584   }
1585   if (Current.is(TT_InheritanceColon)) {
1586     CurrentState.Indent =
1587         State.FirstIndent + Style.ConstructorInitializerIndentWidth;
1588   }
1589   if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline)
1590     CurrentState.NestedBlockIndent = State.Column + Current.ColumnWidth + 1;
1591   if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow))
1592     CurrentState.LastSpace = State.Column;
1593   if (Current.is(TT_RequiresExpression) &&
1594       Style.RequiresExpressionIndentation == FormatStyle::REI_Keyword) {
1595     CurrentState.NestedBlockIndent = State.Column;
1596   }
1597 
1598   // Previous non-comment token; used for the nested-block handling below and
  // for the Role hook at the end. The fake-parenthesis scopes themselves are
  // inserted by moveStatePastFakeLParens() further down.
1599   const FormatToken *Previous = Current.getPreviousNonComment();
1600 
1601   // Add special behavior to support a format commonly used for JavaScript
1602   // closures:
1603   //   SomeFunction(function() {
1604   //     foo();
1605   //     bar();
1606   //   }, a, b, c);
1607   if (Current.isNot(tok::comment) && !Current.ClosesRequiresClause &&
1608       Previous && Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) &&
1609       Previous->isNot(TT_DictLiteral) && State.Stack.size() > 1 &&
1610       !CurrentState.HasMultipleNestedBlocks) {
    // If the enclosing scope had its nested block inlined and this token is on
    // a new line, forbid line breaks in every enclosing scope (all stack
    // entries except the innermost).
1611     if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline)
1612       for (ParenState &PState : llvm::drop_end(State.Stack))
1613         PState.NoLineBreak = true;
1614     State.Stack[State.Stack.size() - 2].NestedBlockInlined = false;
1615   }
1616   if (Previous && (Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) ||
1617                    (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) &&
1618                     !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))) {
1619     CurrentState.NestedBlockInlined =
1620         !Newline && hasNestedBlockInlined(Previous, Current, Style);
1621   }
1622 
1623   moveStatePastFakeLParens(State, Newline);
1624   moveStatePastScopeCloser(State);
1625   // Do not use CurrentState here, since the two functions before may change the
1626   // Stack.
  // AllowBreak is sampled after closing scopes but before opening the scope
  // (if any) that Current itself starts.
1627   bool AllowBreak = !State.Stack.back().NoLineBreak &&
1628                     !State.Stack.back().NoLineBreakInOperand;
1629   moveStatePastScopeOpener(State, Newline);
1630   moveStatePastFakeRParens(State);
1631 
  // Remember where a string literal starts. ObjC, C# and TableGen multi-line
  // string literals record the column plus one; plain string literals record
  // the column itself. Any token other than a comment, identifier, '#' or
  // string literal resets the marker.
1632   if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
1633     State.StartOfStringLiteral = State.Column + 1;
1634   if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0) {
1635     State.StartOfStringLiteral = State.Column + 1;
1636   } else if (Current.is(TT_TableGenMultiLineString) &&
1637              State.StartOfStringLiteral == 0) {
1638     State.StartOfStringLiteral = State.Column + 1;
1639   } else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) {
1640     State.StartOfStringLiteral = State.Column;
1641   } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
1642              !Current.isStringLiteral()) {
1643     State.StartOfStringLiteral = 0;
1644   }
1645 
  // From here on State.NextToken refers to the token after Current and may be
  // null.
1646   State.Column += Current.ColumnWidth;
1647   State.NextToken = State.NextToken->Next;
1648   // Verilog case labels are on the same unwrapped lines as the statements that
1649   // follow. TokenAnnotator identifies them and sets MustBreakBefore.
1650   // Indentation is taken care of here. A case label can only have 1 statement
1651   // in Verilog, so we don't have to worry about lines that follow.
  // NOTE(review): CurrentState was bound to State.Stack.back() before the
  // moveStatePast* calls above, which the earlier comment says may change the
  // Stack; confirm the reference is still valid and the intended element here.
1652   if (Style.isVerilog() && State.NextToken &&
1653       State.NextToken->MustBreakBefore &&
1654       Keywords.isVerilogEndOfLabel(Current)) {
1655     State.FirstIndent += Style.IndentWidth;
1656     CurrentState.Indent = State.FirstIndent;
1657   }
1658 
1659   unsigned Penalty =
1660       handleEndOfLine(Current, State, DryRun, AllowBreak, Newline);
1661 
1662   if (Current.Role)
1663     Current.Role->formatFromToken(State, this, DryRun);
1664   // If the previous has a special role, let it consume tokens as appropriate.
1665   // It is necessary to start at the previous token for the only implemented
1666   // role (comma separated list). That way, the decision whether or not to break
1667   // after the "{" is already done and both options are tried and evaluated.
1668   // FIXME: This is ugly, find a better way.
1669   if (Previous && Previous->Role)
1670     Penalty += Previous->Role->formatAfterToken(State, this, DryRun);
1671 
1672   return Penalty;
1673 }
1674 
moveStatePastFakeLParens(LineState & State,bool Newline)1675 void ContinuationIndenter::moveStatePastFakeLParens(LineState &State,
1676                                                     bool Newline) {
1677   const FormatToken &Current = *State.NextToken;
1678   if (Current.FakeLParens.empty())
1679     return;
1680 
1681   const FormatToken *Previous = Current.getPreviousNonComment();
1682 
1683   // Don't add extra indentation for the first fake parenthesis after
1684   // 'return', assignments, opening <({[, or requires clauses. The indentation
1685   // for these cases is special cased.
1686   bool SkipFirstExtraIndent =
1687       Previous &&
1688       (Previous->opensScope() ||
1689        Previous->isOneOf(tok::semi, tok::kw_return, TT_RequiresClause) ||
1690        (Previous->getPrecedence() == prec::Assignment &&
1691         Style.AlignOperands != FormatStyle::OAS_DontAlign) ||
1692        Previous->is(TT_ObjCMethodExpr));
1693   for (const auto &PrecedenceLevel : llvm::reverse(Current.FakeLParens)) {
1694     const auto &CurrentState = State.Stack.back();
1695     ParenState NewParenState = CurrentState;
1696     NewParenState.Tok = nullptr;
1697     NewParenState.ContainsLineBreak = false;
1698     NewParenState.LastOperatorWrapped = true;
1699     NewParenState.IsChainedConditional = false;
1700     NewParenState.IsWrappedConditional = false;
1701     NewParenState.UnindentOperator = false;
1702     NewParenState.NoLineBreak =
1703         NewParenState.NoLineBreak || CurrentState.NoLineBreakInOperand;
1704 
1705     // Don't propagate AvoidBinPacking into subexpressions of arg/param lists.
1706     if (PrecedenceLevel > prec::Comma)
1707       NewParenState.AvoidBinPacking = false;
1708 
1709     // Indent from 'LastSpace' unless these are fake parentheses encapsulating
1710     // a builder type call after 'return' or, if the alignment after opening
1711     // brackets is disabled.
1712     if (!Current.isTrailingComment() &&
1713         (Style.AlignOperands != FormatStyle::OAS_DontAlign ||
1714          PrecedenceLevel < prec::Assignment) &&
1715         (!Previous || Previous->isNot(tok::kw_return) ||
1716          (Style.Language != FormatStyle::LK_Java && PrecedenceLevel > 0)) &&
1717         (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign ||
1718          PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) &&
1719         (!Style.isTableGen() ||
1720          (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma,
1721                                         TT_TableGenDAGArgListCommaToBreak)))) {
1722       NewParenState.Indent = std::max(
1723           std::max(State.Column, NewParenState.Indent), CurrentState.LastSpace);
1724     }
1725 
1726     // Special case for generic selection expressions, its comma-separated
1727     // expressions are not aligned to the opening paren like regular calls, but
1728     // rather continuation-indented relative to the _Generic keyword.
1729     if (Previous && Previous->endsSequence(tok::l_paren, tok::kw__Generic) &&
1730         State.Stack.size() > 1) {
1731       NewParenState.Indent = State.Stack[State.Stack.size() - 2].Indent +
1732                              Style.ContinuationIndentWidth;
1733     }
1734 
1735     if ((shouldUnindentNextOperator(Current) ||
1736          (Previous &&
1737           (PrecedenceLevel == prec::Conditional &&
1738            Previous->is(tok::question) && Previous->is(TT_ConditionalExpr)))) &&
1739         !Newline) {
1740       // If BreakBeforeBinaryOperators is set, un-indent a bit to account for
1741       // the operator and keep the operands aligned.
1742       if (Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator)
1743         NewParenState.UnindentOperator = true;
1744       // Mark indentation as alignment if the expression is aligned.
1745       if (Style.AlignOperands != FormatStyle::OAS_DontAlign)
1746         NewParenState.IsAligned = true;
1747     }
1748 
1749     // Do not indent relative to the fake parentheses inserted for "." or "->".
1750     // This is a special case to make the following to statements consistent:
1751     //   OuterFunction(InnerFunctionCall( // break
1752     //       ParameterToInnerFunction));
1753     //   OuterFunction(SomeObject.InnerFunctionCall( // break
1754     //       ParameterToInnerFunction));
1755     if (PrecedenceLevel > prec::Unknown)
1756       NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column);
1757     if (PrecedenceLevel != prec::Conditional &&
1758         Current.isNot(TT_UnaryOperator) &&
1759         Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
1760       NewParenState.StartOfFunctionCall = State.Column;
1761     }
1762 
1763     // Indent conditional expressions, unless they are chained "else-if"
1764     // conditionals. Never indent expression where the 'operator' is ',', ';' or
1765     // an assignment (i.e. *I <= prec::Assignment) as those have different
1766     // indentation rules. Indent other expression, unless the indentation needs
1767     // to be skipped.
1768     if (PrecedenceLevel == prec::Conditional && Previous &&
1769         Previous->is(tok::colon) && Previous->is(TT_ConditionalExpr) &&
1770         &PrecedenceLevel == &Current.FakeLParens.back() &&
1771         !CurrentState.IsWrappedConditional) {
1772       NewParenState.IsChainedConditional = true;
1773       NewParenState.UnindentOperator = State.Stack.back().UnindentOperator;
1774     } else if (PrecedenceLevel == prec::Conditional ||
1775                (!SkipFirstExtraIndent && PrecedenceLevel > prec::Assignment &&
1776                 !Current.isTrailingComment())) {
1777       NewParenState.Indent += Style.ContinuationIndentWidth;
1778     }
1779     if ((Previous && !Previous->opensScope()) || PrecedenceLevel != prec::Comma)
1780       NewParenState.BreakBeforeParameter = false;
1781     State.Stack.push_back(NewParenState);
1782     SkipFirstExtraIndent = false;
1783   }
1784 }
1785 
moveStatePastFakeRParens(LineState & State)1786 void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) {
1787   for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) {
1788     unsigned VariablePos = State.Stack.back().VariablePos;
1789     if (State.Stack.size() == 1) {
1790       // Do not pop the last element.
1791       break;
1792     }
1793     State.Stack.pop_back();
1794     State.Stack.back().VariablePos = VariablePos;
1795   }
1796 
1797   if (State.NextToken->ClosesRequiresClause && Style.IndentRequiresClause) {
1798     // Remove the indentation of the requires clauses (which is not in Indent,
1799     // but in LastSpace).
1800     State.Stack.back().LastSpace -= Style.IndentWidth;
1801   }
1802 }
1803 
/// Pushes a new ParenState for the scope opened by State.NextToken.
///
/// Returns without touching the stack when the token does not open a scope,
/// or when it is a '<' or '(' seen while the innermost state is flagged as a
/// C# generic type constraint. Tokens of kind BK_Block that have a
/// MatchingParen are delegated to moveStateToNewBlock(). For everything else
/// the new scope's indent, LastSpace, bin-packing and break-before-parameter
/// settings are computed from the token, the innermost ParenState and the
/// style, and the resulting ParenState is pushed onto State.Stack.
///
/// \param Newline only forwarded to moveStateToNewBlock().
moveStatePastScopeOpener(LineState & State,bool Newline)1804 void ContinuationIndenter::moveStatePastScopeOpener(LineState &State,
1805                                                     bool Newline) {
1806   const FormatToken &Current = *State.NextToken;
1807   if (!Current.opensScope())
1808     return;
1809 
1810   const auto &CurrentState = State.Stack.back();
1811 
1812   // Don't allow '<' or '(' in C# generic type constraints to start new scopes.
1813   if (Current.isOneOf(tok::less, tok::l_paren) &&
1814       CurrentState.IsCSharpGenericTypeConstraint) {
1815     return;
1816   }
1817 
1818   if (Current.MatchingParen && Current.is(BK_Block)) {
1819     moveStateToNewBlock(State, Newline);
1820     return;
1821   }
1822 
  // NewIndent and AvoidBinPacking are declared uninitialized; both branches of
  // the if/else below assign them before they are read.
1823   unsigned NewIndent;
1824   unsigned LastSpace = CurrentState.LastSpace;
1825   bool AvoidBinPacking;
1826   bool BreakBeforeParameter = false;
1827   unsigned NestedBlockIndent = std::max(CurrentState.StartOfFunctionCall,
1828                                         CurrentState.NestedBlockIndent);
  // Branch 1: '{', array-initializer '[' or a proto message field opener.
1829   if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
1830       opensProtoMessageField(Current, Style)) {
1831     if (Current.opensBlockOrBlockTypeList(Style)) {
1832       NewIndent = Style.IndentWidth +
1833                   std::min(State.Column, CurrentState.NestedBlockIndent);
1834     } else if (Current.is(tok::l_brace)) {
      // Braced initializers use BracedInitializerIndentWidth when it is set and
      // fall back to ContinuationIndentWidth otherwise.
1835       NewIndent =
1836           CurrentState.LastSpace + Style.BracedInitializerIndentWidth.value_or(
1837                                        Style.ContinuationIndentWidth);
1838     } else {
1839       NewIndent = CurrentState.LastSpace + Style.ContinuationIndentWidth;
1840     }
1841     const FormatToken *NextNonComment = Current.getNextNonComment();
    // True if the token directly before the matching closer is a comma.
1842     bool EndsInComma = Current.MatchingParen &&
1843                        Current.MatchingParen->Previous &&
1844                        Current.MatchingParen->Previous->is(tok::comma);
1845     AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) ||
1846                       Style.isProto() || !Style.BinPackArguments ||
1847                       (NextNonComment && NextNonComment->isOneOf(
1848                                              TT_DesignatedInitializerPeriod,
1849                                              TT_DesignatedInitializerLSquare));
1850     BreakBeforeParameter = EndsInComma;
1851     if (Current.ParameterCount > 1)
1852       NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1);
1853   } else {
    // Branch 2: every other scope opener.
1854     NewIndent =
1855         Style.ContinuationIndentWidth +
1856         std::max(CurrentState.LastSpace, CurrentState.StartOfFunctionCall);
1857 
1858     if (Style.isTableGen() && Current.is(TT_TableGenDAGArgOpenerToBreak) &&
1859         Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakElements) {
1860       // For the case the next token is a TableGen DAGArg operator identifier
1861       // that is not marked to have a line break after it.
1862       // In this case the option DAS_BreakElements requires to align the
1863       // DAGArg elements to the operator.
1864       const FormatToken *Next = Current.Next;
      // NOTE(review): the "+ 2" presumably covers the opener and the space
      // after the operator -- confirm.
1865       if (Next && Next->is(TT_TableGenDAGArgOperatorID))
1866         NewIndent = State.Column + Next->TokenText.size() + 2;
1867     }
1868 
1869     // Ensure that different brackets force relative alignment, e.g.:
1870     // void SomeFunction(vector<  // break
1871     //                       int> v);
1872     // FIXME: We likely want to do this for more combinations of brackets.
1873     if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) {
1874       NewIndent = std::max(NewIndent, CurrentState.Indent);
1875       LastSpace = std::max(LastSpace, CurrentState.Indent);
1876     }
1877 
    // Unlike the brace branch above, comments before the closer are skipped
    // when looking for the trailing comma.
1878     bool EndsInComma =
1879         Current.MatchingParen &&
1880         Current.MatchingParen->getPreviousNonComment() &&
1881         Current.MatchingParen->getPreviousNonComment()->is(tok::comma);
1882 
1883     // If ObjCBinPackProtocolList is unspecified, fall back to BinPackParameters
1884     // for backwards compatibility.
1885     bool ObjCBinPackProtocolList =
1886         (Style.ObjCBinPackProtocolList == FormatStyle::BPS_Auto &&
1887          Style.BinPackParameters) ||
1888         Style.ObjCBinPackProtocolList == FormatStyle::BPS_Always;
1889 
    // ObjC declaration lines use the protocol-list setting; all other lines
    // use BinPackParameters.
1890     bool BinPackDeclaration =
1891         (State.Line->Type != LT_ObjCDecl && Style.BinPackParameters) ||
1892         (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList);
1893 
1894     bool GenericSelection =
1895         Current.getPreviousNonComment() &&
1896         Current.getPreviousNonComment()->is(tok::kw__Generic);
1897 
1898     AvoidBinPacking =
1899         (CurrentState.IsCSharpGenericTypeConstraint) || GenericSelection ||
1900         (Style.isJavaScript() && EndsInComma) ||
1901         (State.Line->MustBeDeclaration && !BinPackDeclaration) ||
1902         (!State.Line->MustBeDeclaration && !Style.BinPackArguments) ||
1903         (Style.ExperimentalAutoDetectBinPacking &&
1904          (Current.is(PPK_OnePerLine) ||
1905           (!BinPackInconclusiveFunctions && Current.is(PPK_Inconclusive))));
1906 
1907     if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen &&
1908         Style.ObjCBreakBeforeNestedBlockParam) {
1909       if (Style.ColumnLimit) {
1910         // If this '[' opens an ObjC call, determine whether all parameters fit
1911         // into one line and put one per line if they don't.
1912         if (getLengthToMatchingParen(Current, State.Stack) + State.Column >
1913             getColumnLimit(State)) {
1914           BreakBeforeParameter = true;
1915         }
1916       } else {
1917         // For ColumnLimit = 0, we have to figure out whether there is or has to
1918         // be a line break within this call.
1919         for (const FormatToken *Tok = &Current;
1920              Tok && Tok != Current.MatchingParen; Tok = Tok->Next) {
1921           if (Tok->MustBreakBefore ||
1922               (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) {
1923             BreakBeforeParameter = true;
1924             break;
1925           }
1926         }
1927       }
1928     }
1929 
1930     if (Style.isJavaScript() && EndsInComma)
1931       BreakBeforeParameter = true;
1932   }
1933   // Generally inherit NoLineBreak from the current scope to nested scope.
1934   // However, don't do this for non-empty nested blocks, dict literals and
1935   // array literals as these follow different indentation rules.
1936   bool NoLineBreak =
1937       Current.Children.empty() &&
1938       !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) &&
1939       (CurrentState.NoLineBreak || CurrentState.NoLineBreakInOperand ||
1940        (Current.is(TT_TemplateOpener) &&
1941         CurrentState.ContainsUnwrappedBuilder));
  // CurrentState refers to the old top of the stack and is not used after this
  // push; the new top is accessed through NewState.
1942   State.Stack.push_back(
1943       ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak));
1944   auto &NewState = State.Stack.back();
1945   NewState.NestedBlockIndent = NestedBlockIndent;
1946   NewState.BreakBeforeParameter = BreakBeforeParameter;
1947   NewState.HasMultipleNestedBlocks = (Current.BlockParameterCount > 1);
1948 
1949   if (Style.BraceWrapping.BeforeLambdaBody && Current.Next &&
1950       Current.is(tok::l_paren)) {
1951     // Search for any parameter that is a lambda.
    // The scan follows Next until a lambda '[' is found or the token chain
    // ends; it does not stop at the matching ')'.
1952     FormatToken const *next = Current.Next;
1953     while (next) {
1954       if (next->is(TT_LambdaLSquare)) {
1955         NewState.HasMultipleNestedBlocks = true;
1956         break;
1957       }
1958       next = next->Next;
1959     }
1960   }
1961 
  // An array-initializer '[' directly preceded by '@' marks an ObjC array
  // literal.
1962   NewState.IsInsideObjCArrayLiteral = Current.is(TT_ArrayInitializerLSquare) &&
1963                                       Current.Previous &&
1964                                       Current.Previous->is(tok::at);
1965 }
1966 
moveStatePastScopeCloser(LineState & State)1967 void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) {
1968   const FormatToken &Current = *State.NextToken;
1969   if (!Current.closesScope())
1970     return;
1971 
1972   // If we encounter a closing ), ], } or >, we can remove a level from our
1973   // stacks.
1974   if (State.Stack.size() > 1 &&
1975       (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) ||
1976        (Current.is(tok::r_brace) && State.NextToken != State.Line->First) ||
1977        State.NextToken->is(TT_TemplateCloser) ||
1978        State.NextToken->is(TT_TableGenListCloser) ||
1979        (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) {
1980     State.Stack.pop_back();
1981   }
1982 
1983   auto &CurrentState = State.Stack.back();
1984 
1985   // Reevaluate whether ObjC message arguments fit into one line.
1986   // If a receiver spans multiple lines, e.g.:
1987   //   [[object block:^{
1988   //     return 42;
1989   //   }] a:42 b:42];
1990   // BreakBeforeParameter is calculated based on an incorrect assumption
1991   // (it is checked whether the whole expression fits into one line without
1992   // considering a line break inside a message receiver).
1993   // We check whether arguments fit after receiver scope closer (into the same
1994   // line).
1995   if (CurrentState.BreakBeforeParameter && Current.MatchingParen &&
1996       Current.MatchingParen->Previous) {
1997     const FormatToken &CurrentScopeOpener = *Current.MatchingParen->Previous;
1998     if (CurrentScopeOpener.is(TT_ObjCMethodExpr) &&
1999         CurrentScopeOpener.MatchingParen) {
2000       int NecessarySpaceInLine =
2001           getLengthToMatchingParen(CurrentScopeOpener, State.Stack) +
2002           CurrentScopeOpener.TotalLength - Current.TotalLength - 1;
2003       if (State.Column + Current.ColumnWidth + NecessarySpaceInLine <=
2004           Style.ColumnLimit) {
2005         CurrentState.BreakBeforeParameter = false;
2006       }
2007     }
2008   }
2009 
2010   if (Current.is(tok::r_square)) {
2011     // If this ends the array subscript expr, reset the corresponding value.
2012     const FormatToken *NextNonComment = Current.getNextNonComment();
2013     if (NextNonComment && NextNonComment->isNot(tok::l_square))
2014       CurrentState.StartOfArraySubscripts = 0;
2015   }
2016 }
2017 
moveStateToNewBlock(LineState & State,bool NewLine)2018 void ContinuationIndenter::moveStateToNewBlock(LineState &State, bool NewLine) {
2019   if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope &&
2020       State.NextToken->is(TT_LambdaLBrace) &&
2021       !State.Line->MightBeFunctionDecl) {
2022     State.Stack.back().NestedBlockIndent = State.FirstIndent;
2023   }
2024   unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent;
2025   // ObjC block sometimes follow special indentation rules.
2026   unsigned NewIndent =
2027       NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace)
2028                                ? Style.ObjCBlockIndentWidth
2029                                : Style.IndentWidth);
2030 
2031   // Even when wrapping before lambda body, the left brace can still be added to
2032   // the same line. This occurs when checking whether the whole lambda body can
2033   // go on a single line. In this case we have to make sure there are no line
2034   // breaks in the body, otherwise we could just end up with a regular lambda
2035   // body without the brace wrapped.
2036   bool NoLineBreak = Style.BraceWrapping.BeforeLambdaBody && !NewLine &&
2037                      State.NextToken->is(TT_LambdaLBrace);
2038 
2039   State.Stack.push_back(ParenState(State.NextToken, NewIndent,
2040                                    State.Stack.back().LastSpace,
2041                                    /*AvoidBinPacking=*/true, NoLineBreak));
2042   State.Stack.back().NestedBlockIndent = NestedBlockIndent;
2043   State.Stack.back().BreakBeforeParameter = true;
2044 }
2045 
getLastLineEndColumn(StringRef Text,unsigned StartColumn,unsigned TabWidth,encoding::Encoding Encoding)2046 static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn,
2047                                      unsigned TabWidth,
2048                                      encoding::Encoding Encoding) {
2049   size_t LastNewlinePos = Text.find_last_of("\n");
2050   if (LastNewlinePos == StringRef::npos) {
2051     return StartColumn +
2052            encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding);
2053   } else {
2054     return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos),
2055                                          /*StartColumn=*/0, TabWidth, Encoding);
2056   }
2057 }
2058 
reformatRawStringLiteral(const FormatToken & Current,LineState & State,const FormatStyle & RawStringStyle,bool DryRun,bool Newline)2059 unsigned ContinuationIndenter::reformatRawStringLiteral(
2060     const FormatToken &Current, LineState &State,
2061     const FormatStyle &RawStringStyle, bool DryRun, bool Newline) {
2062   unsigned StartColumn = State.Column - Current.ColumnWidth;
2063   StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText);
2064   StringRef NewDelimiter =
2065       getCanonicalRawStringDelimiter(Style, RawStringStyle.Language);
2066   if (NewDelimiter.empty())
2067     NewDelimiter = OldDelimiter;
2068   // The text of a raw string is between the leading 'R"delimiter(' and the
2069   // trailing 'delimiter)"'.
2070   unsigned OldPrefixSize = 3 + OldDelimiter.size();
2071   unsigned OldSuffixSize = 2 + OldDelimiter.size();
2072   // We create a virtual text environment which expects a null-terminated
2073   // string, so we cannot use StringRef.
2074   std::string RawText = std::string(
2075       Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize));
2076   if (NewDelimiter != OldDelimiter) {
2077     // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the
2078     // raw string.
2079     std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str();
2080     if (StringRef(RawText).contains(CanonicalDelimiterSuffix))
2081       NewDelimiter = OldDelimiter;
2082   }
2083 
2084   unsigned NewPrefixSize = 3 + NewDelimiter.size();
2085   unsigned NewSuffixSize = 2 + NewDelimiter.size();
2086 
2087   // The first start column is the column the raw text starts after formatting.
2088   unsigned FirstStartColumn = StartColumn + NewPrefixSize;
2089 
2090   // The next start column is the intended indentation a line break inside
2091   // the raw string at level 0. It is determined by the following rules:
2092   //   - if the content starts on newline, it is one level more than the current
2093   //     indent, and
2094   //   - if the content does not start on a newline, it is the first start
2095   //     column.
2096   // These rules have the advantage that the formatted content both does not
2097   // violate the rectangle rule and visually flows within the surrounding
2098   // source.
2099   bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n';
2100   // If this token is the last parameter (checked by looking if it's followed by
2101   // `)` and is not on a newline, the base the indent off the line's nested
2102   // block indent. Otherwise, base the indent off the arguments indent, so we
2103   // can achieve:
2104   //
2105   // fffffffffff(1, 2, 3, R"pb(
2106   //     key1: 1  #
2107   //     key2: 2)pb");
2108   //
2109   // fffffffffff(1, 2, 3,
2110   //             R"pb(
2111   //               key1: 1  #
2112   //               key2: 2
2113   //             )pb");
2114   //
2115   // fffffffffff(1, 2, 3,
2116   //             R"pb(
2117   //               key1: 1  #
2118   //               key2: 2
2119   //             )pb",
2120   //             5);
2121   unsigned CurrentIndent =
2122       (!Newline && Current.Next && Current.Next->is(tok::r_paren))
2123           ? State.Stack.back().NestedBlockIndent
2124           : State.Stack.back().Indent;
2125   unsigned NextStartColumn = ContentStartsOnNewline
2126                                  ? CurrentIndent + Style.IndentWidth
2127                                  : FirstStartColumn;
2128 
2129   // The last start column is the column the raw string suffix starts if it is
2130   // put on a newline.
2131   // The last start column is the intended indentation of the raw string postfix
2132   // if it is put on a newline. It is determined by the following rules:
2133   //   - if the raw string prefix starts on a newline, it is the column where
2134   //     that raw string prefix starts, and
2135   //   - if the raw string prefix does not start on a newline, it is the current
2136   //     indent.
2137   unsigned LastStartColumn =
2138       Current.NewlinesBefore ? FirstStartColumn - NewPrefixSize : CurrentIndent;
2139 
2140   std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat(
2141       RawStringStyle, RawText, {tooling::Range(0, RawText.size())},
2142       FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>",
2143       /*Status=*/nullptr);
2144 
2145   auto NewCode = applyAllReplacements(RawText, Fixes.first);
2146   tooling::Replacements NoFixes;
2147   if (!NewCode)
2148     return addMultilineToken(Current, State);
2149   if (!DryRun) {
2150     if (NewDelimiter != OldDelimiter) {
2151       // In 'R"delimiter(...', the delimiter starts 2 characters after the start
2152       // of the token.
2153       SourceLocation PrefixDelimiterStart =
2154           Current.Tok.getLocation().getLocWithOffset(2);
2155       auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement(
2156           SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter));
2157       if (PrefixErr) {
2158         llvm::errs()
2159             << "Failed to update the prefix delimiter of a raw string: "
2160             << llvm::toString(std::move(PrefixErr)) << "\n";
2161       }
2162       // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at
2163       // position length - 1 - |delimiter|.
2164       SourceLocation SuffixDelimiterStart =
2165           Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() -
2166                                                      1 - OldDelimiter.size());
2167       auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement(
2168           SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter));
2169       if (SuffixErr) {
2170         llvm::errs()
2171             << "Failed to update the suffix delimiter of a raw string: "
2172             << llvm::toString(std::move(SuffixErr)) << "\n";
2173       }
2174     }
2175     SourceLocation OriginLoc =
2176         Current.Tok.getLocation().getLocWithOffset(OldPrefixSize);
2177     for (const tooling::Replacement &Fix : Fixes.first) {
2178       auto Err = Whitespaces.addReplacement(tooling::Replacement(
2179           SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()),
2180           Fix.getLength(), Fix.getReplacementText()));
2181       if (Err) {
2182         llvm::errs() << "Failed to reformat raw string: "
2183                      << llvm::toString(std::move(Err)) << "\n";
2184       }
2185     }
2186   }
2187   unsigned RawLastLineEndColumn = getLastLineEndColumn(
2188       *NewCode, FirstStartColumn, Style.TabWidth, Encoding);
2189   State.Column = RawLastLineEndColumn + NewSuffixSize;
2190   // Since we're updating the column to after the raw string literal here, we
2191   // have to manually add the penalty for the prefix R"delim( over the column
2192   // limit.
2193   unsigned PrefixExcessCharacters =
2194       StartColumn + NewPrefixSize > Style.ColumnLimit
2195           ? StartColumn + NewPrefixSize - Style.ColumnLimit
2196           : 0;
2197   bool IsMultiline =
2198       ContentStartsOnNewline || (NewCode->find('\n') != std::string::npos);
2199   if (IsMultiline) {
2200     // Break before further function parameters on all levels.
2201     for (ParenState &Paren : State.Stack)
2202       Paren.BreakBeforeParameter = true;
2203   }
2204   return Fixes.second + PrefixExcessCharacters * Style.PenaltyExcessCharacter;
2205 }
2206 
addMultilineToken(const FormatToken & Current,LineState & State)2207 unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
2208                                                  LineState &State) {
2209   // Break before further function parameters on all levels.
2210   for (ParenState &Paren : State.Stack)
2211     Paren.BreakBeforeParameter = true;
2212 
2213   unsigned ColumnsUsed = State.Column;
2214   // We can only affect layout of the first and the last line, so the penalty
2215   // for all other lines is constant, and we ignore it.
2216   State.Column = Current.LastLineColumnWidth;
2217 
2218   if (ColumnsUsed > getColumnLimit(State))
2219     return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State));
2220   return 0;
2221 }
2222 
handleEndOfLine(const FormatToken & Current,LineState & State,bool DryRun,bool AllowBreak,bool Newline)2223 unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
2224                                                LineState &State, bool DryRun,
2225                                                bool AllowBreak, bool Newline) {
2226   unsigned Penalty = 0;
2227   // Compute the raw string style to use in case this is a raw string literal
2228   // that can be reformatted.
2229   auto RawStringStyle = getRawStringStyle(Current, State);
2230   if (RawStringStyle && !Current.Finalized) {
2231     Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun,
2232                                        Newline);
2233   } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
2234     // Don't break multi-line tokens other than block comments and raw string
2235     // literals. Instead, just update the state.
2236     Penalty = addMultilineToken(Current, State);
2237   } else if (State.Line->Type != LT_ImportStatement) {
2238     // We generally don't break import statements.
2239     LineState OriginalState = State;
2240 
2241     // Whether we force the reflowing algorithm to stay strictly within the
2242     // column limit.
2243     bool Strict = false;
2244     // Whether the first non-strict attempt at reflowing did intentionally
2245     // exceed the column limit.
2246     bool Exceeded = false;
2247     std::tie(Penalty, Exceeded) = breakProtrudingToken(
2248         Current, State, AllowBreak, /*DryRun=*/true, Strict);
2249     if (Exceeded) {
2250       // If non-strict reflowing exceeds the column limit, try whether strict
2251       // reflowing leads to an overall lower penalty.
2252       LineState StrictState = OriginalState;
2253       unsigned StrictPenalty =
2254           breakProtrudingToken(Current, StrictState, AllowBreak,
2255                                /*DryRun=*/true, /*Strict=*/true)
2256               .first;
2257       Strict = StrictPenalty <= Penalty;
2258       if (Strict) {
2259         Penalty = StrictPenalty;
2260         State = StrictState;
2261       }
2262     }
2263     if (!DryRun) {
2264       // If we're not in dry-run mode, apply the changes with the decision on
2265       // strictness made above.
2266       breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
2267                            Strict);
2268     }
2269   }
2270   if (State.Column > getColumnLimit(State)) {
2271     unsigned ExcessCharacters = State.Column - getColumnLimit(State);
2272     Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
2273   }
2274   return Penalty;
2275 }
2276 
2277 // Returns the enclosing function name of a token, or the empty string if not
2278 // found.
getEnclosingFunctionName(const FormatToken & Current)2279 static StringRef getEnclosingFunctionName(const FormatToken &Current) {
2280   // Look for: 'function(' or 'function<templates>(' before Current.
2281   auto Tok = Current.getPreviousNonComment();
2282   if (!Tok || Tok->isNot(tok::l_paren))
2283     return "";
2284   Tok = Tok->getPreviousNonComment();
2285   if (!Tok)
2286     return "";
2287   if (Tok->is(TT_TemplateCloser)) {
2288     Tok = Tok->MatchingParen;
2289     if (Tok)
2290       Tok = Tok->getPreviousNonComment();
2291   }
2292   if (!Tok || Tok->isNot(tok::identifier))
2293     return "";
2294   return Tok->TokenText;
2295 }
2296 
2297 std::optional<FormatStyle>
getRawStringStyle(const FormatToken & Current,const LineState & State)2298 ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
2299                                         const LineState &State) {
2300   if (!Current.isStringLiteral())
2301     return std::nullopt;
2302   auto Delimiter = getRawStringDelimiter(Current.TokenText);
2303   if (!Delimiter)
2304     return std::nullopt;
2305   auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter);
2306   if (!RawStringStyle && Delimiter->empty()) {
2307     RawStringStyle = RawStringFormats.getEnclosingFunctionStyle(
2308         getEnclosingFunctionName(Current));
2309   }
2310   if (!RawStringStyle)
2311     return std::nullopt;
2312   RawStringStyle->ColumnLimit = getColumnLimit(State);
2313   return RawStringStyle;
2314 }
2315 
2316 std::unique_ptr<BreakableToken>
createBreakableToken(const FormatToken & Current,LineState & State,bool AllowBreak)2317 ContinuationIndenter::createBreakableToken(const FormatToken &Current,
2318                                            LineState &State, bool AllowBreak) {
2319   unsigned StartColumn = State.Column - Current.ColumnWidth;
2320   if (Current.isStringLiteral()) {
2321     // Strings in JSON cannot be broken. Breaking strings in JavaScript is
2322     // disabled for now.
2323     if (Style.isJson() || Style.isJavaScript() || !Style.BreakStringLiterals ||
2324         !AllowBreak) {
2325       return nullptr;
2326     }
2327 
2328     // Don't break string literals inside preprocessor directives (except for
2329     // #define directives, as their contents are stored in separate lines and
2330     // are not affected by this check).
2331     // This way we avoid breaking code with line directives and unknown
2332     // preprocessor directives that contain long string literals.
2333     if (State.Line->Type == LT_PreprocessorDirective)
2334       return nullptr;
2335     // Exempts unterminated string literals from line breaking. The user will
2336     // likely want to terminate the string before any line breaking is done.
2337     if (Current.IsUnterminatedLiteral)
2338       return nullptr;
2339     // Don't break string literals inside Objective-C array literals (doing so
2340     // raises the warning -Wobjc-string-concatenation).
2341     if (State.Stack.back().IsInsideObjCArrayLiteral)
2342       return nullptr;
2343 
2344     // The "DPI"/"DPI-C" in SystemVerilog direct programming interface
2345     // imports/exports cannot be split, e.g.
2346     // `import "DPI" function foo();`
2347     // FIXME: make this use same infra as C++ import checks
2348     if (Style.isVerilog() && Current.Previous &&
2349         Current.Previous->isOneOf(tok::kw_export, Keywords.kw_import)) {
2350       return nullptr;
2351     }
2352     StringRef Text = Current.TokenText;
2353 
2354     // We need this to address the case where there is an unbreakable tail only
2355     // if certain other formatting decisions have been taken. The
2356     // UnbreakableTailLength of Current is an overapproximation in that case and
2357     // we need to be correct here.
2358     unsigned UnbreakableTailLength = (State.NextToken && canBreak(State))
2359                                          ? 0
2360                                          : Current.UnbreakableTailLength;
2361 
2362     if (Style.isVerilog() || Style.Language == FormatStyle::LK_Java ||
2363         Style.isJavaScript() || Style.isCSharp()) {
2364       BreakableStringLiteralUsingOperators::QuoteStyleType QuoteStyle;
2365       if (Style.isJavaScript() && Text.starts_with("'") &&
2366           Text.ends_with("'")) {
2367         QuoteStyle = BreakableStringLiteralUsingOperators::SingleQuotes;
2368       } else if (Style.isCSharp() && Text.starts_with("@\"") &&
2369                  Text.ends_with("\"")) {
2370         QuoteStyle = BreakableStringLiteralUsingOperators::AtDoubleQuotes;
2371       } else if (Text.starts_with("\"") && Text.ends_with("\"")) {
2372         QuoteStyle = BreakableStringLiteralUsingOperators::DoubleQuotes;
2373       } else {
2374         return nullptr;
2375       }
2376       return std::make_unique<BreakableStringLiteralUsingOperators>(
2377           Current, QuoteStyle,
2378           /*UnindentPlus=*/shouldUnindentNextOperator(Current), StartColumn,
2379           UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style);
2380     }
2381 
2382     StringRef Prefix;
2383     StringRef Postfix;
2384     // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
2385     // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
2386     // reduce the overhead) for each FormatToken, which is a string, so that we
2387     // don't run multiple checks here on the hot path.
2388     if ((Text.ends_with(Postfix = "\"") &&
2389          (Text.starts_with(Prefix = "@\"") || Text.starts_with(Prefix = "\"") ||
2390           Text.starts_with(Prefix = "u\"") ||
2391           Text.starts_with(Prefix = "U\"") ||
2392           Text.starts_with(Prefix = "u8\"") ||
2393           Text.starts_with(Prefix = "L\""))) ||
2394         (Text.starts_with(Prefix = "_T(\"") &&
2395          Text.ends_with(Postfix = "\")"))) {
2396       return std::make_unique<BreakableStringLiteral>(
2397           Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
2398           State.Line->InPPDirective, Encoding, Style);
2399     }
2400   } else if (Current.is(TT_BlockComment)) {
2401     if (!Style.ReflowComments ||
2402         // If a comment token switches formatting, like
2403         // /* clang-format on */, we don't want to break it further,
2404         // but we may still want to adjust its indentation.
2405         switchesFormatting(Current)) {
2406       return nullptr;
2407     }
2408     return std::make_unique<BreakableBlockComment>(
2409         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
2410         State.Line->InPPDirective, Encoding, Style, Whitespaces.useCRLF());
2411   } else if (Current.is(TT_LineComment) &&
2412              (!Current.Previous ||
2413               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
2414     bool RegularComments = [&]() {
2415       for (const FormatToken *T = &Current; T && T->is(TT_LineComment);
2416            T = T->Next) {
2417         if (!(T->TokenText.starts_with("//") || T->TokenText.starts_with("#")))
2418           return false;
2419       }
2420       return true;
2421     }();
2422     if (!Style.ReflowComments ||
2423         CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
2424         switchesFormatting(Current) || !RegularComments) {
2425       return nullptr;
2426     }
2427     return std::make_unique<BreakableLineCommentSection>(
2428         Current, StartColumn, /*InPPDirective=*/false, Encoding, Style);
2429   }
2430   return nullptr;
2431 }
2432 
2433 std::pair<unsigned, bool>
breakProtrudingToken(const FormatToken & Current,LineState & State,bool AllowBreak,bool DryRun,bool Strict)2434 ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
2435                                            LineState &State, bool AllowBreak,
2436                                            bool DryRun, bool Strict) {
2437   std::unique_ptr<const BreakableToken> Token =
2438       createBreakableToken(Current, State, AllowBreak);
2439   if (!Token)
2440     return {0, false};
2441   assert(Token->getLineCount() > 0);
2442   unsigned ColumnLimit = getColumnLimit(State);
2443   if (Current.is(TT_LineComment)) {
2444     // We don't insert backslashes when breaking line comments.
2445     ColumnLimit = Style.ColumnLimit;
2446   }
2447   if (ColumnLimit == 0) {
2448     // To make the rest of the function easier set the column limit to the
2449     // maximum, if there should be no limit.
2450     ColumnLimit = std::numeric_limits<decltype(ColumnLimit)>::max();
2451   }
2452   if (Current.UnbreakableTailLength >= ColumnLimit)
2453     return {0, false};
2454   // ColumnWidth was already accounted into State.Column before calling
2455   // breakProtrudingToken.
2456   unsigned StartColumn = State.Column - Current.ColumnWidth;
2457   unsigned NewBreakPenalty = Current.isStringLiteral()
2458                                  ? Style.PenaltyBreakString
2459                                  : Style.PenaltyBreakComment;
2460   // Stores whether we intentionally decide to let a line exceed the column
2461   // limit.
2462   bool Exceeded = false;
2463   // Stores whether we introduce a break anywhere in the token.
2464   bool BreakInserted = Token->introducesBreakBeforeToken();
2465   // Store whether we inserted a new line break at the end of the previous
2466   // logical line.
2467   bool NewBreakBefore = false;
2468   // We use a conservative reflowing strategy. Reflow starts after a line is
2469   // broken or the corresponding whitespace compressed. Reflow ends as soon as a
2470   // line that doesn't get reflown with the previous line is reached.
2471   bool Reflow = false;
2472   // Keep track of where we are in the token:
2473   // Where we are in the content of the current logical line.
2474   unsigned TailOffset = 0;
2475   // The column number we're currently at.
2476   unsigned ContentStartColumn =
2477       Token->getContentStartColumn(0, /*Break=*/false);
2478   // The number of columns left in the current logical line after TailOffset.
2479   unsigned RemainingTokenColumns =
2480       Token->getRemainingLength(0, TailOffset, ContentStartColumn);
2481   // Adapt the start of the token, for example indent.
2482   if (!DryRun)
2483     Token->adaptStartOfLine(0, Whitespaces);
2484 
2485   unsigned ContentIndent = 0;
2486   unsigned Penalty = 0;
2487   LLVM_DEBUG(llvm::dbgs() << "Breaking protruding token at column "
2488                           << StartColumn << ".\n");
2489   for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
2490        LineIndex != EndIndex; ++LineIndex) {
2491     LLVM_DEBUG(llvm::dbgs()
2492                << "  Line: " << LineIndex << " (Reflow: " << Reflow << ")\n");
2493     NewBreakBefore = false;
2494     // If we did reflow the previous line, we'll try reflowing again. Otherwise
2495     // we'll start reflowing if the current line is broken or whitespace is
2496     // compressed.
2497     bool TryReflow = Reflow;
2498     // Break the current token until we can fit the rest of the line.
2499     while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
2500       LLVM_DEBUG(llvm::dbgs() << "    Over limit, need: "
2501                               << (ContentStartColumn + RemainingTokenColumns)
2502                               << ", space: " << ColumnLimit
2503                               << ", reflown prefix: " << ContentStartColumn
2504                               << ", offset in line: " << TailOffset << "\n");
2505       // If the current token doesn't fit, find the latest possible split in the
2506       // current line so that breaking at it will be under the column limit.
2507       // FIXME: Use the earliest possible split while reflowing to correctly
2508       // compress whitespace within a line.
2509       BreakableToken::Split Split =
2510           Token->getSplit(LineIndex, TailOffset, ColumnLimit,
2511                           ContentStartColumn, CommentPragmasRegex);
2512       if (Split.first == StringRef::npos) {
2513         // No break opportunity - update the penalty and continue with the next
2514         // logical line.
2515         if (LineIndex < EndIndex - 1) {
2516           // The last line's penalty is handled in addNextStateToQueue() or when
2517           // calling replaceWhitespaceAfterLastLine below.
2518           Penalty += Style.PenaltyExcessCharacter *
2519                      (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
2520         }
2521         LLVM_DEBUG(llvm::dbgs() << "    No break opportunity.\n");
2522         break;
2523       }
2524       assert(Split.first != 0);
2525 
2526       if (Token->supportsReflow()) {
2527         // Check whether the next natural split point after the current one can
2528         // still fit the line, either because we can compress away whitespace,
2529         // or because the penalty the excess characters introduce is lower than
2530         // the break penalty.
2531         // We only do this for tokens that support reflowing, and thus allow us
2532         // to change the whitespace arbitrarily (e.g. comments).
2533         // Other tokens, like string literals, can be broken on arbitrary
2534         // positions.
2535 
2536         // First, compute the columns from TailOffset to the next possible split
2537         // position.
2538         // For example:
2539         // ColumnLimit:     |
2540         // // Some text   that    breaks
2541         //    ^ tail offset
2542         //             ^-- split
2543         //    ^-------- to split columns
2544         //                    ^--- next split
2545         //    ^--------------- to next split columns
2546         unsigned ToSplitColumns = Token->getRangeLength(
2547             LineIndex, TailOffset, Split.first, ContentStartColumn);
2548         LLVM_DEBUG(llvm::dbgs() << "    ToSplit: " << ToSplitColumns << "\n");
2549 
2550         BreakableToken::Split NextSplit = Token->getSplit(
2551             LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
2552             ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
2553         // Compute the columns necessary to fit the next non-breakable sequence
2554         // into the current line.
2555         unsigned ToNextSplitColumns = 0;
2556         if (NextSplit.first == StringRef::npos) {
2557           ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
2558                                                          ContentStartColumn);
2559         } else {
2560           ToNextSplitColumns = Token->getRangeLength(
2561               LineIndex, TailOffset,
2562               Split.first + Split.second + NextSplit.first, ContentStartColumn);
2563         }
2564         // Compress the whitespace between the break and the start of the next
2565         // unbreakable sequence.
2566         ToNextSplitColumns =
2567             Token->getLengthAfterCompression(ToNextSplitColumns, Split);
2568         LLVM_DEBUG(llvm::dbgs()
2569                    << "    ContentStartColumn: " << ContentStartColumn << "\n");
2570         LLVM_DEBUG(llvm::dbgs()
2571                    << "    ToNextSplit: " << ToNextSplitColumns << "\n");
2572         // If the whitespace compression makes us fit, continue on the current
2573         // line.
2574         bool ContinueOnLine =
2575             ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
2576         unsigned ExcessCharactersPenalty = 0;
2577         if (!ContinueOnLine && !Strict) {
2578           // Similarly, if the excess characters' penalty is lower than the
2579           // penalty of introducing a new break, continue on the current line.
2580           ExcessCharactersPenalty =
2581               (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
2582               Style.PenaltyExcessCharacter;
2583           LLVM_DEBUG(llvm::dbgs()
2584                      << "    Penalty excess: " << ExcessCharactersPenalty
2585                      << "\n            break : " << NewBreakPenalty << "\n");
2586           if (ExcessCharactersPenalty < NewBreakPenalty) {
2587             Exceeded = true;
2588             ContinueOnLine = true;
2589           }
2590         }
2591         if (ContinueOnLine) {
2592           LLVM_DEBUG(llvm::dbgs() << "    Continuing on line...\n");
2593           // The current line fits after compressing the whitespace - reflow
2594           // the next line into it if possible.
2595           TryReflow = true;
2596           if (!DryRun) {
2597             Token->compressWhitespace(LineIndex, TailOffset, Split,
2598                                       Whitespaces);
2599           }
2600           // When we continue on the same line, leave one space between content.
2601           ContentStartColumn += ToSplitColumns + 1;
2602           Penalty += ExcessCharactersPenalty;
2603           TailOffset += Split.first + Split.second;
2604           RemainingTokenColumns = Token->getRemainingLength(
2605               LineIndex, TailOffset, ContentStartColumn);
2606           continue;
2607         }
2608       }
2609       LLVM_DEBUG(llvm::dbgs() << "    Breaking...\n");
2610       // Update the ContentIndent only if the current line was not reflown with
2611       // the previous line, since in that case the previous line should still
2612       // determine the ContentIndent. Also never intent the last line.
2613       if (!Reflow)
2614         ContentIndent = Token->getContentIndent(LineIndex);
2615       LLVM_DEBUG(llvm::dbgs()
2616                  << "    ContentIndent: " << ContentIndent << "\n");
2617       ContentStartColumn = ContentIndent + Token->getContentStartColumn(
2618                                                LineIndex, /*Break=*/true);
2619 
2620       unsigned NewRemainingTokenColumns = Token->getRemainingLength(
2621           LineIndex, TailOffset + Split.first + Split.second,
2622           ContentStartColumn);
2623       if (NewRemainingTokenColumns == 0) {
2624         // No content to indent.
2625         ContentIndent = 0;
2626         ContentStartColumn =
2627             Token->getContentStartColumn(LineIndex, /*Break=*/true);
2628         NewRemainingTokenColumns = Token->getRemainingLength(
2629             LineIndex, TailOffset + Split.first + Split.second,
2630             ContentStartColumn);
2631       }
2632 
2633       // When breaking before a tab character, it may be moved by a few columns,
2634       // but will still be expanded to the next tab stop, so we don't save any
2635       // columns.
2636       if (NewRemainingTokenColumns >= RemainingTokenColumns) {
2637         // FIXME: Do we need to adjust the penalty?
2638         break;
2639       }
2640 
2641       LLVM_DEBUG(llvm::dbgs() << "    Breaking at: " << TailOffset + Split.first
2642                               << ", " << Split.second << "\n");
2643       if (!DryRun) {
2644         Token->insertBreak(LineIndex, TailOffset, Split, ContentIndent,
2645                            Whitespaces);
2646       }
2647 
2648       Penalty += NewBreakPenalty;
2649       TailOffset += Split.first + Split.second;
2650       RemainingTokenColumns = NewRemainingTokenColumns;
2651       BreakInserted = true;
2652       NewBreakBefore = true;
2653     }
2654     // In case there's another line, prepare the state for the start of the next
2655     // line.
2656     if (LineIndex + 1 != EndIndex) {
2657       unsigned NextLineIndex = LineIndex + 1;
2658       if (NewBreakBefore) {
2659         // After breaking a line, try to reflow the next line into the current
2660         // one once RemainingTokenColumns fits.
2661         TryReflow = true;
2662       }
2663       if (TryReflow) {
2664         // We decided that we want to try reflowing the next line into the
2665         // current one.
2666         // We will now adjust the state as if the reflow is successful (in
2667         // preparation for the next line), and see whether that works. If we
2668         // decide that we cannot reflow, we will later reset the state to the
2669         // start of the next line.
2670         Reflow = false;
2671         // As we did not continue breaking the line, RemainingTokenColumns is
2672         // known to fit after ContentStartColumn. Adapt ContentStartColumn to
2673         // the position at which we want to format the next line if we do
2674         // actually reflow.
2675         // When we reflow, we need to add a space between the end of the current
2676         // line and the next line's start column.
2677         ContentStartColumn += RemainingTokenColumns + 1;
2678         // Get the split that we need to reflow next logical line into the end
2679         // of the current one; the split will include any leading whitespace of
2680         // the next logical line.
2681         BreakableToken::Split SplitBeforeNext =
2682             Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
2683         LLVM_DEBUG(llvm::dbgs()
2684                    << "    Size of reflown text: " << ContentStartColumn
2685                    << "\n    Potential reflow split: ");
2686         if (SplitBeforeNext.first != StringRef::npos) {
2687           LLVM_DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
2688                                   << SplitBeforeNext.second << "\n");
2689           TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
2690           // If the rest of the next line fits into the current line below the
2691           // column limit, we can safely reflow.
2692           RemainingTokenColumns = Token->getRemainingLength(
2693               NextLineIndex, TailOffset, ContentStartColumn);
2694           Reflow = true;
2695           if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
2696             LLVM_DEBUG(llvm::dbgs()
2697                        << "    Over limit after reflow, need: "
2698                        << (ContentStartColumn + RemainingTokenColumns)
2699                        << ", space: " << ColumnLimit
2700                        << ", reflown prefix: " << ContentStartColumn
2701                        << ", offset in line: " << TailOffset << "\n");
2702             // If the whole next line does not fit, try to find a point in
2703             // the next line at which we can break so that attaching the part
2704             // of the next line to that break point onto the current line is
2705             // below the column limit.
2706             BreakableToken::Split Split =
2707                 Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
2708                                 ContentStartColumn, CommentPragmasRegex);
2709             if (Split.first == StringRef::npos) {
2710               LLVM_DEBUG(llvm::dbgs() << "    Did not find later break\n");
2711               Reflow = false;
2712             } else {
2713               // Check whether the first split point gets us below the column
2714               // limit. Note that we will execute this split below as part of
2715               // the normal token breaking and reflow logic within the line.
2716               unsigned ToSplitColumns = Token->getRangeLength(
2717                   NextLineIndex, TailOffset, Split.first, ContentStartColumn);
2718               if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
2719                 LLVM_DEBUG(llvm::dbgs() << "    Next split protrudes, need: "
2720                                         << (ContentStartColumn + ToSplitColumns)
2721                                         << ", space: " << ColumnLimit);
2722                 unsigned ExcessCharactersPenalty =
2723                     (ContentStartColumn + ToSplitColumns - ColumnLimit) *
2724                     Style.PenaltyExcessCharacter;
2725                 if (NewBreakPenalty < ExcessCharactersPenalty)
2726                   Reflow = false;
2727               }
2728             }
2729           }
2730         } else {
2731           LLVM_DEBUG(llvm::dbgs() << "not found.\n");
2732         }
2733       }
2734       if (!Reflow) {
2735         // If we didn't reflow into the next line, the only space to consider is
2736         // the next logical line. Reset our state to match the start of the next
2737         // line.
2738         TailOffset = 0;
2739         ContentStartColumn =
2740             Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
2741         RemainingTokenColumns = Token->getRemainingLength(
2742             NextLineIndex, TailOffset, ContentStartColumn);
2743         // Adapt the start of the token, for example indent.
2744         if (!DryRun)
2745           Token->adaptStartOfLine(NextLineIndex, Whitespaces);
2746       } else {
2747         // If we found a reflow split and have added a new break before the next
2748         // line, we are going to remove the line break at the start of the next
2749         // logical line. For example, here we'll add a new line break after
2750         // 'text', and subsequently delete the line break between 'that' and
2751         // 'reflows'.
2752         //   // some text that
2753         //   // reflows
2754         // ->
2755         //   // some text
2756         //   // that reflows
2757         // When adding the line break, we also added the penalty for it, so we
2758         // need to subtract that penalty again when we remove the line break due
2759         // to reflowing.
2760         if (NewBreakBefore) {
2761           assert(Penalty >= NewBreakPenalty);
2762           Penalty -= NewBreakPenalty;
2763         }
2764         if (!DryRun)
2765           Token->reflow(NextLineIndex, Whitespaces);
2766       }
2767     }
2768   }
2769 
2770   BreakableToken::Split SplitAfterLastLine =
2771       Token->getSplitAfterLastLine(TailOffset);
2772   if (SplitAfterLastLine.first != StringRef::npos) {
2773     LLVM_DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
2774 
2775     // We add the last line's penalty here, since that line is going to be split
2776     // now.
2777     Penalty += Style.PenaltyExcessCharacter *
2778                (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
2779 
2780     if (!DryRun) {
2781       Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
2782                                             Whitespaces);
2783     }
2784     ContentStartColumn =
2785         Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
2786     RemainingTokenColumns = Token->getRemainingLength(
2787         Token->getLineCount() - 1,
2788         TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
2789         ContentStartColumn);
2790   }
2791 
2792   State.Column = ContentStartColumn + RemainingTokenColumns -
2793                  Current.UnbreakableTailLength;
2794 
2795   if (BreakInserted) {
2796     if (!DryRun)
2797       Token->updateAfterBroken(Whitespaces);
2798 
2799     // If we break the token inside a parameter list, we need to break before
2800     // the next parameter on all levels, so that the next parameter is clearly
2801     // visible. Line comments already introduce a break.
2802     if (Current.isNot(TT_LineComment))
2803       for (ParenState &Paren : State.Stack)
2804         Paren.BreakBeforeParameter = true;
2805 
2806     if (Current.is(TT_BlockComment))
2807       State.NoContinuation = true;
2808 
2809     State.Stack.back().LastSpace = StartColumn;
2810   }
2811 
2812   Token->updateNextToken(State);
2813 
2814   return {Penalty, Exceeded};
2815 }
2816 
getColumnLimit(const LineState & State) const2817 unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {
2818   // In preprocessor directives reserve two chars for trailing " \".
2819   return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0);
2820 }
2821 
nextIsMultilineString(const LineState & State)2822 bool ContinuationIndenter::nextIsMultilineString(const LineState &State) {
2823   const FormatToken &Current = *State.NextToken;
2824   if (!Current.isStringLiteral() || Current.is(TT_ImplicitStringLiteral))
2825     return false;
2826   // We never consider raw string literals "multiline" for the purpose of
2827   // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased
2828   // (see TokenAnnotator::mustBreakBefore().
2829   if (Current.TokenText.starts_with("R\""))
2830     return false;
2831   if (Current.IsMultiline)
2832     return true;
2833   if (Current.getNextNonComment() &&
2834       Current.getNextNonComment()->isStringLiteral()) {
2835     return true; // Implicit concatenation.
2836   }
2837   if (Style.ColumnLimit != 0 && Style.BreakStringLiterals &&
2838       State.Column + Current.ColumnWidth + Current.UnbreakableTailLength >
2839           Style.ColumnLimit) {
2840     return true; // String will be split.
2841   }
2842   return false;
2843 }
2844 
2845 } // namespace format
2846 } // namespace clang
2847