xref: /freebsd/contrib/llvm-project/clang/lib/Format/ContinuationIndenter.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
26 class AnnotatedLine;
27 class BreakableToken;
28 struct FormatToken;
29 struct LineState;
30 struct ParenState;
31 struct RawStringFormatStyleManager;
32 class WhitespaceManager;
33 
34 struct RawStringFormatStyleManager {
35   llvm::StringMap<FormatStyle> DelimiterStyle;
36   llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37 
38   RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39 
40   std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41 
42   std::optional<FormatStyle>
43   getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44 };
45 
46 class ContinuationIndenter {
47 public:
48   /// Constructs a \c ContinuationIndenter to format \p Line starting in
49   /// column \p FirstIndent.
50   ContinuationIndenter(const FormatStyle &Style,
51                        const AdditionalKeywords &Keywords,
52                        const SourceManager &SourceMgr,
53                        WhitespaceManager &Whitespaces,
54                        encoding::Encoding Encoding,
55                        bool BinPackInconclusiveFunctions);
56 
57   /// Get the initial state, i.e. the state after placing \p Line's
58   /// first token at \p FirstIndent. When reformatting a fragment of code, as in
59   /// the case of formatting inside raw string literals, \p FirstStartColumn is
60   /// the column at which the state of the parent formatter is.
61   LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62                             const AnnotatedLine *Line, bool DryRun);
63 
64   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65   // better home.
66   /// Returns \c true, if a line break after \p State is allowed.
67   bool canBreak(const LineState &State);
68 
69   /// Returns \c true, if a line break after \p State is mandatory.
70   bool mustBreak(const LineState &State);
71 
72   /// Appends the next token to \p State and updates information
73   /// necessary for indentation.
74   ///
75   /// Puts the token on the current line if \p Newline is \c false and adds a
76   /// line break and necessary indentation otherwise.
77   ///
78   /// If \p DryRun is \c false, also creates and stores the required
79   /// \c Replacement.
80   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81                            unsigned ExtraSpaces = 0);
82 
83   /// Get the column limit for this line. This is the style's column
84   /// limit, potentially reduced for preprocessor definitions.
85   unsigned getColumnLimit(const LineState &State) const;
86 
87 private:
88   /// Mark the next token as consumed in \p State and modify its stacks
89   /// accordingly.
90   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91 
92   /// Update 'State' according to the next token's fake left parentheses.
93   void moveStatePastFakeLParens(LineState &State, bool Newline);
94   /// Update 'State' according to the next token's fake r_parens.
95   void moveStatePastFakeRParens(LineState &State);
96 
97   /// Update 'State' according to the next token being one of "(<{[".
98   void moveStatePastScopeOpener(LineState &State, bool Newline);
99   /// Update 'State' according to the next token being one of ")>}]".
100   void moveStatePastScopeCloser(LineState &State);
101   /// Update 'State' with the next token opening a nested block.
102   void moveStateToNewBlock(LineState &State, bool NewLine);
103 
104   /// Reformats a raw string literal.
105   ///
106   /// \returns An extra penalty induced by reformatting the token.
107   unsigned reformatRawStringLiteral(const FormatToken &Current,
108                                     LineState &State,
109                                     const FormatStyle &RawStringStyle,
110                                     bool DryRun, bool Newline);
111 
112   /// If the current token is at the end of the current line, handle
113   /// the transition to the next line.
114   unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115                            bool DryRun, bool AllowBreak, bool Newline);
116 
117   /// If \p Current is a raw string that is configured to be reformatted,
118   /// return the style to be used.
119   std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120                                                const LineState &State);
121 
122   /// If the current token sticks out over the end of the line, break
123   /// it if possible.
124   ///
125   /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126   /// when tokens are broken or lines exceed the column limit, and exceeded
127   /// indicates whether the algorithm purposefully left lines exceeding the
128   /// column limit.
129   ///
130   /// The returned penalty will cover the cost of the additional line breaks
131   /// and column limit violation in all lines except for the last one. The
132   /// penalty for the column limit violation in the last line (and in single
133   /// line tokens) is handled in \c addNextStateToQueue.
134   ///
135   /// \p Strict indicates whether reflowing is allowed to leave characters
136   /// protruding the column limit; if true, lines will be split strictly within
137   /// the column limit where possible; if false, words are allowed to protrude
138   /// over the column limit as long as the penalty is less than the penalty
139   /// of a break.
140   std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141                                                  LineState &State,
142                                                  bool AllowBreak, bool DryRun,
143                                                  bool Strict);
144 
145   /// Returns the \c BreakableToken starting at \p Current, or nullptr
146   /// if the current token cannot be broken.
147   std::unique_ptr<BreakableToken>
148   createBreakableToken(const FormatToken &Current, LineState &State,
149                        bool AllowBreak);
150 
151   /// Appends the next token to \p State and updates information
152   /// necessary for indentation.
153   ///
154   /// Puts the token on the current line.
155   ///
156   /// If \p DryRun is \c false, also creates and stores the required
157   /// \c Replacement.
158   void addTokenOnCurrentLine(LineState &State, bool DryRun,
159                              unsigned ExtraSpaces);
160 
161   /// Appends the next token to \p State and updates information
162   /// necessary for indentation.
163   ///
164   /// Adds a line break and necessary indentation.
165   ///
166   /// If \p DryRun is \c false, also creates and stores the required
167   /// \c Replacement.
168   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169 
170   /// Calculate the new column for a line wrap before the next token.
171   unsigned getNewLineColumn(const LineState &State);
172 
173   /// Adds a multiline token to the \p State.
174   ///
175   /// \returns Extra penalty for the first line of the literal: last line is
176   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177   /// matter, as we don't change them.
178   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179 
180   /// Returns \c true if the next token starts a multiline string
181   /// literal.
182   ///
183   /// This includes implicitly concatenated strings, strings that will be broken
184   /// by clang-format and string literals with escaped newlines.
185   bool nextIsMultilineString(const LineState &State);
186 
187   FormatStyle Style;
188   const AdditionalKeywords &Keywords;
189   const SourceManager &SourceMgr;
190   WhitespaceManager &Whitespaces;
191   encoding::Encoding Encoding;
192   bool BinPackInconclusiveFunctions;
193   llvm::Regex CommentPragmasRegex;
194   const RawStringFormatStyleManager RawStringFormats;
195 };
196 
197 struct ParenState {
ParenStateParenState198   ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199              bool AvoidBinPacking, bool NoLineBreak)
200       : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
201         NestedBlockIndent(Indent), IsAligned(false),
202         BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
203         AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
204         NoLineBreak(NoLineBreak), NoLineBreakInOperand(false),
205         LastOperatorWrapped(true), ContainsLineBreak(false),
206         ContainsUnwrappedBuilder(false), AlignColons(true),
207         ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false),
208         NestedBlockInlined(false), IsInsideObjCArrayLiteral(false),
209         IsCSharpGenericTypeConstraint(false), IsChainedConditional(false),
210         IsWrappedConditional(false), UnindentOperator(false) {}
211 
212   /// \brief The token opening this parenthesis level, or nullptr if this level
213   /// is opened by fake parenthesis.
214   ///
215   /// Not considered for memoization as it will always have the same value at
216   /// the same token.
217   const FormatToken *Tok;
218 
219   /// The position to which a specific parenthesis level needs to be
220   /// indented.
221   unsigned Indent;
222 
223   /// The position of the last space on each level.
224   ///
225   /// Used e.g. to break like:
226   /// functionCall(Parameter, otherCall(
227   ///                             OtherParameter));
228   unsigned LastSpace;
229 
230   /// If a block relative to this parenthesis level gets wrapped, indent
231   /// it this much.
232   unsigned NestedBlockIndent;
233 
234   /// The position the first "<<" operator encountered on each level.
235   ///
236   /// Used to align "<<" operators. 0 if no such operator has been encountered
237   /// on a level.
238   unsigned FirstLessLess = 0;
239 
240   /// The column of a \c ? in a conditional expression;
241   unsigned QuestionColumn = 0;
242 
243   /// The position of the colon in an ObjC method declaration/call.
244   unsigned ColonPos = 0;
245 
246   /// The start of the most recent function in a builder-type call.
247   unsigned StartOfFunctionCall = 0;
248 
249   /// Contains the start of array subscript expressions, so that they
250   /// can be aligned.
251   unsigned StartOfArraySubscripts = 0;
252 
253   /// If a nested name specifier was broken over multiple lines, this
254   /// contains the start column of the second line. Otherwise 0.
255   unsigned NestedNameSpecifierContinuation = 0;
256 
257   /// If a call expression was broken over multiple lines, this
258   /// contains the start column of the second line. Otherwise 0.
259   unsigned CallContinuation = 0;
260 
261   /// The column of the first variable name in a variable declaration.
262   ///
263   /// Used to align further variables if necessary.
264   unsigned VariablePos = 0;
265 
266   /// Whether this block's indentation is used for alignment.
267   bool IsAligned : 1;
268 
269   /// Whether a newline needs to be inserted before the block's closing
270   /// brace.
271   ///
272   /// We only want to insert a newline before the closing brace if there also
273   /// was a newline after the beginning left brace.
274   bool BreakBeforeClosingBrace : 1;
275 
276   /// Whether a newline needs to be inserted before the block's closing
277   /// paren.
278   ///
279   /// We only want to insert a newline before the closing paren if there also
280   /// was a newline after the beginning left paren.
281   bool BreakBeforeClosingParen : 1;
282 
283   /// Avoid bin packing, i.e. multiple parameters/elements on multiple
284   /// lines, in this context.
285   bool AvoidBinPacking : 1;
286 
287   /// Break after the next comma (or all the commas in this context if
288   /// \c AvoidBinPacking is \c true).
289   bool BreakBeforeParameter : 1;
290 
291   /// Line breaking in this context would break a formatting rule.
292   bool NoLineBreak : 1;
293 
294   /// Same as \c NoLineBreak, but is restricted until the end of the
295   /// operand (including the next ",").
296   bool NoLineBreakInOperand : 1;
297 
298   /// True if the last binary operator on this level was wrapped to the
299   /// next line.
300   bool LastOperatorWrapped : 1;
301 
302   /// \c true if this \c ParenState already contains a line-break.
303   ///
304   /// The first line break in a certain \c ParenState causes extra penalty so
305   /// that clang-format prefers similar breaks, i.e. breaks in the same
306   /// parenthesis.
307   bool ContainsLineBreak : 1;
308 
309   /// \c true if this \c ParenState contains multiple segments of a
310   /// builder-type call on one line.
311   bool ContainsUnwrappedBuilder : 1;
312 
313   /// \c true if the colons of the curren ObjC method expression should
314   /// be aligned.
315   ///
316   /// Not considered for memoization as it will always have the same value at
317   /// the same token.
318   bool AlignColons : 1;
319 
320   /// \c true if at least one selector name was found in the current
321   /// ObjC method expression.
322   ///
323   /// Not considered for memoization as it will always have the same value at
324   /// the same token.
325   bool ObjCSelectorNameFound : 1;
326 
327   /// \c true if there are multiple nested blocks inside these parens.
328   ///
329   /// Not considered for memoization as it will always have the same value at
330   /// the same token.
331   bool HasMultipleNestedBlocks : 1;
332 
333   /// The start of a nested block (e.g. lambda introducer in C++ or
334   /// "function" in JavaScript) is not wrapped to a new line.
335   bool NestedBlockInlined : 1;
336 
337   /// \c true if the current \c ParenState represents an Objective-C
338   /// array literal.
339   bool IsInsideObjCArrayLiteral : 1;
340 
341   bool IsCSharpGenericTypeConstraint : 1;
342 
343   /// \brief true if the current \c ParenState represents the false branch of
344   /// a chained conditional expression (e.g. else-if)
345   bool IsChainedConditional : 1;
346 
347   /// \brief true if there conditionnal was wrapped on the first operator (the
348   /// question mark)
349   bool IsWrappedConditional : 1;
350 
351   /// \brief Indicates the indent should be reduced by the length of the
352   /// operator.
353   bool UnindentOperator : 1;
354 
355   bool operator<(const ParenState &Other) const {
356     if (Indent != Other.Indent)
357       return Indent < Other.Indent;
358     if (LastSpace != Other.LastSpace)
359       return LastSpace < Other.LastSpace;
360     if (NestedBlockIndent != Other.NestedBlockIndent)
361       return NestedBlockIndent < Other.NestedBlockIndent;
362     if (FirstLessLess != Other.FirstLessLess)
363       return FirstLessLess < Other.FirstLessLess;
364     if (IsAligned != Other.IsAligned)
365       return IsAligned;
366     if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
367       return BreakBeforeClosingBrace;
368     if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
369       return BreakBeforeClosingParen;
370     if (QuestionColumn != Other.QuestionColumn)
371       return QuestionColumn < Other.QuestionColumn;
372     if (AvoidBinPacking != Other.AvoidBinPacking)
373       return AvoidBinPacking;
374     if (BreakBeforeParameter != Other.BreakBeforeParameter)
375       return BreakBeforeParameter;
376     if (NoLineBreak != Other.NoLineBreak)
377       return NoLineBreak;
378     if (LastOperatorWrapped != Other.LastOperatorWrapped)
379       return LastOperatorWrapped;
380     if (ColonPos != Other.ColonPos)
381       return ColonPos < Other.ColonPos;
382     if (StartOfFunctionCall != Other.StartOfFunctionCall)
383       return StartOfFunctionCall < Other.StartOfFunctionCall;
384     if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
385       return StartOfArraySubscripts < Other.StartOfArraySubscripts;
386     if (CallContinuation != Other.CallContinuation)
387       return CallContinuation < Other.CallContinuation;
388     if (VariablePos != Other.VariablePos)
389       return VariablePos < Other.VariablePos;
390     if (ContainsLineBreak != Other.ContainsLineBreak)
391       return ContainsLineBreak;
392     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
393       return ContainsUnwrappedBuilder;
394     if (NestedBlockInlined != Other.NestedBlockInlined)
395       return NestedBlockInlined;
396     if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
397       return IsCSharpGenericTypeConstraint;
398     if (IsChainedConditional != Other.IsChainedConditional)
399       return IsChainedConditional;
400     if (IsWrappedConditional != Other.IsWrappedConditional)
401       return IsWrappedConditional;
402     if (UnindentOperator != Other.UnindentOperator)
403       return UnindentOperator;
404     return false;
405   }
406 };
407 
408 /// The current state when indenting a unwrapped line.
409 ///
410 /// As the indenting tries different combinations this is copied by value.
411 struct LineState {
412   /// The number of used columns in the current line.
413   unsigned Column;
414 
415   /// The token that needs to be next formatted.
416   FormatToken *NextToken;
417 
418   /// \c true if \p NextToken should not continue this line.
419   bool NoContinuation;
420 
421   /// The \c NestingLevel at the start of this line.
422   unsigned StartOfLineLevel;
423 
424   /// The lowest \c NestingLevel on the current line.
425   unsigned LowestLevelOnLine;
426 
427   /// The start column of the string literal, if we're in a string
428   /// literal sequence, 0 otherwise.
429   unsigned StartOfStringLiteral;
430 
431   /// Disallow line breaks for this line.
432   bool NoLineBreak;
433 
434   /// A stack keeping track of properties applying to parenthesis
435   /// levels.
436   SmallVector<ParenState> Stack;
437 
438   /// Ignore the stack of \c ParenStates for state comparison.
439   ///
440   /// In long and deeply nested unwrapped lines, the current algorithm can
441   /// be insufficient for finding the best formatting with a reasonable amount
442   /// of time and memory. Setting this flag will effectively lead to the
443   /// algorithm not analyzing some combinations. However, these combinations
444   /// rarely contain the optimal solution: In short, accepting a higher
445   /// penalty early would need to lead to different values in the \c
446   /// ParenState stack (in an otherwise identical state) and these different
447   /// values would need to lead to a significant amount of avoided penalty
448   /// later.
449   ///
450   /// FIXME: Come up with a better algorithm instead.
451   bool IgnoreStackForComparison;
452 
453   /// The indent of the first token.
454   unsigned FirstIndent;
455 
456   /// The line that is being formatted.
457   ///
458   /// Does not need to be considered for memoization because it doesn't change.
459   const AnnotatedLine *Line;
460 
461   /// Comparison operator to be able to used \c LineState in \c map.
462   bool operator<(const LineState &Other) const {
463     if (NextToken != Other.NextToken)
464       return NextToken < Other.NextToken;
465     if (Column != Other.Column)
466       return Column < Other.Column;
467     if (NoContinuation != Other.NoContinuation)
468       return NoContinuation;
469     if (StartOfLineLevel != Other.StartOfLineLevel)
470       return StartOfLineLevel < Other.StartOfLineLevel;
471     if (LowestLevelOnLine != Other.LowestLevelOnLine)
472       return LowestLevelOnLine < Other.LowestLevelOnLine;
473     if (StartOfStringLiteral != Other.StartOfStringLiteral)
474       return StartOfStringLiteral < Other.StartOfStringLiteral;
475     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
476       return false;
477     return Stack < Other.Stack;
478   }
479 };
480 
481 } // end namespace format
482 } // end namespace clang
483 
484 #endif
485