xref: /freebsd/contrib/llvm-project/clang/lib/Format/ContinuationIndenter.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements an indenter that manages the indentation of
11 /// continuations.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
16 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
17 
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
// Forward declarations of types that this header refers to before (or
// without) seeing their full definition.
class AnnotatedLine;
class BreakableToken;
struct FormatToken;
struct LineState;
struct ParenState;
struct RawStringFormatStyleManager;
class WhitespaceManager;
33 
34 struct RawStringFormatStyleManager {
35   llvm::StringMap<FormatStyle> DelimiterStyle;
36   llvm::StringMap<FormatStyle> EnclosingFunctionStyle;
37 
38   RawStringFormatStyleManager(const FormatStyle &CodeStyle);
39 
40   std::optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const;
41 
42   std::optional<FormatStyle>
43   getEnclosingFunctionStyle(StringRef EnclosingFunction) const;
44 };
45 
46 class ContinuationIndenter {
47 public:
48   /// Constructs a \c ContinuationIndenter to format \p Line starting in
49   /// column \p FirstIndent.
50   ContinuationIndenter(const FormatStyle &Style,
51                        const AdditionalKeywords &Keywords,
52                        const SourceManager &SourceMgr,
53                        WhitespaceManager &Whitespaces,
54                        encoding::Encoding Encoding,
55                        bool BinPackInconclusiveFunctions);
56 
57   /// Get the initial state, i.e. the state after placing \p Line's
58   /// first token at \p FirstIndent. When reformatting a fragment of code, as in
59   /// the case of formatting inside raw string literals, \p FirstStartColumn is
60   /// the column at which the state of the parent formatter is.
61   LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn,
62                             const AnnotatedLine *Line, bool DryRun);
63 
64   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
65   // better home.
66   /// Returns \c true, if a line break after \p State is allowed.
67   bool canBreak(const LineState &State);
68 
69   /// Returns \c true, if a line break after \p State is mandatory.
70   bool mustBreak(const LineState &State);
71 
72   /// Appends the next token to \p State and updates information
73   /// necessary for indentation.
74   ///
75   /// Puts the token on the current line if \p Newline is \c false and adds a
76   /// line break and necessary indentation otherwise.
77   ///
78   /// If \p DryRun is \c false, also creates and stores the required
79   /// \c Replacement.
80   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
81                            unsigned ExtraSpaces = 0);
82 
83   /// Get the column limit for this line. This is the style's column
84   /// limit, potentially reduced for preprocessor definitions.
85   unsigned getColumnLimit(const LineState &State) const;
86 
87 private:
88   /// Mark the next token as consumed in \p State and modify its stacks
89   /// accordingly.
90   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
91 
92   /// Update 'State' according to the next token's fake left parentheses.
93   void moveStatePastFakeLParens(LineState &State, bool Newline);
94   /// Update 'State' according to the next token's fake r_parens.
95   void moveStatePastFakeRParens(LineState &State);
96 
97   /// Update 'State' according to the next token being one of "(<{[".
98   void moveStatePastScopeOpener(LineState &State, bool Newline);
99   /// Update 'State' according to the next token being one of ")>}]".
100   void moveStatePastScopeCloser(LineState &State);
101   /// Update 'State' with the next token opening a nested block.
102   void moveStateToNewBlock(LineState &State, bool NewLine);
103 
104   /// Reformats a raw string literal.
105   ///
106   /// \returns An extra penalty induced by reformatting the token.
107   unsigned reformatRawStringLiteral(const FormatToken &Current,
108                                     LineState &State,
109                                     const FormatStyle &RawStringStyle,
110                                     bool DryRun, bool Newline);
111 
112   /// If the current token is at the end of the current line, handle
113   /// the transition to the next line.
114   unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
115                            bool DryRun, bool AllowBreak, bool Newline);
116 
117   /// If \p Current is a raw string that is configured to be reformatted,
118   /// return the style to be used.
119   std::optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
120                                                const LineState &State);
121 
122   /// If the current token sticks out over the end of the line, break
123   /// it if possible.
124   ///
125   /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
126   /// when tokens are broken or lines exceed the column limit, and exceeded
127   /// indicates whether the algorithm purposefully left lines exceeding the
128   /// column limit.
129   ///
130   /// The returned penalty will cover the cost of the additional line breaks
131   /// and column limit violation in all lines except for the last one. The
132   /// penalty for the column limit violation in the last line (and in single
133   /// line tokens) is handled in \c addNextStateToQueue.
134   ///
135   /// \p Strict indicates whether reflowing is allowed to leave characters
136   /// protruding the column limit; if true, lines will be split strictly within
137   /// the column limit where possible; if false, words are allowed to protrude
138   /// over the column limit as long as the penalty is less than the penalty
139   /// of a break.
140   std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
141                                                  LineState &State,
142                                                  bool AllowBreak, bool DryRun,
143                                                  bool Strict);
144 
145   /// Returns the \c BreakableToken starting at \p Current, or nullptr
146   /// if the current token cannot be broken.
147   std::unique_ptr<BreakableToken>
148   createBreakableToken(const FormatToken &Current, LineState &State,
149                        bool AllowBreak);
150 
151   /// Appends the next token to \p State and updates information
152   /// necessary for indentation.
153   ///
154   /// Puts the token on the current line.
155   ///
156   /// If \p DryRun is \c false, also creates and stores the required
157   /// \c Replacement.
158   void addTokenOnCurrentLine(LineState &State, bool DryRun,
159                              unsigned ExtraSpaces);
160 
161   /// Appends the next token to \p State and updates information
162   /// necessary for indentation.
163   ///
164   /// Adds a line break and necessary indentation.
165   ///
166   /// If \p DryRun is \c false, also creates and stores the required
167   /// \c Replacement.
168   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
169 
170   /// Calculate the new column for a line wrap before the next token.
171   unsigned getNewLineColumn(const LineState &State);
172 
173   /// Adds a multiline token to the \p State.
174   ///
175   /// \returns Extra penalty for the first line of the literal: last line is
176   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
177   /// matter, as we don't change them.
178   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
179 
180   /// Returns \c true if the next token starts a multiline string
181   /// literal.
182   ///
183   /// This includes implicitly concatenated strings, strings that will be broken
184   /// by clang-format and string literals with escaped newlines.
185   bool nextIsMultilineString(const LineState &State);
186 
187   FormatStyle Style;
188   const AdditionalKeywords &Keywords;
189   const SourceManager &SourceMgr;
190   WhitespaceManager &Whitespaces;
191   encoding::Encoding Encoding;
192   bool BinPackInconclusiveFunctions;
193   llvm::Regex CommentPragmasRegex;
194   const RawStringFormatStyleManager RawStringFormats;
195 };
196 
197 struct ParenState {
ParenStateParenState198   ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace,
199              bool AvoidBinPacking, bool NoLineBreak)
200       : Tok(Tok), Indent(Indent), LastSpace(LastSpace),
201         NestedBlockIndent(Indent), IsAligned(false),
202         BreakBeforeClosingBrace(false), BreakBeforeClosingParen(false),
203         BreakBeforeClosingAngle(false), AvoidBinPacking(AvoidBinPacking),
204         BreakBeforeParameter(false), NoLineBreak(NoLineBreak),
205         NoLineBreakInOperand(false), LastOperatorWrapped(true),
206         ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
207         AlignColons(true), ObjCSelectorNameFound(false),
208         HasMultipleNestedBlocks(false), NestedBlockInlined(false),
209         IsInsideObjCArrayLiteral(false), IsCSharpGenericTypeConstraint(false),
210         IsChainedConditional(false), IsWrappedConditional(false),
211         UnindentOperator(false) {}
212 
213   /// The token opening this parenthesis level, or nullptr if this level is
214   /// opened by fake parenthesis.
215   ///
216   /// Not considered for memoization as it will always have the same value at
217   /// the same token.
218   const FormatToken *Tok;
219 
220   /// The position to which a specific parenthesis level needs to be
221   /// indented.
222   unsigned Indent;
223 
224   /// The position of the last space on each level.
225   ///
226   /// Used e.g. to break like:
227   /// functionCall(Parameter, otherCall(
228   ///                             OtherParameter));
229   unsigned LastSpace;
230 
231   /// If a block relative to this parenthesis level gets wrapped, indent
232   /// it this much.
233   unsigned NestedBlockIndent;
234 
235   /// The position the first "<<" operator encountered on each level.
236   ///
237   /// Used to align "<<" operators. 0 if no such operator has been encountered
238   /// on a level.
239   unsigned FirstLessLess = 0;
240 
241   /// The column of a \c ? in a conditional expression;
242   unsigned QuestionColumn = 0;
243 
244   /// The position of the colon in an ObjC method declaration/call.
245   unsigned ColonPos = 0;
246 
247   /// The start of the most recent function in a builder-type call.
248   unsigned StartOfFunctionCall = 0;
249 
250   /// Contains the start of array subscript expressions, so that they
251   /// can be aligned.
252   unsigned StartOfArraySubscripts = 0;
253 
254   /// If a nested name specifier was broken over multiple lines, this
255   /// contains the start column of the second line. Otherwise 0.
256   unsigned NestedNameSpecifierContinuation = 0;
257 
258   /// If a call expression was broken over multiple lines, this
259   /// contains the start column of the second line. Otherwise 0.
260   unsigned CallContinuation = 0;
261 
262   /// The column of the first variable name in a variable declaration.
263   ///
264   /// Used to align further variables if necessary.
265   unsigned VariablePos = 0;
266 
267   /// Whether this block's indentation is used for alignment.
268   bool IsAligned : 1;
269 
270   /// Whether a newline needs to be inserted before the block's closing
271   /// brace.
272   ///
273   /// We only want to insert a newline before the closing brace if there also
274   /// was a newline after the beginning left brace.
275   bool BreakBeforeClosingBrace : 1;
276 
277   /// Whether a newline needs to be inserted before the block's closing
278   /// paren.
279   ///
280   /// We only want to insert a newline before the closing paren if there also
281   /// was a newline after the beginning left paren.
282   bool BreakBeforeClosingParen : 1;
283 
284   /// Whether a newline needs to be inserted before a closing angle `>`.
285   bool BreakBeforeClosingAngle : 1;
286 
287   /// Avoid bin packing, i.e. multiple parameters/elements on multiple
288   /// lines, in this context.
289   bool AvoidBinPacking : 1;
290 
291   /// Break after the next comma (or all the commas in this context if
292   /// \c AvoidBinPacking is \c true).
293   bool BreakBeforeParameter : 1;
294 
295   /// Line breaking in this context would break a formatting rule.
296   bool NoLineBreak : 1;
297 
298   /// Same as \c NoLineBreak, but is restricted until the end of the
299   /// operand (including the next ",").
300   bool NoLineBreakInOperand : 1;
301 
302   /// True if the last binary operator on this level was wrapped to the
303   /// next line.
304   bool LastOperatorWrapped : 1;
305 
306   /// \c true if this \c ParenState already contains a line-break.
307   ///
308   /// The first line break in a certain \c ParenState causes extra penalty so
309   /// that clang-format prefers similar breaks, i.e. breaks in the same
310   /// parenthesis.
311   bool ContainsLineBreak : 1;
312 
313   /// \c true if this \c ParenState contains multiple segments of a
314   /// builder-type call on one line.
315   bool ContainsUnwrappedBuilder : 1;
316 
317   /// \c true if the colons of the curren ObjC method expression should
318   /// be aligned.
319   ///
320   /// Not considered for memoization as it will always have the same value at
321   /// the same token.
322   bool AlignColons : 1;
323 
324   /// \c true if at least one selector name was found in the current
325   /// ObjC method expression.
326   ///
327   /// Not considered for memoization as it will always have the same value at
328   /// the same token.
329   bool ObjCSelectorNameFound : 1;
330 
331   /// \c true if there are multiple nested blocks inside these parens.
332   ///
333   /// Not considered for memoization as it will always have the same value at
334   /// the same token.
335   bool HasMultipleNestedBlocks : 1;
336 
337   /// The start of a nested block (e.g. lambda introducer in C++ or
338   /// "function" in JavaScript) is not wrapped to a new line.
339   bool NestedBlockInlined : 1;
340 
341   /// \c true if the current \c ParenState represents an Objective-C
342   /// array literal.
343   bool IsInsideObjCArrayLiteral : 1;
344 
345   bool IsCSharpGenericTypeConstraint : 1;
346 
347   /// true if the current \c ParenState represents the false branch of a chained
348   /// conditional expression (e.g. else-if)
349   bool IsChainedConditional : 1;
350 
351   /// true if there conditionnal was wrapped on the first operator (the question
352   /// mark)
353   bool IsWrappedConditional : 1;
354 
355   /// Indicates the indent should be reduced by the length of the operator.
356   bool UnindentOperator : 1;
357 
358   bool operator<(const ParenState &Other) const {
359     if (Indent != Other.Indent)
360       return Indent < Other.Indent;
361     if (LastSpace != Other.LastSpace)
362       return LastSpace < Other.LastSpace;
363     if (NestedBlockIndent != Other.NestedBlockIndent)
364       return NestedBlockIndent < Other.NestedBlockIndent;
365     if (FirstLessLess != Other.FirstLessLess)
366       return FirstLessLess < Other.FirstLessLess;
367     if (IsAligned != Other.IsAligned)
368       return IsAligned;
369     if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
370       return BreakBeforeClosingBrace;
371     if (BreakBeforeClosingParen != Other.BreakBeforeClosingParen)
372       return BreakBeforeClosingParen;
373     if (BreakBeforeClosingAngle != Other.BreakBeforeClosingAngle)
374       return BreakBeforeClosingAngle;
375     if (QuestionColumn != Other.QuestionColumn)
376       return QuestionColumn < Other.QuestionColumn;
377     if (AvoidBinPacking != Other.AvoidBinPacking)
378       return AvoidBinPacking;
379     if (BreakBeforeParameter != Other.BreakBeforeParameter)
380       return BreakBeforeParameter;
381     if (NoLineBreak != Other.NoLineBreak)
382       return NoLineBreak;
383     if (LastOperatorWrapped != Other.LastOperatorWrapped)
384       return LastOperatorWrapped;
385     if (ColonPos != Other.ColonPos)
386       return ColonPos < Other.ColonPos;
387     if (StartOfFunctionCall != Other.StartOfFunctionCall)
388       return StartOfFunctionCall < Other.StartOfFunctionCall;
389     if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
390       return StartOfArraySubscripts < Other.StartOfArraySubscripts;
391     if (CallContinuation != Other.CallContinuation)
392       return CallContinuation < Other.CallContinuation;
393     if (VariablePos != Other.VariablePos)
394       return VariablePos < Other.VariablePos;
395     if (ContainsLineBreak != Other.ContainsLineBreak)
396       return ContainsLineBreak;
397     if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
398       return ContainsUnwrappedBuilder;
399     if (NestedBlockInlined != Other.NestedBlockInlined)
400       return NestedBlockInlined;
401     if (IsCSharpGenericTypeConstraint != Other.IsCSharpGenericTypeConstraint)
402       return IsCSharpGenericTypeConstraint;
403     if (IsChainedConditional != Other.IsChainedConditional)
404       return IsChainedConditional;
405     if (IsWrappedConditional != Other.IsWrappedConditional)
406       return IsWrappedConditional;
407     if (UnindentOperator != Other.UnindentOperator)
408       return UnindentOperator;
409     return false;
410   }
411 };
412 
413 /// The current state when indenting a unwrapped line.
414 ///
415 /// As the indenting tries different combinations this is copied by value.
416 struct LineState {
417   /// The number of used columns in the current line.
418   unsigned Column;
419 
420   /// The token that needs to be next formatted.
421   FormatToken *NextToken;
422 
423   /// \c true if \p NextToken should not continue this line.
424   bool NoContinuation;
425 
426   /// The \c NestingLevel at the start of this line.
427   unsigned StartOfLineLevel;
428 
429   /// The lowest \c NestingLevel on the current line.
430   unsigned LowestLevelOnLine;
431 
432   /// The start column of the string literal, if we're in a string
433   /// literal sequence, 0 otherwise.
434   unsigned StartOfStringLiteral;
435 
436   /// Disallow line breaks for this line.
437   bool NoLineBreak;
438 
439   /// A stack keeping track of properties applying to parenthesis
440   /// levels.
441   SmallVector<ParenState> Stack;
442 
443   /// Ignore the stack of \c ParenStates for state comparison.
444   ///
445   /// In long and deeply nested unwrapped lines, the current algorithm can
446   /// be insufficient for finding the best formatting with a reasonable amount
447   /// of time and memory. Setting this flag will effectively lead to the
448   /// algorithm not analyzing some combinations. However, these combinations
449   /// rarely contain the optimal solution: In short, accepting a higher
450   /// penalty early would need to lead to different values in the \c
451   /// ParenState stack (in an otherwise identical state) and these different
452   /// values would need to lead to a significant amount of avoided penalty
453   /// later.
454   ///
455   /// FIXME: Come up with a better algorithm instead.
456   bool IgnoreStackForComparison;
457 
458   /// The indent of the first token.
459   unsigned FirstIndent;
460 
461   /// The line that is being formatted.
462   ///
463   /// Does not need to be considered for memoization because it doesn't change.
464   const AnnotatedLine *Line;
465 
466   /// Comparison operator to be able to used \c LineState in \c map.
467   bool operator<(const LineState &Other) const {
468     if (NextToken != Other.NextToken)
469       return NextToken < Other.NextToken;
470     if (Column != Other.Column)
471       return Column < Other.Column;
472     if (NoContinuation != Other.NoContinuation)
473       return NoContinuation;
474     if (StartOfLineLevel != Other.StartOfLineLevel)
475       return StartOfLineLevel < Other.StartOfLineLevel;
476     if (LowestLevelOnLine != Other.LowestLevelOnLine)
477       return LowestLevelOnLine < Other.LowestLevelOnLine;
478     if (StartOfStringLiteral != Other.StartOfStringLiteral)
479       return StartOfStringLiteral < Other.StartOfStringLiteral;
480     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
481       return false;
482     return Stack < Other.Stack;
483   }
484 };
485 
486 } // end namespace format
487 } // end namespace clang
488 
489 #endif
490