xref: /freebsd/contrib/llvm-project/clang/lib/Format/TokenAnnotator.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 
20 namespace clang {
21 namespace format {
22 
/// Classification of a line; stored in \c AnnotatedLine::Type, which a newly
/// constructed \c AnnotatedLine sets to \c LT_Other.
enum LineType {
  LT_Invalid,

  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,

  LT_ImportStatement,

  // An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,

  LT_ObjCMethodDecl,

  // An @property line.
  LT_ObjCProperty,

  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
37 
/// Kind of enclosing scope; \c TokenAnnotator keeps a vector of these in its
/// \c Scopes member.
enum ScopeType {
  /// Inside a class declaration/definition.
  ST_Class,
  /// Inside a function definition.
  ST_Function,
  /// Inside any other scope block (loop, if/else, etc).
  ST_Other,
};
46 
47 class AnnotatedLine {
48 public:
AnnotatedLine(const UnwrappedLine & Line)49   AnnotatedLine(const UnwrappedLine &Line)
50       : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
51         PPLevel(Line.PPLevel),
52         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
53         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
54         InPPDirective(Line.InPPDirective),
55         InPragmaDirective(Line.InPragmaDirective),
56         InMacroBody(Line.InMacroBody),
57         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
58         IsMultiVariableDeclStmt(false), Affected(false),
59         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
60         ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
61         FirstStartColumn(Line.FirstStartColumn) {
62     assert(!Line.Tokens.empty());
63 
64     // Calculate Next and Previous for all tokens. Note that we must overwrite
65     // Next and Previous for every token, as previous formatting runs might have
66     // left them in a different state.
67     First->Previous = nullptr;
68     FormatToken *Current = First;
69     addChildren(Line.Tokens.front(), Current);
70     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
71       if (Node.Tok->MacroParent)
72         ContainsMacroCall = true;
73       Current->Next = Node.Tok;
74       Node.Tok->Previous = Current;
75       Current = Current->Next;
76       addChildren(Node, Current);
77       // FIXME: if we add children, previous will point to the token before
78       // the children; changing this requires significant changes across
79       // clang-format.
80     }
81     Last = Current;
82     Last->Next = nullptr;
83   }
84 
addChildren(const UnwrappedLineNode & Node,FormatToken * Current)85   void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
86     Current->Children.clear();
87     for (const auto &Child : Node.Children) {
88       Children.push_back(new AnnotatedLine(Child));
89       if (Children.back()->ContainsMacroCall)
90         ContainsMacroCall = true;
91       Current->Children.push_back(Children.back());
92     }
93   }
94 
size()95   size_t size() const {
96     size_t Size = 1;
97     for (const auto *Child : Children)
98       Size += Child->size();
99     return Size;
100   }
101 
~AnnotatedLine()102   ~AnnotatedLine() {
103     for (AnnotatedLine *Child : Children)
104       delete Child;
105     FormatToken *Current = First;
106     while (Current) {
107       Current->Children.clear();
108       Current->Role.reset();
109       Current = Current->Next;
110     }
111   }
112 
isComment()113   bool isComment() const {
114     return First && First->is(tok::comment) && !First->getNextNonComment();
115   }
116 
117   /// \c true if this line starts with the given tokens in order, ignoring
118   /// comments.
startsWith(Ts...Tokens)119   template <typename... Ts> bool startsWith(Ts... Tokens) const {
120     return First && First->startsSequence(Tokens...);
121   }
122 
123   /// \c true if this line ends with the given tokens in reversed order,
124   /// ignoring comments.
125   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
126   /// this line is like "... T3 T2 T1".
endsWith(Ts...Tokens)127   template <typename... Ts> bool endsWith(Ts... Tokens) const {
128     return Last && Last->endsSequence(Tokens...);
129   }
130 
131   /// \c true if this line looks like a function definition instead of a
132   /// function declaration. Asserts MightBeFunctionDecl.
mightBeFunctionDefinition()133   bool mightBeFunctionDefinition() const {
134     assert(MightBeFunctionDecl);
135     // Try to determine if the end of a stream of tokens is either the
136     // Definition or the Declaration for a function. It does this by looking for
137     // the ';' in foo(); and using that it ends with a ; to know this is the
138     // Definition, however the line could end with
139     //    foo(); /* comment */
140     // or
141     //    foo(); // comment
142     // or
143     //    foo() // comment
144     // endsWith() ignores the comment.
145     return !endsWith(tok::semi);
146   }
147 
148   /// \c true if this line starts a namespace definition.
startsWithNamespace()149   bool startsWithNamespace() const {
150     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
151            startsWith(tok::kw_inline, tok::kw_namespace) ||
152            startsWith(tok::kw_export, tok::kw_namespace);
153   }
154 
getFirstNonComment()155   FormatToken *getFirstNonComment() const {
156     assert(First);
157     return First->is(tok::comment) ? First->getNextNonComment() : First;
158   }
159 
getLastNonComment()160   FormatToken *getLastNonComment() const {
161     assert(Last);
162     return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
163   }
164 
165   FormatToken *First;
166   FormatToken *Last;
167 
168   SmallVector<AnnotatedLine *, 0> Children;
169 
170   LineType Type;
171   unsigned Level;
172   unsigned PPLevel;
173   size_t MatchingOpeningBlockLineIndex;
174   size_t MatchingClosingBlockLineIndex;
175   bool InPPDirective;
176   bool InPragmaDirective;
177   bool InMacroBody;
178   bool MustBeDeclaration;
179   bool MightBeFunctionDecl;
180   bool IsMultiVariableDeclStmt;
181 
182   /// \c True if this line contains a macro call for which an expansion exists.
183   bool ContainsMacroCall = false;
184 
185   /// \c True if this line should be formatted, i.e. intersects directly or
186   /// indirectly with one of the input ranges.
187   bool Affected;
188 
189   /// \c True if the leading empty lines of this line intersect with one of the
190   /// input ranges.
191   bool LeadingEmptyLinesAffected;
192 
193   /// \c True if one of this line's children intersects with an input range.
194   bool ChildrenAffected;
195 
196   /// \c True if breaking after last attribute group in function return type.
197   bool ReturnTypeWrapped;
198 
199   /// \c True if this line should be indented by ContinuationIndent in addition
200   /// to the normal indention level.
201   bool IsContinuation;
202 
203   unsigned FirstStartColumn;
204 
205 private:
206   // Disallow copying.
207   AnnotatedLine(const AnnotatedLine &) = delete;
208   void operator=(const AnnotatedLine &) = delete;
209 };
210 
211 /// Determines extra information about the tokens comprising an
212 /// \c UnwrappedLine.
213 class TokenAnnotator {
214 public:
TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)215   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
216       : Style(Style), IsCpp(Style.isCpp()),
217         LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
218     assert(IsCpp == LangOpts.CXXOperatorNames);
219   }
220 
221   /// Adapts the indent levels of comment lines to the indent of the
222   /// subsequent line.
223   // FIXME: Can/should this be done in the UnwrappedLineParser?
224   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
225 
226   void annotate(AnnotatedLine &Line);
227   void calculateFormattingInformation(AnnotatedLine &Line) const;
228 
229 private:
230   /// Calculate the penalty for splitting before \c Tok.
231   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
232                         bool InFunctionDecl) const;
233 
234   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
235 
236   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
237                             const FormatToken &Right) const;
238 
239   bool spaceRequiredBefore(const AnnotatedLine &Line,
240                            const FormatToken &Right) const;
241 
242   bool mustBreakBefore(const AnnotatedLine &Line,
243                        const FormatToken &Right) const;
244 
245   bool canBreakBefore(const AnnotatedLine &Line,
246                       const FormatToken &Right) const;
247 
248   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
249 
250   void printDebugInfo(const AnnotatedLine &Line) const;
251 
252   void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
253 
254   void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
255 
256   FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
257                                               FormatToken *CurrentToken,
258                                               unsigned Depth) const;
259   FormatStyle::PointerAlignmentStyle
260   getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
261 
262   FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
263       const FormatToken &PointerOrReference) const;
264 
265   const FormatStyle &Style;
266 
267   bool IsCpp;
268   LangOptions LangOpts;
269 
270   const AdditionalKeywords &Keywords;
271 
272   SmallVector<ScopeType> Scopes;
273 };
274 
275 } // end namespace format
276 } // end namespace clang
277 
278 #endif
279