xref: /freebsd/contrib/llvm-project/clang/lib/Format/TokenAnnotator.h (revision e92ffd9b626833ebdbf2742c8ffddc6cd94b963e)
1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
17 
18 #include "UnwrappedLineParser.h"
19 #include "clang/Format/Format.h"
20 
21 namespace clang {
22 class SourceManager;
23 
24 namespace format {
25 
/// Coarse classification of a line. An \c AnnotatedLine stores one of these
/// values in its \c Type member.
///
/// The enumerators rely on implicit numbering, so their order determines their
/// underlying values (\c LT_Invalid is 0); append new kinds at the end.
enum LineType {
  LT_Invalid,

  LT_ImportStatement,

  // An @interface, @implementation, or @protocol line.
  LT_ObjCDecl,

  LT_ObjCMethodDecl,

  // An @property line.
  LT_ObjCProperty,

  LT_Other,

  LT_PreprocessorDirective,

  LT_VirtualFunctionDecl,

  LT_ArrayOfStructInitializer,
};
37 
38 class AnnotatedLine {
39 public:
40   AnnotatedLine(const UnwrappedLine &Line)
41       : First(Line.Tokens.front().Tok), Level(Line.Level),
42         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
43         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
44         InPPDirective(Line.InPPDirective),
45         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
46         IsMultiVariableDeclStmt(false), Affected(false),
47         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
48         FirstStartColumn(Line.FirstStartColumn) {
49     assert(!Line.Tokens.empty());
50 
51     // Calculate Next and Previous for all tokens. Note that we must overwrite
52     // Next and Previous for every token, as previous formatting runs might have
53     // left them in a different state.
54     First->Previous = nullptr;
55     FormatToken *Current = First;
56     for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(),
57                                                       E = Line.Tokens.end();
58          I != E; ++I) {
59       const UnwrappedLineNode &Node = *I;
60       Current->Next = I->Tok;
61       I->Tok->Previous = Current;
62       Current = Current->Next;
63       Current->Children.clear();
64       for (const auto &Child : Node.Children) {
65         Children.push_back(new AnnotatedLine(Child));
66         Current->Children.push_back(Children.back());
67       }
68     }
69     Last = Current;
70     Last->Next = nullptr;
71   }
72 
73   ~AnnotatedLine() {
74     for (unsigned i = 0, e = Children.size(); i != e; ++i) {
75       delete Children[i];
76     }
77     FormatToken *Current = First;
78     while (Current) {
79       Current->Children.clear();
80       Current->Role.reset();
81       Current = Current->Next;
82     }
83   }
84 
85   /// \c true if this line starts with the given tokens in order, ignoring
86   /// comments.
87   template <typename... Ts> bool startsWith(Ts... Tokens) const {
88     return First && First->startsSequence(Tokens...);
89   }
90 
91   /// \c true if this line ends with the given tokens in reversed order,
92   /// ignoring comments.
93   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
94   /// this line is like "... T3 T2 T1".
95   template <typename... Ts> bool endsWith(Ts... Tokens) const {
96     return Last && Last->endsSequence(Tokens...);
97   }
98 
99   /// \c true if this line looks like a function definition instead of a
100   /// function declaration. Asserts MightBeFunctionDecl.
101   bool mightBeFunctionDefinition() const {
102     assert(MightBeFunctionDecl);
103     // Try to determine if the end of a stream of tokens is either the
104     // Definition or the Declaration for a function. It does this by looking for
105     // the ';' in foo(); and using that it ends with a ; to know this is the
106     // Definition, however the line could end with
107     //    foo(); /* comment */
108     // or
109     //    foo(); // comment
110     // or
111     //    foo() // comment
112     // endsWith() ignores the comment.
113     return !endsWith(tok::semi);
114   }
115 
116   /// \c true if this line starts a namespace definition.
117   bool startsWithNamespace() const {
118     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
119            startsWith(tok::kw_inline, tok::kw_namespace) ||
120            startsWith(tok::kw_export, tok::kw_namespace);
121   }
122 
123   FormatToken *First;
124   FormatToken *Last;
125 
126   SmallVector<AnnotatedLine *, 0> Children;
127 
128   LineType Type;
129   unsigned Level;
130   size_t MatchingOpeningBlockLineIndex;
131   size_t MatchingClosingBlockLineIndex;
132   bool InPPDirective;
133   bool MustBeDeclaration;
134   bool MightBeFunctionDecl;
135   bool IsMultiVariableDeclStmt;
136 
137   /// \c True if this line should be formatted, i.e. intersects directly or
138   /// indirectly with one of the input ranges.
139   bool Affected;
140 
141   /// \c True if the leading empty lines of this line intersect with one of the
142   /// input ranges.
143   bool LeadingEmptyLinesAffected;
144 
145   /// \c True if one of this line's children intersects with an input range.
146   bool ChildrenAffected;
147 
148   unsigned FirstStartColumn;
149 
150 private:
151   // Disallow copying.
152   AnnotatedLine(const AnnotatedLine &) = delete;
153   void operator=(const AnnotatedLine &) = delete;
154 };
155 
156 /// Determines extra information about the tokens comprising an
157 /// \c UnwrappedLine.
158 class TokenAnnotator {
159 public:
160   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
161       : Style(Style), Keywords(Keywords) {}
162 
163   /// Adapts the indent levels of comment lines to the indent of the
164   /// subsequent line.
165   // FIXME: Can/should this be done in the UnwrappedLineParser?
166   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines);
167 
168   void annotate(AnnotatedLine &Line);
169   void calculateFormattingInformation(AnnotatedLine &Line);
170 
171 private:
172   /// Calculate the penalty for splitting before \c Tok.
173   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
174                         bool InFunctionDecl);
175 
176   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
177 
178   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
179                             const FormatToken &Right);
180 
181   bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right);
182 
183   bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
184 
185   bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right);
186 
187   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
188 
189   void printDebugInfo(const AnnotatedLine &Line);
190 
191   void calculateUnbreakableTailLengths(AnnotatedLine &Line);
192 
193   void calculateArrayInitializerColumnList(AnnotatedLine &Line);
194 
195   FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
196                                               FormatToken *CurrentToken,
197                                               unsigned Depth);
198   FormatStyle::PointerAlignmentStyle
199   getTokenReferenceAlignment(const FormatToken &PointerOrReference);
200 
201   FormatStyle::PointerAlignmentStyle
202   getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference);
203 
204   const FormatStyle &Style;
205 
206   const AdditionalKeywords &Keywords;
207 };
208 
209 } // end namespace format
210 } // end namespace clang
211 
212 #endif
213