xref: /freebsd/contrib/llvm-project/clang/lib/Format/TokenAnnotator.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a token annotator, i.e. creates
/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
///
//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
160b57cec5SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric #include "UnwrappedLineParser.h"
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric namespace clang {
210b57cec5SDimitry Andric namespace format {
220b57cec5SDimitry Andric 
/// Classification of an \c AnnotatedLine. It is stored in
/// \c AnnotatedLine::Type, which the \c AnnotatedLine constructor initializes
/// to \c LT_Other.
enum LineType {
  LT_Invalid,
  // Contains public/private/protected followed by TT_InheritanceColon.
  LT_AccessModifier,
  LT_ImportStatement,
  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
  LT_ObjCMethodDecl,
  LT_ObjCProperty, // An @property line.
  LT_Other,
  LT_PreprocessorDirective,
  LT_VirtualFunctionDecl,
  LT_ArrayOfStructInitializer,
  LT_CommentAbovePPDirective,
};
370b57cec5SDimitry Andric 
/// Kind of scope a token is contained in. \c TokenAnnotator keeps a stack of
/// these in its \c Scopes member.
enum ScopeType {
  // Contained in class declaration/definition.
  ST_Class,
  // Contained within function definition.
  ST_Function,
  // Contained within other scope block (loop, if/else, etc).
  ST_Other,
};
4606c3fb27SDimitry Andric 
470b57cec5SDimitry Andric class AnnotatedLine {
480b57cec5SDimitry Andric public:
AnnotatedLine(const UnwrappedLine & Line)490b57cec5SDimitry Andric   AnnotatedLine(const UnwrappedLine &Line)
50*0fca6ea1SDimitry Andric       : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level),
51bdd1243dSDimitry Andric         PPLevel(Line.PPLevel),
520b57cec5SDimitry Andric         MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex),
530b57cec5SDimitry Andric         MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex),
540b57cec5SDimitry Andric         InPPDirective(Line.InPPDirective),
55bdd1243dSDimitry Andric         InPragmaDirective(Line.InPragmaDirective),
56bdd1243dSDimitry Andric         InMacroBody(Line.InMacroBody),
570b57cec5SDimitry Andric         MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false),
580b57cec5SDimitry Andric         IsMultiVariableDeclStmt(false), Affected(false),
590b57cec5SDimitry Andric         LeadingEmptyLinesAffected(false), ChildrenAffected(false),
60bdd1243dSDimitry Andric         ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation),
610b57cec5SDimitry Andric         FirstStartColumn(Line.FirstStartColumn) {
620b57cec5SDimitry Andric     assert(!Line.Tokens.empty());
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric     // Calculate Next and Previous for all tokens. Note that we must overwrite
650b57cec5SDimitry Andric     // Next and Previous for every token, as previous formatting runs might have
660b57cec5SDimitry Andric     // left them in a different state.
670b57cec5SDimitry Andric     First->Previous = nullptr;
680b57cec5SDimitry Andric     FormatToken *Current = First;
6906c3fb27SDimitry Andric     addChildren(Line.Tokens.front(), Current);
7004eeddc0SDimitry Andric     for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) {
7106c3fb27SDimitry Andric       if (Node.Tok->MacroParent)
7206c3fb27SDimitry Andric         ContainsMacroCall = true;
7304eeddc0SDimitry Andric       Current->Next = Node.Tok;
7404eeddc0SDimitry Andric       Node.Tok->Previous = Current;
750b57cec5SDimitry Andric       Current = Current->Next;
7606c3fb27SDimitry Andric       addChildren(Node, Current);
7706c3fb27SDimitry Andric       // FIXME: if we add children, previous will point to the token before
7806c3fb27SDimitry Andric       // the children; changing this requires significant changes across
7906c3fb27SDimitry Andric       // clang-format.
800b57cec5SDimitry Andric     }
810b57cec5SDimitry Andric     Last = Current;
820b57cec5SDimitry Andric     Last->Next = nullptr;
830b57cec5SDimitry Andric   }
840b57cec5SDimitry Andric 
addChildren(const UnwrappedLineNode & Node,FormatToken * Current)8506c3fb27SDimitry Andric   void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) {
8606c3fb27SDimitry Andric     Current->Children.clear();
8706c3fb27SDimitry Andric     for (const auto &Child : Node.Children) {
8806c3fb27SDimitry Andric       Children.push_back(new AnnotatedLine(Child));
8906c3fb27SDimitry Andric       if (Children.back()->ContainsMacroCall)
9006c3fb27SDimitry Andric         ContainsMacroCall = true;
9106c3fb27SDimitry Andric       Current->Children.push_back(Children.back());
9206c3fb27SDimitry Andric     }
9306c3fb27SDimitry Andric   }
9406c3fb27SDimitry Andric 
size()955f757f3fSDimitry Andric   size_t size() const {
965f757f3fSDimitry Andric     size_t Size = 1;
975f757f3fSDimitry Andric     for (const auto *Child : Children)
985f757f3fSDimitry Andric       Size += Child->size();
995f757f3fSDimitry Andric     return Size;
1005f757f3fSDimitry Andric   }
1015f757f3fSDimitry Andric 
~AnnotatedLine()1020b57cec5SDimitry Andric   ~AnnotatedLine() {
1031fd87a68SDimitry Andric     for (AnnotatedLine *Child : Children)
1041fd87a68SDimitry Andric       delete Child;
1050b57cec5SDimitry Andric     FormatToken *Current = First;
1060b57cec5SDimitry Andric     while (Current) {
1070b57cec5SDimitry Andric       Current->Children.clear();
1080b57cec5SDimitry Andric       Current->Role.reset();
1090b57cec5SDimitry Andric       Current = Current->Next;
1100b57cec5SDimitry Andric     }
1110b57cec5SDimitry Andric   }
1120b57cec5SDimitry Andric 
isComment()11381ad6265SDimitry Andric   bool isComment() const {
11481ad6265SDimitry Andric     return First && First->is(tok::comment) && !First->getNextNonComment();
11581ad6265SDimitry Andric   }
11681ad6265SDimitry Andric 
1170b57cec5SDimitry Andric   /// \c true if this line starts with the given tokens in order, ignoring
1180b57cec5SDimitry Andric   /// comments.
startsWith(Ts...Tokens)1190b57cec5SDimitry Andric   template <typename... Ts> bool startsWith(Ts... Tokens) const {
1200b57cec5SDimitry Andric     return First && First->startsSequence(Tokens...);
1210b57cec5SDimitry Andric   }
1220b57cec5SDimitry Andric 
1230b57cec5SDimitry Andric   /// \c true if this line ends with the given tokens in reversed order,
1240b57cec5SDimitry Andric   /// ignoring comments.
1250b57cec5SDimitry Andric   /// For example, given tokens [T1, T2, T3, ...], the function returns true if
1260b57cec5SDimitry Andric   /// this line is like "... T3 T2 T1".
endsWith(Ts...Tokens)1270b57cec5SDimitry Andric   template <typename... Ts> bool endsWith(Ts... Tokens) const {
1280b57cec5SDimitry Andric     return Last && Last->endsSequence(Tokens...);
1290b57cec5SDimitry Andric   }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric   /// \c true if this line looks like a function definition instead of a
1320b57cec5SDimitry Andric   /// function declaration. Asserts MightBeFunctionDecl.
mightBeFunctionDefinition()1330b57cec5SDimitry Andric   bool mightBeFunctionDefinition() const {
1340b57cec5SDimitry Andric     assert(MightBeFunctionDecl);
1350b57cec5SDimitry Andric     // Try to determine if the end of a stream of tokens is either the
1360b57cec5SDimitry Andric     // Definition or the Declaration for a function. It does this by looking for
1370b57cec5SDimitry Andric     // the ';' in foo(); and using that it ends with a ; to know this is the
1380b57cec5SDimitry Andric     // Definition, however the line could end with
1390b57cec5SDimitry Andric     //    foo(); /* comment */
1400b57cec5SDimitry Andric     // or
1410b57cec5SDimitry Andric     //    foo(); // comment
1420b57cec5SDimitry Andric     // or
1430b57cec5SDimitry Andric     //    foo() // comment
1440b57cec5SDimitry Andric     // endsWith() ignores the comment.
1450b57cec5SDimitry Andric     return !endsWith(tok::semi);
1460b57cec5SDimitry Andric   }
1470b57cec5SDimitry Andric 
1480b57cec5SDimitry Andric   /// \c true if this line starts a namespace definition.
startsWithNamespace()1490b57cec5SDimitry Andric   bool startsWithNamespace() const {
150a7dea167SDimitry Andric     return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) ||
1510b57cec5SDimitry Andric            startsWith(tok::kw_inline, tok::kw_namespace) ||
1520b57cec5SDimitry Andric            startsWith(tok::kw_export, tok::kw_namespace);
1530b57cec5SDimitry Andric   }
1540b57cec5SDimitry Andric 
getFirstNonComment()1555f757f3fSDimitry Andric   FormatToken *getFirstNonComment() const {
1565f757f3fSDimitry Andric     assert(First);
1575f757f3fSDimitry Andric     return First->is(tok::comment) ? First->getNextNonComment() : First;
1585f757f3fSDimitry Andric   }
1595f757f3fSDimitry Andric 
getLastNonComment()1605f757f3fSDimitry Andric   FormatToken *getLastNonComment() const {
1615f757f3fSDimitry Andric     assert(Last);
1625f757f3fSDimitry Andric     return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last;
1635f757f3fSDimitry Andric   }
1645f757f3fSDimitry Andric 
1650b57cec5SDimitry Andric   FormatToken *First;
1660b57cec5SDimitry Andric   FormatToken *Last;
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   SmallVector<AnnotatedLine *, 0> Children;
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric   LineType Type;
1710b57cec5SDimitry Andric   unsigned Level;
172bdd1243dSDimitry Andric   unsigned PPLevel;
1730b57cec5SDimitry Andric   size_t MatchingOpeningBlockLineIndex;
1740b57cec5SDimitry Andric   size_t MatchingClosingBlockLineIndex;
1750b57cec5SDimitry Andric   bool InPPDirective;
176bdd1243dSDimitry Andric   bool InPragmaDirective;
177bdd1243dSDimitry Andric   bool InMacroBody;
1780b57cec5SDimitry Andric   bool MustBeDeclaration;
1790b57cec5SDimitry Andric   bool MightBeFunctionDecl;
1800b57cec5SDimitry Andric   bool IsMultiVariableDeclStmt;
1810b57cec5SDimitry Andric 
18206c3fb27SDimitry Andric   /// \c True if this line contains a macro call for which an expansion exists.
18306c3fb27SDimitry Andric   bool ContainsMacroCall = false;
18406c3fb27SDimitry Andric 
1850b57cec5SDimitry Andric   /// \c True if this line should be formatted, i.e. intersects directly or
1860b57cec5SDimitry Andric   /// indirectly with one of the input ranges.
1870b57cec5SDimitry Andric   bool Affected;
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   /// \c True if the leading empty lines of this line intersect with one of the
1900b57cec5SDimitry Andric   /// input ranges.
1910b57cec5SDimitry Andric   bool LeadingEmptyLinesAffected;
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   /// \c True if one of this line's children intersects with an input range.
1940b57cec5SDimitry Andric   bool ChildrenAffected;
1950b57cec5SDimitry Andric 
196bdd1243dSDimitry Andric   /// \c True if breaking after last attribute group in function return type.
197bdd1243dSDimitry Andric   bool ReturnTypeWrapped;
198bdd1243dSDimitry Andric 
199bdd1243dSDimitry Andric   /// \c True if this line should be indented by ContinuationIndent in addition
200bdd1243dSDimitry Andric   /// to the normal indention level.
201bdd1243dSDimitry Andric   bool IsContinuation;
202bdd1243dSDimitry Andric 
2030b57cec5SDimitry Andric   unsigned FirstStartColumn;
2040b57cec5SDimitry Andric 
2050b57cec5SDimitry Andric private:
2060b57cec5SDimitry Andric   // Disallow copying.
2070b57cec5SDimitry Andric   AnnotatedLine(const AnnotatedLine &) = delete;
2080b57cec5SDimitry Andric   void operator=(const AnnotatedLine &) = delete;
2090b57cec5SDimitry Andric };
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric /// Determines extra information about the tokens comprising an
2120b57cec5SDimitry Andric /// \c UnwrappedLine.
2130b57cec5SDimitry Andric class TokenAnnotator {
2140b57cec5SDimitry Andric public:
TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)2150b57cec5SDimitry Andric   TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords)
216*0fca6ea1SDimitry Andric       : Style(Style), IsCpp(Style.isCpp()),
217*0fca6ea1SDimitry Andric         LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) {
218*0fca6ea1SDimitry Andric     assert(IsCpp == LangOpts.CXXOperatorNames);
219*0fca6ea1SDimitry Andric   }
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric   /// Adapts the indent levels of comment lines to the indent of the
2220b57cec5SDimitry Andric   /// subsequent line.
2230b57cec5SDimitry Andric   // FIXME: Can/should this be done in the UnwrappedLineParser?
22481ad6265SDimitry Andric   void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const;
2250b57cec5SDimitry Andric 
22606c3fb27SDimitry Andric   void annotate(AnnotatedLine &Line);
22781ad6265SDimitry Andric   void calculateFormattingInformation(AnnotatedLine &Line) const;
2280b57cec5SDimitry Andric 
2290b57cec5SDimitry Andric private:
2300b57cec5SDimitry Andric   /// Calculate the penalty for splitting before \c Tok.
2310b57cec5SDimitry Andric   unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok,
23281ad6265SDimitry Andric                         bool InFunctionDecl) const;
2330b57cec5SDimitry Andric 
2340b57cec5SDimitry Andric   bool spaceRequiredBeforeParens(const FormatToken &Right) const;
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric   bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left,
23781ad6265SDimitry Andric                             const FormatToken &Right) const;
2380b57cec5SDimitry Andric 
23981ad6265SDimitry Andric   bool spaceRequiredBefore(const AnnotatedLine &Line,
24081ad6265SDimitry Andric                            const FormatToken &Right) const;
2410b57cec5SDimitry Andric 
24281ad6265SDimitry Andric   bool mustBreakBefore(const AnnotatedLine &Line,
24381ad6265SDimitry Andric                        const FormatToken &Right) const;
2440b57cec5SDimitry Andric 
24581ad6265SDimitry Andric   bool canBreakBefore(const AnnotatedLine &Line,
24681ad6265SDimitry Andric                       const FormatToken &Right) const;
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric   bool mustBreakForReturnType(const AnnotatedLine &Line) const;
2490b57cec5SDimitry Andric 
25081ad6265SDimitry Andric   void printDebugInfo(const AnnotatedLine &Line) const;
2510b57cec5SDimitry Andric 
25281ad6265SDimitry Andric   void calculateUnbreakableTailLengths(AnnotatedLine &Line) const;
2530b57cec5SDimitry Andric 
25481ad6265SDimitry Andric   void calculateArrayInitializerColumnList(AnnotatedLine &Line) const;
255fe6060f1SDimitry Andric 
256fe6060f1SDimitry Andric   FormatToken *calculateInitializerColumnList(AnnotatedLine &Line,
257fe6060f1SDimitry Andric                                               FormatToken *CurrentToken,
25881ad6265SDimitry Andric                                               unsigned Depth) const;
259fe6060f1SDimitry Andric   FormatStyle::PointerAlignmentStyle
26081ad6265SDimitry Andric   getTokenReferenceAlignment(const FormatToken &PointerOrReference) const;
261fe6060f1SDimitry Andric 
26281ad6265SDimitry Andric   FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment(
26381ad6265SDimitry Andric       const FormatToken &PointerOrReference) const;
264fe6060f1SDimitry Andric 
2650b57cec5SDimitry Andric   const FormatStyle &Style;
2660b57cec5SDimitry Andric 
267*0fca6ea1SDimitry Andric   bool IsCpp;
268*0fca6ea1SDimitry Andric   LangOptions LangOpts;
269*0fca6ea1SDimitry Andric 
2700b57cec5SDimitry Andric   const AdditionalKeywords &Keywords;
27106c3fb27SDimitry Andric 
27206c3fb27SDimitry Andric   SmallVector<ScopeType> Scopes;
2730b57cec5SDimitry Andric };
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric } // end namespace format
2760b57cec5SDimitry Andric } // end namespace clang
2770b57cec5SDimitry Andric 
2780b57cec5SDimitry Andric #endif
279