10b57cec5SDimitry Andric //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This file implements a token annotator, i.e. creates 110b57cec5SDimitry Andric /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 160b57cec5SDimitry Andric #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 170b57cec5SDimitry Andric 180b57cec5SDimitry Andric #include "UnwrappedLineParser.h" 190b57cec5SDimitry Andric 200b57cec5SDimitry Andric namespace clang { 210b57cec5SDimitry Andric namespace format { 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric enum LineType { 240b57cec5SDimitry Andric LT_Invalid, 25*0fca6ea1SDimitry Andric // Contains public/private/protected followed by TT_InheritanceColon. 26*0fca6ea1SDimitry Andric LT_AccessModifier, 270b57cec5SDimitry Andric LT_ImportStatement, 280b57cec5SDimitry Andric LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 290b57cec5SDimitry Andric LT_ObjCMethodDecl, 300b57cec5SDimitry Andric LT_ObjCProperty, // An @property line. 310b57cec5SDimitry Andric LT_Other, 320b57cec5SDimitry Andric LT_PreprocessorDirective, 33fe6060f1SDimitry Andric LT_VirtualFunctionDecl, 34fe6060f1SDimitry Andric LT_ArrayOfStructInitializer, 35bdd1243dSDimitry Andric LT_CommentAbovePPDirective, 360b57cec5SDimitry Andric }; 370b57cec5SDimitry Andric 3806c3fb27SDimitry Andric enum ScopeType { 3906c3fb27SDimitry Andric // Contained in class declaration/definition. 4006c3fb27SDimitry Andric ST_Class, 4106c3fb27SDimitry Andric // Contained within function definition. 4206c3fb27SDimitry Andric ST_Function, 4306c3fb27SDimitry Andric // Contained within other scope block (loop, if/else, etc). 4406c3fb27SDimitry Andric ST_Other, 4506c3fb27SDimitry Andric }; 4606c3fb27SDimitry Andric 470b57cec5SDimitry Andric class AnnotatedLine { 480b57cec5SDimitry Andric public: AnnotatedLine(const UnwrappedLine & Line)490b57cec5SDimitry Andric AnnotatedLine(const UnwrappedLine &Line) 50*0fca6ea1SDimitry Andric : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level), 51bdd1243dSDimitry Andric PPLevel(Line.PPLevel), 520b57cec5SDimitry Andric MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 530b57cec5SDimitry Andric MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 540b57cec5SDimitry Andric InPPDirective(Line.InPPDirective), 55bdd1243dSDimitry Andric InPragmaDirective(Line.InPragmaDirective), 56bdd1243dSDimitry Andric InMacroBody(Line.InMacroBody), 570b57cec5SDimitry Andric MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 580b57cec5SDimitry Andric IsMultiVariableDeclStmt(false), Affected(false), 590b57cec5SDimitry Andric LeadingEmptyLinesAffected(false), ChildrenAffected(false), 60bdd1243dSDimitry Andric ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), 610b57cec5SDimitry Andric FirstStartColumn(Line.FirstStartColumn) { 620b57cec5SDimitry Andric assert(!Line.Tokens.empty()); 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric // Calculate Next and Previous for all tokens. Note that we must overwrite 650b57cec5SDimitry Andric // Next and Previous for every token, as previous formatting runs might have 660b57cec5SDimitry Andric // left them in a different state. 670b57cec5SDimitry Andric First->Previous = nullptr; 680b57cec5SDimitry Andric FormatToken *Current = First; 6906c3fb27SDimitry Andric addChildren(Line.Tokens.front(), Current); 7004eeddc0SDimitry Andric for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 7106c3fb27SDimitry Andric if (Node.Tok->MacroParent) 7206c3fb27SDimitry Andric ContainsMacroCall = true; 7304eeddc0SDimitry Andric Current->Next = Node.Tok; 7404eeddc0SDimitry Andric Node.Tok->Previous = Current; 750b57cec5SDimitry Andric Current = Current->Next; 7606c3fb27SDimitry Andric addChildren(Node, Current); 7706c3fb27SDimitry Andric // FIXME: if we add children, previous will point to the token before 7806c3fb27SDimitry Andric // the children; changing this requires significant changes across 7906c3fb27SDimitry Andric // clang-format. 800b57cec5SDimitry Andric } 810b57cec5SDimitry Andric Last = Current; 820b57cec5SDimitry Andric Last->Next = nullptr; 830b57cec5SDimitry Andric } 840b57cec5SDimitry Andric addChildren(const UnwrappedLineNode & Node,FormatToken * Current)8506c3fb27SDimitry Andric void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { 8606c3fb27SDimitry Andric Current->Children.clear(); 8706c3fb27SDimitry Andric for (const auto &Child : Node.Children) { 8806c3fb27SDimitry Andric Children.push_back(new AnnotatedLine(Child)); 8906c3fb27SDimitry Andric if (Children.back()->ContainsMacroCall) 9006c3fb27SDimitry Andric ContainsMacroCall = true; 9106c3fb27SDimitry Andric Current->Children.push_back(Children.back()); 9206c3fb27SDimitry Andric } 9306c3fb27SDimitry Andric } 9406c3fb27SDimitry Andric size()955f757f3fSDimitry Andric size_t size() const { 965f757f3fSDimitry Andric size_t Size = 1; 975f757f3fSDimitry Andric for (const auto *Child : Children) 985f757f3fSDimitry Andric Size += Child->size(); 995f757f3fSDimitry Andric return Size; 1005f757f3fSDimitry Andric } 1015f757f3fSDimitry Andric ~AnnotatedLine()1020b57cec5SDimitry Andric ~AnnotatedLine() { 1031fd87a68SDimitry Andric for (AnnotatedLine *Child : Children) 1041fd87a68SDimitry Andric delete Child; 1050b57cec5SDimitry Andric FormatToken *Current = First; 1060b57cec5SDimitry Andric while (Current) { 1070b57cec5SDimitry Andric Current->Children.clear(); 1080b57cec5SDimitry Andric Current->Role.reset(); 1090b57cec5SDimitry Andric Current = Current->Next; 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric } 1120b57cec5SDimitry Andric isComment()11381ad6265SDimitry Andric bool isComment() const { 11481ad6265SDimitry Andric return First && First->is(tok::comment) && !First->getNextNonComment(); 11581ad6265SDimitry Andric } 11681ad6265SDimitry Andric 1170b57cec5SDimitry Andric /// \c true if this line starts with the given tokens in order, ignoring 1180b57cec5SDimitry Andric /// comments. startsWith(Ts...Tokens)1190b57cec5SDimitry Andric template <typename... Ts> bool startsWith(Ts... Tokens) const { 1200b57cec5SDimitry Andric return First && First->startsSequence(Tokens...); 1210b57cec5SDimitry Andric } 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric /// \c true if this line ends with the given tokens in reversed order, 1240b57cec5SDimitry Andric /// ignoring comments. 1250b57cec5SDimitry Andric /// For example, given tokens [T1, T2, T3, ...], the function returns true if 1260b57cec5SDimitry Andric /// this line is like "... T3 T2 T1". endsWith(Ts...Tokens)1270b57cec5SDimitry Andric template <typename... Ts> bool endsWith(Ts... Tokens) const { 1280b57cec5SDimitry Andric return Last && Last->endsSequence(Tokens...); 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric 1310b57cec5SDimitry Andric /// \c true if this line looks like a function definition instead of a 1320b57cec5SDimitry Andric /// function declaration. Asserts MightBeFunctionDecl. mightBeFunctionDefinition()1330b57cec5SDimitry Andric bool mightBeFunctionDefinition() const { 1340b57cec5SDimitry Andric assert(MightBeFunctionDecl); 1350b57cec5SDimitry Andric // Try to determine if the end of a stream of tokens is either the 1360b57cec5SDimitry Andric // Definition or the Declaration for a function. It does this by looking for 1370b57cec5SDimitry Andric // the ';' in foo(); and using that it ends with a ; to know this is the 1380b57cec5SDimitry Andric // Definition, however the line could end with 1390b57cec5SDimitry Andric // foo(); /* comment */ 1400b57cec5SDimitry Andric // or 1410b57cec5SDimitry Andric // foo(); // comment 1420b57cec5SDimitry Andric // or 1430b57cec5SDimitry Andric // foo() // comment 1440b57cec5SDimitry Andric // endsWith() ignores the comment. 1450b57cec5SDimitry Andric return !endsWith(tok::semi); 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric /// \c true if this line starts a namespace definition. startsWithNamespace()1490b57cec5SDimitry Andric bool startsWithNamespace() const { 150a7dea167SDimitry Andric return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 1510b57cec5SDimitry Andric startsWith(tok::kw_inline, tok::kw_namespace) || 1520b57cec5SDimitry Andric startsWith(tok::kw_export, tok::kw_namespace); 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric getFirstNonComment()1555f757f3fSDimitry Andric FormatToken *getFirstNonComment() const { 1565f757f3fSDimitry Andric assert(First); 1575f757f3fSDimitry Andric return First->is(tok::comment) ? First->getNextNonComment() : First; 1585f757f3fSDimitry Andric } 1595f757f3fSDimitry Andric getLastNonComment()1605f757f3fSDimitry Andric FormatToken *getLastNonComment() const { 1615f757f3fSDimitry Andric assert(Last); 1625f757f3fSDimitry Andric return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last; 1635f757f3fSDimitry Andric } 1645f757f3fSDimitry Andric 1650b57cec5SDimitry Andric FormatToken *First; 1660b57cec5SDimitry Andric FormatToken *Last; 1670b57cec5SDimitry Andric 1680b57cec5SDimitry Andric SmallVector<AnnotatedLine *, 0> Children; 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric LineType Type; 1710b57cec5SDimitry Andric unsigned Level; 172bdd1243dSDimitry Andric unsigned PPLevel; 1730b57cec5SDimitry Andric size_t MatchingOpeningBlockLineIndex; 1740b57cec5SDimitry Andric size_t MatchingClosingBlockLineIndex; 1750b57cec5SDimitry Andric bool InPPDirective; 176bdd1243dSDimitry Andric bool InPragmaDirective; 177bdd1243dSDimitry Andric bool InMacroBody; 1780b57cec5SDimitry Andric bool MustBeDeclaration; 1790b57cec5SDimitry Andric bool MightBeFunctionDecl; 1800b57cec5SDimitry Andric bool IsMultiVariableDeclStmt; 1810b57cec5SDimitry Andric 18206c3fb27SDimitry Andric /// \c True if this line contains a macro call for which an expansion exists. 18306c3fb27SDimitry Andric bool ContainsMacroCall = false; 18406c3fb27SDimitry Andric 1850b57cec5SDimitry Andric /// \c True if this line should be formatted, i.e. intersects directly or 1860b57cec5SDimitry Andric /// indirectly with one of the input ranges. 1870b57cec5SDimitry Andric bool Affected; 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric /// \c True if the leading empty lines of this line intersect with one of the 1900b57cec5SDimitry Andric /// input ranges. 1910b57cec5SDimitry Andric bool LeadingEmptyLinesAffected; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric /// \c True if one of this line's children intersects with an input range. 1940b57cec5SDimitry Andric bool ChildrenAffected; 1950b57cec5SDimitry Andric 196bdd1243dSDimitry Andric /// \c True if breaking after last attribute group in function return type. 197bdd1243dSDimitry Andric bool ReturnTypeWrapped; 198bdd1243dSDimitry Andric 199bdd1243dSDimitry Andric /// \c True if this line should be indented by ContinuationIndent in addition 200bdd1243dSDimitry Andric /// to the normal indention level. 201bdd1243dSDimitry Andric bool IsContinuation; 202bdd1243dSDimitry Andric 2030b57cec5SDimitry Andric unsigned FirstStartColumn; 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric private: 2060b57cec5SDimitry Andric // Disallow copying. 2070b57cec5SDimitry Andric AnnotatedLine(const AnnotatedLine &) = delete; 2080b57cec5SDimitry Andric void operator=(const AnnotatedLine &) = delete; 2090b57cec5SDimitry Andric }; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric /// Determines extra information about the tokens comprising an 2120b57cec5SDimitry Andric /// \c UnwrappedLine. 2130b57cec5SDimitry Andric class TokenAnnotator { 2140b57cec5SDimitry Andric public: TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)2150b57cec5SDimitry Andric TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 216*0fca6ea1SDimitry Andric : Style(Style), IsCpp(Style.isCpp()), 217*0fca6ea1SDimitry Andric LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { 218*0fca6ea1SDimitry Andric assert(IsCpp == LangOpts.CXXOperatorNames); 219*0fca6ea1SDimitry Andric } 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric /// Adapts the indent levels of comment lines to the indent of the 2220b57cec5SDimitry Andric /// subsequent line. 2230b57cec5SDimitry Andric // FIXME: Can/should this be done in the UnwrappedLineParser? 22481ad6265SDimitry Andric void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; 2250b57cec5SDimitry Andric 22606c3fb27SDimitry Andric void annotate(AnnotatedLine &Line); 22781ad6265SDimitry Andric void calculateFormattingInformation(AnnotatedLine &Line) const; 2280b57cec5SDimitry Andric 2290b57cec5SDimitry Andric private: 2300b57cec5SDimitry Andric /// Calculate the penalty for splitting before \c Tok. 2310b57cec5SDimitry Andric unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 23281ad6265SDimitry Andric bool InFunctionDecl) const; 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric bool spaceRequiredBeforeParens(const FormatToken &Right) const; 2350b57cec5SDimitry Andric 2360b57cec5SDimitry Andric bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 23781ad6265SDimitry Andric const FormatToken &Right) const; 2380b57cec5SDimitry Andric 23981ad6265SDimitry Andric bool spaceRequiredBefore(const AnnotatedLine &Line, 24081ad6265SDimitry Andric const FormatToken &Right) const; 2410b57cec5SDimitry Andric 24281ad6265SDimitry Andric bool mustBreakBefore(const AnnotatedLine &Line, 24381ad6265SDimitry Andric const FormatToken &Right) const; 2440b57cec5SDimitry Andric 24581ad6265SDimitry Andric bool canBreakBefore(const AnnotatedLine &Line, 24681ad6265SDimitry Andric const FormatToken &Right) const; 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric bool mustBreakForReturnType(const AnnotatedLine &Line) const; 2490b57cec5SDimitry Andric 25081ad6265SDimitry Andric void printDebugInfo(const AnnotatedLine &Line) const; 2510b57cec5SDimitry Andric 25281ad6265SDimitry Andric void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; 2530b57cec5SDimitry Andric 25481ad6265SDimitry Andric void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; 255fe6060f1SDimitry Andric 256fe6060f1SDimitry Andric FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 257fe6060f1SDimitry Andric FormatToken *CurrentToken, 25881ad6265SDimitry Andric unsigned Depth) const; 259fe6060f1SDimitry Andric FormatStyle::PointerAlignmentStyle 26081ad6265SDimitry Andric getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; 261fe6060f1SDimitry Andric 26281ad6265SDimitry Andric FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( 26381ad6265SDimitry Andric const FormatToken &PointerOrReference) const; 264fe6060f1SDimitry Andric 2650b57cec5SDimitry Andric const FormatStyle &Style; 2660b57cec5SDimitry Andric 267*0fca6ea1SDimitry Andric bool IsCpp; 268*0fca6ea1SDimitry Andric LangOptions LangOpts; 269*0fca6ea1SDimitry Andric 2700b57cec5SDimitry Andric const AdditionalKeywords &Keywords; 27106c3fb27SDimitry Andric 27206c3fb27SDimitry Andric SmallVector<ScopeType> Scopes; 2730b57cec5SDimitry Andric }; 2740b57cec5SDimitry Andric 2750b57cec5SDimitry Andric } // end namespace format 2760b57cec5SDimitry Andric } // end namespace clang 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric #endif 279