1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 20 namespace clang { 21 namespace format { 22 23 enum LineType { 24 LT_Invalid, 25 // Contains public/private/protected followed by TT_InheritanceColon. 26 LT_AccessModifier, 27 LT_ImportStatement, 28 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 29 LT_ObjCMethodDecl, 30 LT_ObjCProperty, // An @property line. 31 LT_Other, 32 LT_PreprocessorDirective, 33 LT_VirtualFunctionDecl, 34 LT_ArrayOfStructInitializer, 35 LT_CommentAbovePPDirective, 36 }; 37 38 enum ScopeType { 39 // Contained in class declaration/definition. 40 ST_Class, 41 // Contained within function definition. 42 ST_Function, 43 // Contained within other scope block (loop, if/else, etc). 44 ST_Other, 45 }; 46 47 class AnnotatedLine { 48 public: AnnotatedLine(const UnwrappedLine & Line)49 AnnotatedLine(const UnwrappedLine &Line) 50 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level), 51 PPLevel(Line.PPLevel), 52 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 53 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 54 InPPDirective(Line.InPPDirective), 55 InPragmaDirective(Line.InPragmaDirective), 56 InMacroBody(Line.InMacroBody), 57 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 58 IsMultiVariableDeclStmt(false), Affected(false), 59 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 60 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), 61 FirstStartColumn(Line.FirstStartColumn) { 62 assert(!Line.Tokens.empty()); 63 64 // Calculate Next and Previous for all tokens. Note that we must overwrite 65 // Next and Previous for every token, as previous formatting runs might have 66 // left them in a different state. 67 First->Previous = nullptr; 68 FormatToken *Current = First; 69 addChildren(Line.Tokens.front(), Current); 70 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 71 if (Node.Tok->MacroParent) 72 ContainsMacroCall = true; 73 Current->Next = Node.Tok; 74 Node.Tok->Previous = Current; 75 Current = Current->Next; 76 addChildren(Node, Current); 77 // FIXME: if we add children, previous will point to the token before 78 // the children; changing this requires significant changes across 79 // clang-format. 80 } 81 Last = Current; 82 Last->Next = nullptr; 83 } 84 addChildren(const UnwrappedLineNode & Node,FormatToken * Current)85 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { 86 Current->Children.clear(); 87 for (const auto &Child : Node.Children) { 88 Children.push_back(new AnnotatedLine(Child)); 89 if (Children.back()->ContainsMacroCall) 90 ContainsMacroCall = true; 91 Current->Children.push_back(Children.back()); 92 } 93 } 94 size()95 size_t size() const { 96 size_t Size = 1; 97 for (const auto *Child : Children) 98 Size += Child->size(); 99 return Size; 100 } 101 ~AnnotatedLine()102 ~AnnotatedLine() { 103 for (AnnotatedLine *Child : Children) 104 delete Child; 105 FormatToken *Current = First; 106 while (Current) { 107 Current->Children.clear(); 108 Current->Role.reset(); 109 Current = Current->Next; 110 } 111 } 112 isComment()113 bool isComment() const { 114 return First && First->is(tok::comment) && !First->getNextNonComment(); 115 } 116 117 /// \c true if this line starts with the given tokens in order, ignoring 118 /// comments. startsWith(Ts...Tokens)119 template <typename... Ts> bool startsWith(Ts... Tokens) const { 120 return First && First->startsSequence(Tokens...); 121 } 122 123 /// \c true if this line ends with the given tokens in reversed order, 124 /// ignoring comments. 125 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 126 /// this line is like "... T3 T2 T1". endsWith(Ts...Tokens)127 template <typename... Ts> bool endsWith(Ts... Tokens) const { 128 return Last && Last->endsSequence(Tokens...); 129 } 130 131 /// \c true if this line looks like a function definition instead of a 132 /// function declaration. Asserts MightBeFunctionDecl. mightBeFunctionDefinition()133 bool mightBeFunctionDefinition() const { 134 assert(MightBeFunctionDecl); 135 // Try to determine if the end of a stream of tokens is either the 136 // Definition or the Declaration for a function. It does this by looking for 137 // the ';' in foo(); and using that it ends with a ; to know this is the 138 // Definition, however the line could end with 139 // foo(); /* comment */ 140 // or 141 // foo(); // comment 142 // or 143 // foo() // comment 144 // endsWith() ignores the comment. 145 return !endsWith(tok::semi); 146 } 147 148 /// \c true if this line starts a namespace definition. startsWithNamespace()149 bool startsWithNamespace() const { 150 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 151 startsWith(tok::kw_inline, tok::kw_namespace) || 152 startsWith(tok::kw_export, tok::kw_namespace); 153 } 154 getFirstNonComment()155 FormatToken *getFirstNonComment() const { 156 assert(First); 157 return First->is(tok::comment) ? First->getNextNonComment() : First; 158 } 159 getLastNonComment()160 FormatToken *getLastNonComment() const { 161 assert(Last); 162 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last; 163 } 164 165 FormatToken *First; 166 FormatToken *Last; 167 168 SmallVector<AnnotatedLine *, 0> Children; 169 170 LineType Type; 171 unsigned Level; 172 unsigned PPLevel; 173 size_t MatchingOpeningBlockLineIndex; 174 size_t MatchingClosingBlockLineIndex; 175 bool InPPDirective; 176 bool InPragmaDirective; 177 bool InMacroBody; 178 bool MustBeDeclaration; 179 bool MightBeFunctionDecl; 180 bool IsMultiVariableDeclStmt; 181 182 /// \c True if this line contains a macro call for which an expansion exists. 183 bool ContainsMacroCall = false; 184 185 /// \c True if this line should be formatted, i.e. intersects directly or 186 /// indirectly with one of the input ranges. 187 bool Affected; 188 189 /// \c True if the leading empty lines of this line intersect with one of the 190 /// input ranges. 191 bool LeadingEmptyLinesAffected; 192 193 /// \c True if one of this line's children intersects with an input range. 194 bool ChildrenAffected; 195 196 /// \c True if breaking after last attribute group in function return type. 197 bool ReturnTypeWrapped; 198 199 /// \c True if this line should be indented by ContinuationIndent in addition 200 /// to the normal indention level. 201 bool IsContinuation; 202 203 unsigned FirstStartColumn; 204 205 private: 206 // Disallow copying. 207 AnnotatedLine(const AnnotatedLine &) = delete; 208 void operator=(const AnnotatedLine &) = delete; 209 }; 210 211 /// Determines extra information about the tokens comprising an 212 /// \c UnwrappedLine. 213 class TokenAnnotator { 214 public: TokenAnnotator(const FormatStyle & Style,const AdditionalKeywords & Keywords)215 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 216 : Style(Style), IsCpp(Style.isCpp()), 217 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { 218 assert(IsCpp == LangOpts.CXXOperatorNames); 219 } 220 221 /// Adapts the indent levels of comment lines to the indent of the 222 /// subsequent line. 223 // FIXME: Can/should this be done in the UnwrappedLineParser? 224 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; 225 226 void annotate(AnnotatedLine &Line); 227 void calculateFormattingInformation(AnnotatedLine &Line) const; 228 229 private: 230 /// Calculate the penalty for splitting before \c Tok. 231 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 232 bool InFunctionDecl) const; 233 234 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 235 236 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 237 const FormatToken &Right) const; 238 239 bool spaceRequiredBefore(const AnnotatedLine &Line, 240 const FormatToken &Right) const; 241 242 bool mustBreakBefore(const AnnotatedLine &Line, 243 const FormatToken &Right) const; 244 245 bool canBreakBefore(const AnnotatedLine &Line, 246 const FormatToken &Right) const; 247 248 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 249 250 void printDebugInfo(const AnnotatedLine &Line) const; 251 252 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; 253 254 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; 255 256 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 257 FormatToken *CurrentToken, 258 unsigned Depth) const; 259 FormatStyle::PointerAlignmentStyle 260 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; 261 262 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( 263 const FormatToken &PointerOrReference) const; 264 265 const FormatStyle &Style; 266 267 bool IsCpp; 268 LangOptions LangOpts; 269 270 const AdditionalKeywords &Keywords; 271 272 SmallVector<ScopeType> Scopes; 273 }; 274 275 } // end namespace format 276 } // end namespace clang 277 278 #endif 279