1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 #include "clang/Format/Format.h" 20 21 namespace clang { 22 namespace format { 23 24 enum LineType { 25 LT_Invalid, 26 LT_ImportStatement, 27 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 28 LT_ObjCMethodDecl, 29 LT_ObjCProperty, // An @property line. 30 LT_Other, 31 LT_PreprocessorDirective, 32 LT_VirtualFunctionDecl, 33 LT_ArrayOfStructInitializer, 34 LT_CommentAbovePPDirective, 35 }; 36 37 enum ScopeType { 38 // Contained in class declaration/definition. 39 ST_Class, 40 // Contained within function definition. 41 ST_Function, 42 // Contained within other scope block (loop, if/else, etc). 43 ST_Other, 44 }; 45 46 class AnnotatedLine { 47 public: 48 AnnotatedLine(const UnwrappedLine &Line) 49 : First(Line.Tokens.front().Tok), Level(Line.Level), 50 PPLevel(Line.PPLevel), 51 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 52 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 53 InPPDirective(Line.InPPDirective), 54 InPragmaDirective(Line.InPragmaDirective), 55 InMacroBody(Line.InMacroBody), 56 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 57 IsMultiVariableDeclStmt(false), Affected(false), 58 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 59 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), 60 FirstStartColumn(Line.FirstStartColumn) { 61 assert(!Line.Tokens.empty()); 62 63 // Calculate Next and Previous for all tokens. Note that we must overwrite 64 // Next and Previous for every token, as previous formatting runs might have 65 // left them in a different state. 66 First->Previous = nullptr; 67 FormatToken *Current = First; 68 addChildren(Line.Tokens.front(), Current); 69 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 70 if (Node.Tok->MacroParent) 71 ContainsMacroCall = true; 72 Current->Next = Node.Tok; 73 Node.Tok->Previous = Current; 74 Current = Current->Next; 75 addChildren(Node, Current); 76 // FIXME: if we add children, previous will point to the token before 77 // the children; changing this requires significant changes across 78 // clang-format. 79 } 80 Last = Current; 81 Last->Next = nullptr; 82 } 83 84 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { 85 Current->Children.clear(); 86 for (const auto &Child : Node.Children) { 87 Children.push_back(new AnnotatedLine(Child)); 88 if (Children.back()->ContainsMacroCall) 89 ContainsMacroCall = true; 90 Current->Children.push_back(Children.back()); 91 } 92 } 93 94 size_t size() const { 95 size_t Size = 1; 96 for (const auto *Child : Children) 97 Size += Child->size(); 98 return Size; 99 } 100 101 ~AnnotatedLine() { 102 for (AnnotatedLine *Child : Children) 103 delete Child; 104 FormatToken *Current = First; 105 while (Current) { 106 Current->Children.clear(); 107 Current->Role.reset(); 108 Current = Current->Next; 109 } 110 } 111 112 bool isComment() const { 113 return First && First->is(tok::comment) && !First->getNextNonComment(); 114 } 115 116 /// \c true if this line starts with the given tokens in order, ignoring 117 /// comments. 118 template <typename... Ts> bool startsWith(Ts... Tokens) const { 119 return First && First->startsSequence(Tokens...); 120 } 121 122 /// \c true if this line ends with the given tokens in reversed order, 123 /// ignoring comments. 124 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 125 /// this line is like "... T3 T2 T1". 126 template <typename... Ts> bool endsWith(Ts... Tokens) const { 127 return Last && Last->endsSequence(Tokens...); 128 } 129 130 /// \c true if this line looks like a function definition instead of a 131 /// function declaration. Asserts MightBeFunctionDecl. 132 bool mightBeFunctionDefinition() const { 133 assert(MightBeFunctionDecl); 134 // Try to determine if the end of a stream of tokens is either the 135 // Definition or the Declaration for a function. It does this by looking for 136 // the ';' in foo(); and using that it ends with a ; to know this is the 137 // Definition, however the line could end with 138 // foo(); /* comment */ 139 // or 140 // foo(); // comment 141 // or 142 // foo() // comment 143 // endsWith() ignores the comment. 144 return !endsWith(tok::semi); 145 } 146 147 /// \c true if this line starts a namespace definition. 148 bool startsWithNamespace() const { 149 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 150 startsWith(tok::kw_inline, tok::kw_namespace) || 151 startsWith(tok::kw_export, tok::kw_namespace); 152 } 153 154 FormatToken *getFirstNonComment() const { 155 assert(First); 156 return First->is(tok::comment) ? First->getNextNonComment() : First; 157 } 158 159 FormatToken *getLastNonComment() const { 160 assert(Last); 161 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last; 162 } 163 164 FormatToken *First; 165 FormatToken *Last; 166 167 SmallVector<AnnotatedLine *, 0> Children; 168 169 LineType Type; 170 unsigned Level; 171 unsigned PPLevel; 172 size_t MatchingOpeningBlockLineIndex; 173 size_t MatchingClosingBlockLineIndex; 174 bool InPPDirective; 175 bool InPragmaDirective; 176 bool InMacroBody; 177 bool MustBeDeclaration; 178 bool MightBeFunctionDecl; 179 bool IsMultiVariableDeclStmt; 180 181 /// \c True if this line contains a macro call for which an expansion exists. 182 bool ContainsMacroCall = false; 183 184 /// \c True if this line should be formatted, i.e. intersects directly or 185 /// indirectly with one of the input ranges. 186 bool Affected; 187 188 /// \c True if the leading empty lines of this line intersect with one of the 189 /// input ranges. 190 bool LeadingEmptyLinesAffected; 191 192 /// \c True if one of this line's children intersects with an input range. 193 bool ChildrenAffected; 194 195 /// \c True if breaking after last attribute group in function return type. 196 bool ReturnTypeWrapped; 197 198 /// \c True if this line should be indented by ContinuationIndent in addition 199 /// to the normal indention level. 200 bool IsContinuation; 201 202 unsigned FirstStartColumn; 203 204 private: 205 // Disallow copying. 206 AnnotatedLine(const AnnotatedLine &) = delete; 207 void operator=(const AnnotatedLine &) = delete; 208 }; 209 210 /// Determines extra information about the tokens comprising an 211 /// \c UnwrappedLine. 212 class TokenAnnotator { 213 public: 214 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 215 : Style(Style), Keywords(Keywords) {} 216 217 /// Adapts the indent levels of comment lines to the indent of the 218 /// subsequent line. 219 // FIXME: Can/should this be done in the UnwrappedLineParser? 220 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; 221 222 void annotate(AnnotatedLine &Line); 223 void calculateFormattingInformation(AnnotatedLine &Line) const; 224 225 private: 226 /// Calculate the penalty for splitting before \c Tok. 227 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 228 bool InFunctionDecl) const; 229 230 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 231 232 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 233 const FormatToken &Right) const; 234 235 bool spaceRequiredBefore(const AnnotatedLine &Line, 236 const FormatToken &Right) const; 237 238 bool mustBreakBefore(const AnnotatedLine &Line, 239 const FormatToken &Right) const; 240 241 bool canBreakBefore(const AnnotatedLine &Line, 242 const FormatToken &Right) const; 243 244 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 245 246 void printDebugInfo(const AnnotatedLine &Line) const; 247 248 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; 249 250 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; 251 252 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 253 FormatToken *CurrentToken, 254 unsigned Depth) const; 255 FormatStyle::PointerAlignmentStyle 256 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; 257 258 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( 259 const FormatToken &PointerOrReference) const; 260 261 const FormatStyle &Style; 262 263 const AdditionalKeywords &Keywords; 264 265 SmallVector<ScopeType> Scopes; 266 }; 267 268 } // end namespace format 269 } // end namespace clang 270 271 #endif 272