1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 #include "clang/Format/Format.h" 20 21 namespace clang { 22 class SourceManager; 23 24 namespace format { 25 26 enum LineType { 27 LT_Invalid, 28 LT_ImportStatement, 29 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 30 LT_ObjCMethodDecl, 31 LT_ObjCProperty, // An @property line. 32 LT_Other, 33 LT_PreprocessorDirective, 34 LT_VirtualFunctionDecl, 35 LT_ArrayOfStructInitializer, 36 }; 37 38 class AnnotatedLine { 39 public: 40 AnnotatedLine(const UnwrappedLine &Line) 41 : First(Line.Tokens.front().Tok), Level(Line.Level), 42 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 43 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 44 InPPDirective(Line.InPPDirective), 45 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 46 IsMultiVariableDeclStmt(false), Affected(false), 47 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 48 FirstStartColumn(Line.FirstStartColumn) { 49 assert(!Line.Tokens.empty()); 50 51 // Calculate Next and Previous for all tokens. Note that we must overwrite 52 // Next and Previous for every token, as previous formatting runs might have 53 // left them in a different state. 54 First->Previous = nullptr; 55 FormatToken *Current = First; 56 for (auto I = ++Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { 57 const UnwrappedLineNode &Node = *I; 58 Current->Next = I->Tok; 59 I->Tok->Previous = Current; 60 Current = Current->Next; 61 Current->Children.clear(); 62 for (const auto &Child : Node.Children) { 63 Children.push_back(new AnnotatedLine(Child)); 64 Current->Children.push_back(Children.back()); 65 } 66 } 67 Last = Current; 68 Last->Next = nullptr; 69 } 70 71 ~AnnotatedLine() { 72 for (unsigned i = 0, e = Children.size(); i != e; ++i) { 73 delete Children[i]; 74 } 75 FormatToken *Current = First; 76 while (Current) { 77 Current->Children.clear(); 78 Current->Role.reset(); 79 Current = Current->Next; 80 } 81 } 82 83 /// \c true if this line starts with the given tokens in order, ignoring 84 /// comments. 85 template <typename... Ts> bool startsWith(Ts... Tokens) const { 86 return First && First->startsSequence(Tokens...); 87 } 88 89 /// \c true if this line ends with the given tokens in reversed order, 90 /// ignoring comments. 91 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 92 /// this line is like "... T3 T2 T1". 93 template <typename... Ts> bool endsWith(Ts... Tokens) const { 94 return Last && Last->endsSequence(Tokens...); 95 } 96 97 /// \c true if this line looks like a function definition instead of a 98 /// function declaration. Asserts MightBeFunctionDecl. 99 bool mightBeFunctionDefinition() const { 100 assert(MightBeFunctionDecl); 101 // Try to determine if the end of a stream of tokens is either the 102 // Definition or the Declaration for a function. It does this by looking for 103 // the ';' in foo(); and using that it ends with a ; to know this is the 104 // Definition, however the line could end with 105 // foo(); /* comment */ 106 // or 107 // foo(); // comment 108 // or 109 // foo() // comment 110 // endsWith() ignores the comment. 111 return !endsWith(tok::semi); 112 } 113 114 /// \c true if this line starts a namespace definition. 115 bool startsWithNamespace() const { 116 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 117 startsWith(tok::kw_inline, tok::kw_namespace) || 118 startsWith(tok::kw_export, tok::kw_namespace); 119 } 120 121 FormatToken *First; 122 FormatToken *Last; 123 124 SmallVector<AnnotatedLine *, 0> Children; 125 126 LineType Type; 127 unsigned Level; 128 size_t MatchingOpeningBlockLineIndex; 129 size_t MatchingClosingBlockLineIndex; 130 bool InPPDirective; 131 bool MustBeDeclaration; 132 bool MightBeFunctionDecl; 133 bool IsMultiVariableDeclStmt; 134 135 /// \c True if this line should be formatted, i.e. intersects directly or 136 /// indirectly with one of the input ranges. 137 bool Affected; 138 139 /// \c True if the leading empty lines of this line intersect with one of the 140 /// input ranges. 141 bool LeadingEmptyLinesAffected; 142 143 /// \c True if one of this line's children intersects with an input range. 144 bool ChildrenAffected; 145 146 unsigned FirstStartColumn; 147 148 private: 149 // Disallow copying. 150 AnnotatedLine(const AnnotatedLine &) = delete; 151 void operator=(const AnnotatedLine &) = delete; 152 }; 153 154 /// Determines extra information about the tokens comprising an 155 /// \c UnwrappedLine. 156 class TokenAnnotator { 157 public: 158 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 159 : Style(Style), Keywords(Keywords) {} 160 161 /// Adapts the indent levels of comment lines to the indent of the 162 /// subsequent line. 163 // FIXME: Can/should this be done in the UnwrappedLineParser? 164 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines); 165 166 void annotate(AnnotatedLine &Line); 167 void calculateFormattingInformation(AnnotatedLine &Line); 168 169 private: 170 /// Calculate the penalty for splitting before \c Tok. 171 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 172 bool InFunctionDecl); 173 174 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 175 176 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 177 const FormatToken &Right); 178 179 bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right); 180 181 bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); 182 183 bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); 184 185 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 186 187 void printDebugInfo(const AnnotatedLine &Line); 188 189 void calculateUnbreakableTailLengths(AnnotatedLine &Line); 190 191 void calculateArrayInitializerColumnList(AnnotatedLine &Line); 192 193 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 194 FormatToken *CurrentToken, 195 unsigned Depth); 196 FormatStyle::PointerAlignmentStyle 197 getTokenReferenceAlignment(const FormatToken &PointerOrReference); 198 199 FormatStyle::PointerAlignmentStyle 200 getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference); 201 202 const FormatStyle &Style; 203 204 const AdditionalKeywords &Keywords; 205 }; 206 207 } // end namespace format 208 } // end namespace clang 209 210 #endif 211