1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 #include "clang/Format/Format.h" 20 21 namespace clang { 22 namespace format { 23 24 enum LineType { 25 LT_Invalid, 26 LT_ImportStatement, 27 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 28 LT_ObjCMethodDecl, 29 LT_ObjCProperty, // An @property line. 30 LT_Other, 31 LT_PreprocessorDirective, 32 LT_VirtualFunctionDecl, 33 LT_ArrayOfStructInitializer, 34 }; 35 36 class AnnotatedLine { 37 public: 38 AnnotatedLine(const UnwrappedLine &Line) 39 : First(Line.Tokens.front().Tok), Level(Line.Level), 40 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 41 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 42 InPPDirective(Line.InPPDirective), 43 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 44 IsMultiVariableDeclStmt(false), Affected(false), 45 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 46 FirstStartColumn(Line.FirstStartColumn) { 47 assert(!Line.Tokens.empty()); 48 49 // Calculate Next and Previous for all tokens. Note that we must overwrite 50 // Next and Previous for every token, as previous formatting runs might have 51 // left them in a different state. 52 First->Previous = nullptr; 53 FormatToken *Current = First; 54 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 55 Current->Next = Node.Tok; 56 Node.Tok->Previous = Current; 57 Current = Current->Next; 58 Current->Children.clear(); 59 for (const auto &Child : Node.Children) { 60 Children.push_back(new AnnotatedLine(Child)); 61 Current->Children.push_back(Children.back()); 62 } 63 } 64 Last = Current; 65 Last->Next = nullptr; 66 } 67 68 ~AnnotatedLine() { 69 for (AnnotatedLine *Child : Children) 70 delete Child; 71 FormatToken *Current = First; 72 while (Current) { 73 Current->Children.clear(); 74 Current->Role.reset(); 75 Current = Current->Next; 76 } 77 } 78 79 /// \c true if this line starts with the given tokens in order, ignoring 80 /// comments. 81 template <typename... Ts> bool startsWith(Ts... Tokens) const { 82 return First && First->startsSequence(Tokens...); 83 } 84 85 /// \c true if this line ends with the given tokens in reversed order, 86 /// ignoring comments. 87 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 88 /// this line is like "... T3 T2 T1". 89 template <typename... Ts> bool endsWith(Ts... Tokens) const { 90 return Last && Last->endsSequence(Tokens...); 91 } 92 93 /// \c true if this line looks like a function definition instead of a 94 /// function declaration. Asserts MightBeFunctionDecl. 95 bool mightBeFunctionDefinition() const { 96 assert(MightBeFunctionDecl); 97 // Try to determine if the end of a stream of tokens is either the 98 // Definition or the Declaration for a function. It does this by looking for 99 // the ';' in foo(); and using that it ends with a ; to know this is the 100 // Definition, however the line could end with 101 // foo(); /* comment */ 102 // or 103 // foo(); // comment 104 // or 105 // foo() // comment 106 // endsWith() ignores the comment. 107 return !endsWith(tok::semi); 108 } 109 110 /// \c true if this line starts a namespace definition. 111 bool startsWithNamespace() const { 112 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 113 startsWith(tok::kw_inline, tok::kw_namespace) || 114 startsWith(tok::kw_export, tok::kw_namespace); 115 } 116 117 FormatToken *First; 118 FormatToken *Last; 119 120 SmallVector<AnnotatedLine *, 0> Children; 121 122 LineType Type; 123 unsigned Level; 124 size_t MatchingOpeningBlockLineIndex; 125 size_t MatchingClosingBlockLineIndex; 126 bool InPPDirective; 127 bool MustBeDeclaration; 128 bool MightBeFunctionDecl; 129 bool IsMultiVariableDeclStmt; 130 131 /// \c True if this line should be formatted, i.e. intersects directly or 132 /// indirectly with one of the input ranges. 133 bool Affected; 134 135 /// \c True if the leading empty lines of this line intersect with one of the 136 /// input ranges. 137 bool LeadingEmptyLinesAffected; 138 139 /// \c True if one of this line's children intersects with an input range. 140 bool ChildrenAffected; 141 142 unsigned FirstStartColumn; 143 144 private: 145 // Disallow copying. 146 AnnotatedLine(const AnnotatedLine &) = delete; 147 void operator=(const AnnotatedLine &) = delete; 148 }; 149 150 /// Determines extra information about the tokens comprising an 151 /// \c UnwrappedLine. 152 class TokenAnnotator { 153 public: 154 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 155 : Style(Style), Keywords(Keywords) {} 156 157 /// Adapts the indent levels of comment lines to the indent of the 158 /// subsequent line. 159 // FIXME: Can/should this be done in the UnwrappedLineParser? 160 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines); 161 162 void annotate(AnnotatedLine &Line); 163 void calculateFormattingInformation(AnnotatedLine &Line); 164 165 private: 166 /// Calculate the penalty for splitting before \c Tok. 167 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 168 bool InFunctionDecl); 169 170 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 171 172 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 173 const FormatToken &Right); 174 175 bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right); 176 177 bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); 178 179 bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); 180 181 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 182 183 void printDebugInfo(const AnnotatedLine &Line); 184 185 void calculateUnbreakableTailLengths(AnnotatedLine &Line); 186 187 void calculateArrayInitializerColumnList(AnnotatedLine &Line); 188 189 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 190 FormatToken *CurrentToken, 191 unsigned Depth); 192 FormatStyle::PointerAlignmentStyle 193 getTokenReferenceAlignment(const FormatToken &PointerOrReference); 194 195 FormatStyle::PointerAlignmentStyle 196 getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference); 197 198 const FormatStyle &Style; 199 200 const AdditionalKeywords &Keywords; 201 }; 202 203 } // end namespace format 204 } // end namespace clang 205 206 #endif 207