1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the \c FormatTokenSource interface, which provides a token 11 /// stream as well as the ability to manipulate the token stream. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 17 18 #include "UnwrappedLineParser.h" 19 20 #define DEBUG_TYPE "format-token-source" 21 22 namespace clang { 23 namespace format { 24 25 // Navigate a token stream. 26 // 27 // Enables traversal of a token stream, resetting the position in a token 28 // stream, as well as inserting new tokens. 29 class FormatTokenSource { 30 public: 31 virtual ~FormatTokenSource() {} 32 33 // Returns the next token in the token stream. 34 virtual FormatToken *getNextToken() = 0; 35 36 // Returns the token preceding the token returned by the last call to 37 // getNextToken() in the token stream, or nullptr if no such token exists. 38 // 39 // Must not be called directly at the position directly after insertTokens() 40 // is called. 41 virtual FormatToken *getPreviousToken() = 0; 42 43 // Returns the token that would be returned by the next call to 44 // getNextToken(). 45 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 46 47 // Returns whether we are at the end of the file. 48 // This can be different from whether getNextToken() returned an eof token 49 // when the FormatTokenSource is a view on a part of the token stream. 50 virtual bool isEOF() = 0; 51 52 // Gets the current position in the token stream, to be used by setPosition(). 53 // 54 // Note that the value of the position is not meaningful, and specifically 55 // should not be used to get relative token positions. 56 virtual unsigned getPosition() = 0; 57 58 // Resets the token stream to the state it was in when getPosition() returned 59 // Position, and return the token at that position in the stream. 60 virtual FormatToken *setPosition(unsigned Position) = 0; 61 62 // Insert the given tokens before the current position. 63 // Returns the first token in \c Tokens. 64 // The next returned token will be the second token in \c Tokens. 65 // Requires the last token in Tokens to be EOF; once the EOF token is reached, 66 // the next token will be the last token returned by getNextToken(); 67 // 68 // For example, given the token sequence 'a1 a2': 69 // getNextToken() -> a1 70 // insertTokens('b1 b2') -> b1 71 // getNextToken() -> b2 72 // getNextToken() -> a1 73 // getNextToken() -> a2 74 virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 75 76 [[nodiscard]] FormatToken *getNextNonComment() { 77 FormatToken *Tok; 78 do { 79 Tok = getNextToken(); 80 assert(Tok); 81 } while (Tok->is(tok::comment)); 82 return Tok; 83 } 84 }; 85 86 class IndexedTokenSource : public FormatTokenSource { 87 public: 88 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 89 : Tokens(Tokens), Position(-1) {} 90 91 FormatToken *getNextToken() override { 92 if (Position >= 0 && isEOF()) { 93 LLVM_DEBUG({ 94 llvm::dbgs() << "Next "; 95 dbgToken(Position); 96 }); 97 return Tokens[Position]; 98 } 99 Position = successor(Position); 100 LLVM_DEBUG({ 101 llvm::dbgs() << "Next "; 102 dbgToken(Position); 103 }); 104 return Tokens[Position]; 105 } 106 107 FormatToken *getPreviousToken() override { 108 assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof)); 109 return Position > 0 ? Tokens[Position - 1] : nullptr; 110 } 111 112 FormatToken *peekNextToken(bool SkipComment = false) override { 113 if (isEOF()) 114 return Tokens[Position]; 115 int Next = successor(Position); 116 if (SkipComment) 117 while (Tokens[Next]->is(tok::comment)) 118 Next = successor(Next); 119 LLVM_DEBUG({ 120 llvm::dbgs() << "Peeking "; 121 dbgToken(Next); 122 }); 123 return Tokens[Next]; 124 } 125 126 bool isEOF() override { 127 return Position == -1 ? false : Tokens[Position]->is(tok::eof); 128 } 129 130 unsigned getPosition() override { 131 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 132 assert(Position >= 0); 133 return Position; 134 } 135 136 FormatToken *setPosition(unsigned P) override { 137 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 138 Position = P; 139 return Tokens[Position]; 140 } 141 142 FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 143 assert(Position != -1); 144 assert((*New.rbegin())->Tok.is(tok::eof)); 145 int Next = Tokens.size(); 146 Tokens.append(New.begin(), New.end()); 147 LLVM_DEBUG({ 148 llvm::dbgs() << "Inserting:\n"; 149 for (int I = Next, E = Tokens.size(); I != E; ++I) 150 dbgToken(I, " "); 151 llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 152 << Position << "\n"; 153 }); 154 Jumps[Tokens.size() - 1] = Position; 155 Position = Next; 156 LLVM_DEBUG({ 157 llvm::dbgs() << "At inserted token "; 158 dbgToken(Position); 159 }); 160 return Tokens[Position]; 161 } 162 163 void reset() { Position = -1; } 164 165 private: 166 int successor(int Current) const { 167 int Next = Current + 1; 168 auto it = Jumps.find(Next); 169 if (it != Jumps.end()) { 170 Next = it->second; 171 assert(!Jumps.contains(Next)); 172 } 173 return Next; 174 } 175 176 void dbgToken(int Position, StringRef Indent = "") { 177 FormatToken *Tok = Tokens[Position]; 178 llvm::dbgs() << Indent << "[" << Position 179 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 180 << ", Macro: " << !!Tok->MacroCtx << "\n"; 181 } 182 183 SmallVector<FormatToken *> Tokens; 184 int Position; 185 186 // Maps from position a to position b, so that when we reach a, the token 187 // stream continues at position b instead. 188 llvm::DenseMap<int, int> Jumps; 189 }; 190 191 class ScopedMacroState : public FormatTokenSource { 192 public: 193 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 194 FormatToken *&ResetToken) 195 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 196 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 197 Token(nullptr), PreviousToken(nullptr) { 198 FakeEOF.Tok.startToken(); 199 FakeEOF.Tok.setKind(tok::eof); 200 TokenSource = this; 201 Line.Level = 0; 202 Line.InPPDirective = true; 203 // InMacroBody gets set after the `#define x` part. 204 } 205 206 ~ScopedMacroState() override { 207 TokenSource = PreviousTokenSource; 208 ResetToken = Token; 209 Line.InPPDirective = false; 210 Line.InMacroBody = false; 211 Line.Level = PreviousLineLevel; 212 } 213 214 FormatToken *getNextToken() override { 215 // The \c UnwrappedLineParser guards against this by never calling 216 // \c getNextToken() after it has encountered the first eof token. 217 assert(!eof()); 218 PreviousToken = Token; 219 Token = PreviousTokenSource->getNextToken(); 220 if (eof()) 221 return &FakeEOF; 222 return Token; 223 } 224 225 FormatToken *getPreviousToken() override { 226 return PreviousTokenSource->getPreviousToken(); 227 } 228 229 FormatToken *peekNextToken(bool SkipComment) override { 230 if (eof()) 231 return &FakeEOF; 232 return PreviousTokenSource->peekNextToken(SkipComment); 233 } 234 235 bool isEOF() override { return PreviousTokenSource->isEOF(); } 236 237 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 238 239 FormatToken *setPosition(unsigned Position) override { 240 PreviousToken = nullptr; 241 Token = PreviousTokenSource->setPosition(Position); 242 return Token; 243 } 244 245 FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 246 llvm_unreachable("Cannot insert tokens while parsing a macro."); 247 return nullptr; 248 } 249 250 private: 251 bool eof() { 252 return Token && Token->HasUnescapedNewline && 253 !continuesLineComment(*Token, PreviousToken, 254 /*MinColumnToken=*/PreviousToken); 255 } 256 257 FormatToken FakeEOF; 258 UnwrappedLine &Line; 259 FormatTokenSource *&TokenSource; 260 FormatToken *&ResetToken; 261 unsigned PreviousLineLevel; 262 FormatTokenSource *PreviousTokenSource; 263 264 FormatToken *Token; 265 FormatToken *PreviousToken; 266 }; 267 268 } // namespace format 269 } // namespace clang 270 271 #undef DEBUG_TYPE 272 273 #endif 274