1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the \c FormatTokenSource interface, which provides a token 11 /// stream as well as the ability to manipulate the token stream. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H 17 18 #include "FormatToken.h" 19 #include "UnwrappedLineParser.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include <cstddef> 22 23 #define DEBUG_TYPE "format-token-source" 24 25 namespace clang { 26 namespace format { 27 28 // Navigate a token stream. 29 // 30 // Enables traversal of a token stream, resetting the position in a token 31 // stream, as well as inserting new tokens. 32 class FormatTokenSource { 33 public: 34 virtual ~FormatTokenSource() {} 35 36 // Returns the next token in the token stream. 37 virtual FormatToken *getNextToken() = 0; 38 39 // Returns the token preceding the token returned by the last call to 40 // getNextToken() in the token stream, or nullptr if no such token exists. 41 // 42 // Must not be called directly at the position directly after insertTokens() 43 // is called. 44 virtual FormatToken *getPreviousToken() = 0; 45 46 // Returns the token that would be returned by the next call to 47 // getNextToken(). 48 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0; 49 50 // Returns whether we are at the end of the file. 51 // This can be different from whether getNextToken() returned an eof token 52 // when the FormatTokenSource is a view on a part of the token stream. 53 virtual bool isEOF() = 0; 54 55 // Gets the current position in the token stream, to be used by setPosition(). 56 // 57 // Note that the value of the position is not meaningful, and specifically 58 // should not be used to get relative token positions. 59 virtual unsigned getPosition() = 0; 60 61 // Resets the token stream to the state it was in when getPosition() returned 62 // Position, and return the token at that position in the stream. 63 virtual FormatToken *setPosition(unsigned Position) = 0; 64 65 // Insert the given tokens before the current position. 66 // Returns the first token in \c Tokens. 67 // The next returned token will be the second token in \c Tokens. 68 // Requires the last token in Tokens to be EOF; once the EOF token is reached, 69 // the next token will be the last token returned by getNextToken(); 70 // 71 // For example, given the token sequence 'a1 a2': 72 // getNextToken() -> a1 73 // insertTokens('b1 b2') -> b1 74 // getNextToken() -> b2 75 // getNextToken() -> a1 76 // getNextToken() -> a2 77 virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0; 78 }; 79 80 class IndexedTokenSource : public FormatTokenSource { 81 public: 82 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 83 : Tokens(Tokens), Position(-1) {} 84 85 FormatToken *getNextToken() override { 86 if (Position >= 0 && isEOF()) { 87 LLVM_DEBUG({ 88 llvm::dbgs() << "Next "; 89 dbgToken(Position); 90 }); 91 return Tokens[Position]; 92 } 93 Position = successor(Position); 94 LLVM_DEBUG({ 95 llvm::dbgs() << "Next "; 96 dbgToken(Position); 97 }); 98 return Tokens[Position]; 99 } 100 101 FormatToken *getPreviousToken() override { 102 assert(Position <= 0 || !Tokens[Position - 1]->is(tok::eof)); 103 return Position > 0 ? Tokens[Position - 1] : nullptr; 104 } 105 106 FormatToken *peekNextToken(bool SkipComment = false) override { 107 if (isEOF()) 108 return Tokens[Position]; 109 int Next = successor(Position); 110 if (SkipComment) 111 while (Tokens[Next]->is(tok::comment)) 112 Next = successor(Next); 113 LLVM_DEBUG({ 114 llvm::dbgs() << "Peeking "; 115 dbgToken(Next); 116 }); 117 return Tokens[Next]; 118 } 119 120 bool isEOF() override { 121 return Position == -1 ? false : Tokens[Position]->is(tok::eof); 122 } 123 124 unsigned getPosition() override { 125 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 126 assert(Position >= 0); 127 return Position; 128 } 129 130 FormatToken *setPosition(unsigned P) override { 131 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 132 Position = P; 133 return Tokens[Position]; 134 } 135 136 FormatToken *insertTokens(ArrayRef<FormatToken *> New) override { 137 assert(Position != -1); 138 assert((*New.rbegin())->Tok.is(tok::eof)); 139 int Next = Tokens.size(); 140 Tokens.append(New.begin(), New.end()); 141 LLVM_DEBUG({ 142 llvm::dbgs() << "Inserting:\n"; 143 for (int I = Next, E = Tokens.size(); I != E; ++I) 144 dbgToken(I, " "); 145 llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> " 146 << Position << "\n"; 147 }); 148 Jumps[Tokens.size() - 1] = Position; 149 Position = Next; 150 LLVM_DEBUG({ 151 llvm::dbgs() << "At inserted token "; 152 dbgToken(Position); 153 }); 154 return Tokens[Position]; 155 } 156 157 void reset() { Position = -1; } 158 159 private: 160 int successor(int Current) const { 161 int Next = Current + 1; 162 auto it = Jumps.find(Next); 163 if (it != Jumps.end()) { 164 Next = it->second; 165 assert(!Jumps.contains(Next)); 166 } 167 return Next; 168 } 169 170 void dbgToken(int Position, llvm::StringRef Indent = "") { 171 FormatToken *Tok = Tokens[Position]; 172 llvm::dbgs() << Indent << "[" << Position 173 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 174 << ", Macro: " << !!Tok->MacroCtx << "\n"; 175 } 176 177 SmallVector<FormatToken *> Tokens; 178 int Position; 179 180 // Maps from position a to position b, so that when we reach a, the token 181 // stream continues at position b instead. 182 llvm::DenseMap<int, int> Jumps; 183 }; 184 185 class ScopedMacroState : public FormatTokenSource { 186 public: 187 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 188 FormatToken *&ResetToken) 189 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 190 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 191 Token(nullptr), PreviousToken(nullptr) { 192 FakeEOF.Tok.startToken(); 193 FakeEOF.Tok.setKind(tok::eof); 194 TokenSource = this; 195 Line.Level = 0; 196 Line.InPPDirective = true; 197 // InMacroBody gets set after the `#define x` part. 198 } 199 200 ~ScopedMacroState() override { 201 TokenSource = PreviousTokenSource; 202 ResetToken = Token; 203 Line.InPPDirective = false; 204 Line.InMacroBody = false; 205 Line.Level = PreviousLineLevel; 206 } 207 208 FormatToken *getNextToken() override { 209 // The \c UnwrappedLineParser guards against this by never calling 210 // \c getNextToken() after it has encountered the first eof token. 211 assert(!eof()); 212 PreviousToken = Token; 213 Token = PreviousTokenSource->getNextToken(); 214 if (eof()) 215 return &FakeEOF; 216 return Token; 217 } 218 219 FormatToken *getPreviousToken() override { 220 return PreviousTokenSource->getPreviousToken(); 221 } 222 223 FormatToken *peekNextToken(bool SkipComment) override { 224 if (eof()) 225 return &FakeEOF; 226 return PreviousTokenSource->peekNextToken(SkipComment); 227 } 228 229 bool isEOF() override { return PreviousTokenSource->isEOF(); } 230 231 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 232 233 FormatToken *setPosition(unsigned Position) override { 234 PreviousToken = nullptr; 235 Token = PreviousTokenSource->setPosition(Position); 236 return Token; 237 } 238 239 FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override { 240 llvm_unreachable("Cannot insert tokens while parsing a macro."); 241 return nullptr; 242 } 243 244 private: 245 bool eof() { 246 return Token && Token->HasUnescapedNewline && 247 !continuesLineComment(*Token, PreviousToken, 248 /*MinColumnToken=*/PreviousToken); 249 } 250 251 FormatToken FakeEOF; 252 UnwrappedLine &Line; 253 FormatTokenSource *&TokenSource; 254 FormatToken *&ResetToken; 255 unsigned PreviousLineLevel; 256 FormatTokenSource *PreviousTokenSource; 257 258 FormatToken *Token; 259 FormatToken *PreviousToken; 260 }; 261 262 } // namespace format 263 } // namespace clang 264 265 #undef DEBUG_TYPE 266 267 #endif 268