xref: /freebsd/contrib/llvm-project/clang/lib/Format/FormatTokenSource.h (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file defines the \c FormatTokenSource interface, which provides a token
11 /// stream as well as the ability to manipulate the token stream.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17 
18 #include "UnwrappedLineParser.h"
19 
20 #define DEBUG_TYPE "format-token-source"
21 
22 namespace clang {
23 namespace format {
24 
25 // Navigate a token stream.
26 //
27 // Enables traversal of a token stream, resetting the position in a token
28 // stream, as well as inserting new tokens.
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   //
39   // Must not be called directly at the position directly after insertTokens()
40   // is called.
41   virtual FormatToken *getPreviousToken() = 0;
42 
43   // Returns the token that would be returned by the next call to
44   // getNextToken().
45   virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
46 
47   // Returns whether we are at the end of the file.
48   // This can be different from whether getNextToken() returned an eof token
49   // when the FormatTokenSource is a view on a part of the token stream.
50   virtual bool isEOF() = 0;
51 
52   // Gets the current position in the token stream, to be used by setPosition().
53   //
54   // Note that the value of the position is not meaningful, and specifically
55   // should not be used to get relative token positions.
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 
62   // Insert the given tokens before the current position.
63   // Returns the first token in \c Tokens.
64   // The next returned token will be the second token in \c Tokens.
65   // Requires the last token in Tokens to be EOF; once the EOF token is reached,
66   // the next token will be the last token returned by getNextToken();
67   //
68   // For example, given the token sequence 'a1 a2':
69   // getNextToken() -> a1
70   // insertTokens('b1 b2') -> b1
71   // getNextToken() -> b2
72   // getNextToken() -> a1
73   // getNextToken() -> a2
74   virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
75 
76   [[nodiscard]] FormatToken *getNextNonComment() {
77     FormatToken *Tok;
78     do {
79       Tok = getNextToken();
80       assert(Tok);
81     } while (Tok->is(tok::comment));
82     return Tok;
83   }
84 };
85 
86 class IndexedTokenSource : public FormatTokenSource {
87 public:
88   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
89       : Tokens(Tokens), Position(-1) {}
90 
91   FormatToken *getNextToken() override {
92     if (Position >= 0 && isEOF()) {
93       LLVM_DEBUG({
94         llvm::dbgs() << "Next ";
95         dbgToken(Position);
96       });
97       return Tokens[Position];
98     }
99     Position = successor(Position);
100     LLVM_DEBUG({
101       llvm::dbgs() << "Next ";
102       dbgToken(Position);
103     });
104     return Tokens[Position];
105   }
106 
107   FormatToken *getPreviousToken() override {
108     assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
109     return Position > 0 ? Tokens[Position - 1] : nullptr;
110   }
111 
112   FormatToken *peekNextToken(bool SkipComment = false) override {
113     if (isEOF())
114       return Tokens[Position];
115     int Next = successor(Position);
116     if (SkipComment)
117       while (Tokens[Next]->is(tok::comment))
118         Next = successor(Next);
119     LLVM_DEBUG({
120       llvm::dbgs() << "Peeking ";
121       dbgToken(Next);
122     });
123     return Tokens[Next];
124   }
125 
126   bool isEOF() override {
127     return Position == -1 ? false : Tokens[Position]->is(tok::eof);
128   }
129 
130   unsigned getPosition() override {
131     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
132     assert(Position >= 0);
133     return Position;
134   }
135 
136   FormatToken *setPosition(unsigned P) override {
137     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
138     Position = P;
139     return Tokens[Position];
140   }
141 
142   FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
143     assert(Position != -1);
144     assert((*New.rbegin())->Tok.is(tok::eof));
145     int Next = Tokens.size();
146     Tokens.append(New.begin(), New.end());
147     LLVM_DEBUG({
148       llvm::dbgs() << "Inserting:\n";
149       for (int I = Next, E = Tokens.size(); I != E; ++I)
150         dbgToken(I, "  ");
151       llvm::dbgs() << "  Jump from: " << (Tokens.size() - 1) << " -> "
152                    << Position << "\n";
153     });
154     Jumps[Tokens.size() - 1] = Position;
155     Position = Next;
156     LLVM_DEBUG({
157       llvm::dbgs() << "At inserted token ";
158       dbgToken(Position);
159     });
160     return Tokens[Position];
161   }
162 
163   void reset() { Position = -1; }
164 
165 private:
166   int successor(int Current) const {
167     int Next = Current + 1;
168     auto it = Jumps.find(Next);
169     if (it != Jumps.end()) {
170       Next = it->second;
171       assert(!Jumps.contains(Next));
172     }
173     return Next;
174   }
175 
176   void dbgToken(int Position, StringRef Indent = "") {
177     FormatToken *Tok = Tokens[Position];
178     llvm::dbgs() << Indent << "[" << Position
179                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
180                  << ", Macro: " << !!Tok->MacroCtx << "\n";
181   }
182 
183   SmallVector<FormatToken *> Tokens;
184   int Position;
185 
186   // Maps from position a to position b, so that when we reach a, the token
187   // stream continues at position b instead.
188   llvm::DenseMap<int, int> Jumps;
189 };
190 
191 class ScopedMacroState : public FormatTokenSource {
192 public:
193   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
194                    FormatToken *&ResetToken)
195       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
196         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
197         Token(nullptr), PreviousToken(nullptr) {
198     FakeEOF.Tok.startToken();
199     FakeEOF.Tok.setKind(tok::eof);
200     TokenSource = this;
201     Line.Level = 0;
202     Line.InPPDirective = true;
203     // InMacroBody gets set after the `#define x` part.
204   }
205 
206   ~ScopedMacroState() override {
207     TokenSource = PreviousTokenSource;
208     ResetToken = Token;
209     Line.InPPDirective = false;
210     Line.InMacroBody = false;
211     Line.Level = PreviousLineLevel;
212   }
213 
214   FormatToken *getNextToken() override {
215     // The \c UnwrappedLineParser guards against this by never calling
216     // \c getNextToken() after it has encountered the first eof token.
217     assert(!eof());
218     PreviousToken = Token;
219     Token = PreviousTokenSource->getNextToken();
220     if (eof())
221       return &FakeEOF;
222     return Token;
223   }
224 
225   FormatToken *getPreviousToken() override {
226     return PreviousTokenSource->getPreviousToken();
227   }
228 
229   FormatToken *peekNextToken(bool SkipComment) override {
230     if (eof())
231       return &FakeEOF;
232     return PreviousTokenSource->peekNextToken(SkipComment);
233   }
234 
235   bool isEOF() override { return PreviousTokenSource->isEOF(); }
236 
237   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
238 
239   FormatToken *setPosition(unsigned Position) override {
240     PreviousToken = nullptr;
241     Token = PreviousTokenSource->setPosition(Position);
242     return Token;
243   }
244 
245   FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
246     llvm_unreachable("Cannot insert tokens while parsing a macro.");
247     return nullptr;
248   }
249 
250 private:
251   bool eof() {
252     return Token && Token->HasUnescapedNewline &&
253            !continuesLineComment(*Token, PreviousToken,
254                                  /*MinColumnToken=*/PreviousToken);
255   }
256 
257   FormatToken FakeEOF;
258   UnwrappedLine &Line;
259   FormatTokenSource *&TokenSource;
260   FormatToken *&ResetToken;
261   unsigned PreviousLineLevel;
262   FormatTokenSource *PreviousTokenSource;
263 
264   FormatToken *Token;
265   FormatToken *PreviousToken;
266 };
267 
268 } // namespace format
269 } // namespace clang
270 
271 #undef DEBUG_TYPE
272 
273 #endif
274