xref: /freebsd/contrib/llvm-project/llvm/include/llvm/MC/MCParser/AsmLexer.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This class declares the lexer for assembly files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_MC_MCPARSER_ASMLEXER_H
14 #define LLVM_MC_MCPARSER_ASMLEXER_H
15 
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/MC/MCAsmMacro.h"
20 #include "llvm/Support/Compiler.h"
21 #include <cassert>
22 #include <cstddef>
23 #include <string>
24 #include <utility>
25 
26 namespace llvm {
27 
28 class MCAsmInfo;
29 
30 /// A callback class which is notified of each comment in an assembly file as
31 /// it is lexed.
32 class AsmCommentConsumer {
33 public:
34   virtual ~AsmCommentConsumer() = default;
35 
36   /// Callback function for when a comment is lexed. Loc is the start of the
37   /// comment text (excluding the comment-start marker). CommentText is the text
38   /// of the comment, excluding the comment start and end markers, and the
39   /// newline for single-line comments.
40   virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
41 };
42 
43 class AsmLexer {
44   /// The current token, stored in the base class for faster access.
45   SmallVector<AsmToken, 1> CurTok;
46 
47   const char *CurPtr = nullptr;
48   StringRef CurBuf;
49 
50   /// The location and description of the current error
51   SMLoc ErrLoc;
52   std::string Err;
53 
54   const MCAsmInfo &MAI;
55 
56   bool IsAtStartOfLine = true;
57   bool JustConsumedEOL = true;
58   bool IsPeeking = false;
59   bool EndStatementAtEOF = true;
60 
61   const char *TokStart = nullptr;
62   bool SkipSpace = true;
63   bool AllowAtInIdentifier = false;
64   bool AllowHashInIdentifier = false;
65   bool IsAtStartOfStatement = true;
66   bool LexMasmHexFloats = false;
67   bool LexMasmIntegers = false;
68   bool LexMasmStrings = false;
69   bool LexMotorolaIntegers = false;
70   bool UseMasmDefaultRadix = false;
71   unsigned DefaultRadix = 10;
72   bool LexHLASMIntegers = false;
73   bool LexHLASMStrings = false;
74   AsmCommentConsumer *CommentConsumer = nullptr;
75 
76   LLVM_ABI AsmToken LexToken();
77 
SetError(SMLoc errLoc,const std::string & err)78   void SetError(SMLoc errLoc, const std::string &err) {
79     ErrLoc = errLoc;
80     Err = err;
81   }
82 
83 public:
84   LLVM_ABI AsmLexer(const MCAsmInfo &MAI);
85   AsmLexer(const AsmLexer &) = delete;
86   AsmLexer &operator=(const AsmLexer &) = delete;
87 
88   /// Consume the next token from the input stream and return it.
89   ///
90   /// The lexer will continuously return the end-of-file token once the end of
91   /// the main input file has been reached.
Lex()92   const AsmToken &Lex() {
93     assert(!CurTok.empty());
94     // Mark if we parsing out a EndOfStatement.
95     JustConsumedEOL = CurTok.front().getKind() == AsmToken::EndOfStatement;
96     CurTok.erase(CurTok.begin());
97     // LexToken may generate multiple tokens via UnLex but will always return
98     // the first one. Place returned value at head of CurTok vector.
99     if (CurTok.empty()) {
100       AsmToken T = LexToken();
101       CurTok.insert(CurTok.begin(), T);
102     }
103     return CurTok.front();
104   }
105 
UnLex(AsmToken const & Token)106   void UnLex(AsmToken const &Token) {
107     CurTok.insert(CurTok.begin(), Token);
108   }
109 
justConsumedEOL()110   bool justConsumedEOL() { return JustConsumedEOL; }
111 
112   LLVM_ABI StringRef LexUntilEndOfStatement();
113 
114   /// Get the current source location.
getLoc()115   SMLoc getLoc() const { return SMLoc::getFromPointer(TokStart); }
116 
117   /// Get the current (last) lexed token.
getTok()118   const AsmToken &getTok() const { return CurTok[0]; }
119 
120   /// Look ahead at the next token to be lexed.
121   const AsmToken peekTok(bool ShouldSkipSpace = true) {
122     AsmToken Tok;
123 
124     MutableArrayRef<AsmToken> Buf(Tok);
125     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
126 
127     assert(ReadCount == 1);
128     (void)ReadCount;
129 
130     return Tok;
131   }
132 
133   /// Look ahead an arbitrary number of tokens.
134   LLVM_ABI size_t peekTokens(MutableArrayRef<AsmToken> Buf,
135                              bool ShouldSkipSpace = true);
136 
137   /// Get the current error location
getErrLoc()138   SMLoc getErrLoc() { return ErrLoc; }
139 
140   /// Get the current error string
getErr()141   const std::string &getErr() { return Err; }
142 
143   /// Get the kind of current token.
getKind()144   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
145 
146   /// Check if the current token has kind \p K.
is(AsmToken::TokenKind K)147   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
148 
149   /// Check if the current token has kind \p K.
isNot(AsmToken::TokenKind K)150   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
151 
152   /// Set whether spaces should be ignored by the lexer
setSkipSpace(bool val)153   void setSkipSpace(bool val) { SkipSpace = val; }
154 
getAllowAtInIdentifier()155   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
setAllowAtInIdentifier(bool v)156   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
157 
setAllowHashInIdentifier(bool V)158   void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
159 
setCommentConsumer(AsmCommentConsumer * CommentConsumer)160   void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
161     this->CommentConsumer = CommentConsumer;
162   }
163 
164   /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
165   /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
setLexMasmIntegers(bool V)166   void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
167 
168   /// Set whether to use masm-style default-radix integer literals. If disabled,
169   /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
useMasmDefaultRadix(bool V)170   void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
171 
getMasmDefaultRadix()172   unsigned getMasmDefaultRadix() const { return DefaultRadix; }
setMasmDefaultRadix(unsigned Radix)173   void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
174 
175   /// Set whether to lex masm-style hex float literals, such as 3f800000r.
setLexMasmHexFloats(bool V)176   void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
177 
178   /// Set whether to lex masm-style string literals, such as 'Can''t find file'
179   /// and "This ""value"" not found".
setLexMasmStrings(bool V)180   void setLexMasmStrings(bool V) { LexMasmStrings = V; }
181 
182   /// Set whether to lex Motorola-style integer literals, such as $deadbeef or
183   /// %01010110.
setLexMotorolaIntegers(bool V)184   void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
185 
186   /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
setLexHLASMIntegers(bool V)187   void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
188 
189   /// Set whether to "lex" HLASM-flavour character and string literals. For now,
190   /// setting this option to true, will disable lexing for character and string
191   /// literals.
setLexHLASMStrings(bool V)192   void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
193 
194   LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr,
195                           bool EndStatementAtEOF = true);
196 
getMAI()197   const MCAsmInfo &getMAI() const { return MAI; }
198 
199 private:
200   bool isAtStartOfComment(const char *Ptr);
201   bool isAtStatementSeparator(const char *Ptr);
202   [[nodiscard]] int getNextChar();
203   int peekNextChar();
204   AsmToken ReturnError(const char *Loc, const std::string &Msg);
205 
206   AsmToken LexIdentifier();
207   AsmToken LexSlash();
208   AsmToken LexLineComment();
209   AsmToken LexDigit();
210   AsmToken LexSingleQuote();
211   AsmToken LexQuote();
212   AsmToken LexFloatLiteral();
213   AsmToken LexHexFloatLiteral(bool NoIntDigits);
214 
215   StringRef LexUntilEndOfLine();
216 };
217 
218 } // end namespace llvm
219 
220 #endif // LLVM_MC_MCPARSER_ASMLEXER_H
221