1 //===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class declares the lexer for assembly files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_MC_MCPARSER_ASMLEXER_H 14 #define LLVM_MC_MCPARSER_ASMLEXER_H 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/MC/MCAsmMacro.h" 20 #include "llvm/Support/Compiler.h" 21 #include <cassert> 22 #include <cstddef> 23 #include <string> 24 #include <utility> 25 26 namespace llvm { 27 28 class MCAsmInfo; 29 30 /// A callback class which is notified of each comment in an assembly file as 31 /// it is lexed. 32 class AsmCommentConsumer { 33 public: 34 virtual ~AsmCommentConsumer() = default; 35 36 /// Callback function for when a comment is lexed. Loc is the start of the 37 /// comment text (excluding the comment-start marker). CommentText is the text 38 /// of the comment, excluding the comment start and end markers, and the 39 /// newline for single-line comments. 40 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; 41 }; 42 43 class AsmLexer { 44 /// The current token, stored in the base class for faster access. 45 SmallVector<AsmToken, 1> CurTok; 46 47 const char *CurPtr = nullptr; 48 StringRef CurBuf; 49 50 /// The location and description of the current error 51 SMLoc ErrLoc; 52 std::string Err; 53 54 const MCAsmInfo &MAI; 55 56 bool IsAtStartOfLine = true; 57 bool JustConsumedEOL = true; 58 bool IsPeeking = false; 59 bool EndStatementAtEOF = true; 60 61 const char *TokStart = nullptr; 62 bool SkipSpace = true; 63 bool AllowAtInIdentifier = false; 64 bool AllowHashInIdentifier = false; 65 bool IsAtStartOfStatement = true; 66 bool LexMasmHexFloats = false; 67 bool LexMasmIntegers = false; 68 bool LexMasmStrings = false; 69 bool LexMotorolaIntegers = false; 70 bool UseMasmDefaultRadix = false; 71 unsigned DefaultRadix = 10; 72 bool LexHLASMIntegers = false; 73 bool LexHLASMStrings = false; 74 AsmCommentConsumer *CommentConsumer = nullptr; 75 76 LLVM_ABI AsmToken LexToken(); 77 SetError(SMLoc errLoc,const std::string & err)78 void SetError(SMLoc errLoc, const std::string &err) { 79 ErrLoc = errLoc; 80 Err = err; 81 } 82 83 public: 84 LLVM_ABI AsmLexer(const MCAsmInfo &MAI); 85 AsmLexer(const AsmLexer &) = delete; 86 AsmLexer &operator=(const AsmLexer &) = delete; 87 88 /// Consume the next token from the input stream and return it. 89 /// 90 /// The lexer will continuously return the end-of-file token once the end of 91 /// the main input file has been reached. Lex()92 const AsmToken &Lex() { 93 assert(!CurTok.empty()); 94 // Mark if we parsing out a EndOfStatement. 95 JustConsumedEOL = CurTok.front().getKind() == AsmToken::EndOfStatement; 96 CurTok.erase(CurTok.begin()); 97 // LexToken may generate multiple tokens via UnLex but will always return 98 // the first one. Place returned value at head of CurTok vector. 99 if (CurTok.empty()) { 100 AsmToken T = LexToken(); 101 CurTok.insert(CurTok.begin(), T); 102 } 103 return CurTok.front(); 104 } 105 UnLex(AsmToken const & Token)106 void UnLex(AsmToken const &Token) { 107 CurTok.insert(CurTok.begin(), Token); 108 } 109 justConsumedEOL()110 bool justConsumedEOL() { return JustConsumedEOL; } 111 112 LLVM_ABI StringRef LexUntilEndOfStatement(); 113 114 /// Get the current source location. getLoc()115 SMLoc getLoc() const { return SMLoc::getFromPointer(TokStart); } 116 117 /// Get the current (last) lexed token. getTok()118 const AsmToken &getTok() const { return CurTok[0]; } 119 120 /// Look ahead at the next token to be lexed. 121 const AsmToken peekTok(bool ShouldSkipSpace = true) { 122 AsmToken Tok; 123 124 MutableArrayRef<AsmToken> Buf(Tok); 125 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 126 127 assert(ReadCount == 1); 128 (void)ReadCount; 129 130 return Tok; 131 } 132 133 /// Look ahead an arbitrary number of tokens. 134 LLVM_ABI size_t peekTokens(MutableArrayRef<AsmToken> Buf, 135 bool ShouldSkipSpace = true); 136 137 /// Get the current error location getErrLoc()138 SMLoc getErrLoc() { return ErrLoc; } 139 140 /// Get the current error string getErr()141 const std::string &getErr() { return Err; } 142 143 /// Get the kind of current token. getKind()144 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 145 146 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)147 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 148 149 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)150 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 151 152 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)153 void setSkipSpace(bool val) { SkipSpace = val; } 154 getAllowAtInIdentifier()155 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)156 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 157 setAllowHashInIdentifier(bool V)158 void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; } 159 setCommentConsumer(AsmCommentConsumer * CommentConsumer)160 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { 161 this->CommentConsumer = CommentConsumer; 162 } 163 164 /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified 165 /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]). setLexMasmIntegers(bool V)166 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } 167 168 /// Set whether to use masm-style default-radix integer literals. If disabled, 169 /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]). useMasmDefaultRadix(bool V)170 void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; } 171 getMasmDefaultRadix()172 unsigned getMasmDefaultRadix() const { return DefaultRadix; } setMasmDefaultRadix(unsigned Radix)173 void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } 174 175 /// Set whether to lex masm-style hex float literals, such as 3f800000r. setLexMasmHexFloats(bool V)176 void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; } 177 178 /// Set whether to lex masm-style string literals, such as 'Can''t find file' 179 /// and "This ""value"" not found". setLexMasmStrings(bool V)180 void setLexMasmStrings(bool V) { LexMasmStrings = V; } 181 182 /// Set whether to lex Motorola-style integer literals, such as $deadbeef or 183 /// %01010110. setLexMotorolaIntegers(bool V)184 void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; } 185 186 /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]* setLexHLASMIntegers(bool V)187 void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; } 188 189 /// Set whether to "lex" HLASM-flavour character and string literals. For now, 190 /// setting this option to true, will disable lexing for character and string 191 /// literals. setLexHLASMStrings(bool V)192 void setLexHLASMStrings(bool V) { LexHLASMStrings = V; } 193 194 LLVM_ABI void setBuffer(StringRef Buf, const char *ptr = nullptr, 195 bool EndStatementAtEOF = true); 196 getMAI()197 const MCAsmInfo &getMAI() const { return MAI; } 198 199 private: 200 bool isAtStartOfComment(const char *Ptr); 201 bool isAtStatementSeparator(const char *Ptr); 202 [[nodiscard]] int getNextChar(); 203 int peekNextChar(); 204 AsmToken ReturnError(const char *Loc, const std::string &Msg); 205 206 AsmToken LexIdentifier(); 207 AsmToken LexSlash(); 208 AsmToken LexLineComment(); 209 AsmToken LexDigit(); 210 AsmToken LexSingleQuote(); 211 AsmToken LexQuote(); 212 AsmToken LexFloatLiteral(); 213 AsmToken LexHexFloatLiteral(bool NoIntDigits); 214 215 StringRef LexUntilEndOfLine(); 216 }; 217 218 } // end namespace llvm 219 220 #endif // LLVM_MC_MCPARSER_ASMLEXER_H 221