1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 10 #define LLVM_MC_MCPARSER_MCASMLEXER_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/MC/MCAsmMacro.h" 15 #include <cassert> 16 #include <cstddef> 17 #include <string> 18 19 namespace llvm { 20 21 /// A callback class which is notified of each comment in an assembly file as 22 /// it is lexed. 23 class AsmCommentConsumer { 24 public: 25 virtual ~AsmCommentConsumer() = default; 26 27 /// Callback function for when a comment is lexed. Loc is the start of the 28 /// comment text (excluding the comment-start marker). CommentText is the text 29 /// of the comment, excluding the comment start and end markers, and the 30 /// newline for single-line comments. 31 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; 32 }; 33 34 35 /// Generic assembler lexer interface, for use by target specific assembly 36 /// lexers. 37 class MCAsmLexer { 38 /// The current token, stored in the base class for faster access. 39 SmallVector<AsmToken, 1> CurTok; 40 41 /// The location and description of the current error 42 SMLoc ErrLoc; 43 std::string Err; 44 45 protected: // Can only create subclasses. 46 const char *TokStart = nullptr; 47 bool SkipSpace = true; 48 bool AllowAtInIdentifier = false; 49 bool AllowHashInIdentifier = false; 50 bool IsAtStartOfStatement = true; 51 bool LexMasmHexFloats = false; 52 bool LexMasmIntegers = false; 53 bool LexMasmStrings = false; 54 bool LexMotorolaIntegers = false; 55 bool UseMasmDefaultRadix = false; 56 unsigned DefaultRadix = 10; 57 bool LexHLASMIntegers = false; 58 bool LexHLASMStrings = false; 59 AsmCommentConsumer *CommentConsumer = nullptr; 60 61 MCAsmLexer(); 62 63 virtual AsmToken LexToken() = 0; 64 SetError(SMLoc errLoc,const std::string & err)65 void SetError(SMLoc errLoc, const std::string &err) { 66 ErrLoc = errLoc; 67 Err = err; 68 } 69 70 public: 71 MCAsmLexer(const MCAsmLexer &) = delete; 72 MCAsmLexer &operator=(const MCAsmLexer &) = delete; 73 virtual ~MCAsmLexer(); 74 75 /// Consume the next token from the input stream and return it. 76 /// 77 /// The lexer will continuously return the end-of-file token once the end of 78 /// the main input file has been reached. Lex()79 const AsmToken &Lex() { 80 assert(!CurTok.empty()); 81 // Mark if we parsing out a EndOfStatement. 82 IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement; 83 CurTok.erase(CurTok.begin()); 84 // LexToken may generate multiple tokens via UnLex but will always return 85 // the first one. Place returned value at head of CurTok vector. 86 if (CurTok.empty()) { 87 AsmToken T = LexToken(); 88 CurTok.insert(CurTok.begin(), T); 89 } 90 return CurTok.front(); 91 } 92 UnLex(AsmToken const & Token)93 void UnLex(AsmToken const &Token) { 94 IsAtStartOfStatement = false; 95 CurTok.insert(CurTok.begin(), Token); 96 } 97 isAtStartOfStatement()98 bool isAtStartOfStatement() { return IsAtStartOfStatement; } 99 100 virtual StringRef LexUntilEndOfStatement() = 0; 101 102 /// Get the current source location. 103 SMLoc getLoc() const; 104 105 /// Get the current (last) lexed token. getTok()106 const AsmToken &getTok() const { 107 return CurTok[0]; 108 } 109 110 /// Look ahead at the next token to be lexed. 111 const AsmToken peekTok(bool ShouldSkipSpace = true) { 112 AsmToken Tok; 113 114 MutableArrayRef<AsmToken> Buf(Tok); 115 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 116 117 assert(ReadCount == 1); 118 (void)ReadCount; 119 120 return Tok; 121 } 122 123 /// Look ahead an arbitrary number of tokens. 124 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 125 bool ShouldSkipSpace = true) = 0; 126 127 /// Get the current error location getErrLoc()128 SMLoc getErrLoc() { 129 return ErrLoc; 130 } 131 132 /// Get the current error string getErr()133 const std::string &getErr() { 134 return Err; 135 } 136 137 /// Get the kind of current token. getKind()138 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 139 140 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)141 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 142 143 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)144 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 145 146 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)147 void setSkipSpace(bool val) { SkipSpace = val; } 148 getAllowAtInIdentifier()149 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)150 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 151 setAllowHashInIdentifier(bool V)152 void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; } 153 setCommentConsumer(AsmCommentConsumer * CommentConsumer)154 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { 155 this->CommentConsumer = CommentConsumer; 156 } 157 158 /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified 159 /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]). setLexMasmIntegers(bool V)160 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } 161 162 /// Set whether to use masm-style default-radix integer literals. If disabled, 163 /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]). useMasmDefaultRadix(bool V)164 void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; } 165 getMasmDefaultRadix()166 unsigned getMasmDefaultRadix() const { return DefaultRadix; } setMasmDefaultRadix(unsigned Radix)167 void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } 168 169 /// Set whether to lex masm-style hex float literals, such as 3f800000r. setLexMasmHexFloats(bool V)170 void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; } 171 172 /// Set whether to lex masm-style string literals, such as 'Can''t find file' 173 /// and "This ""value"" not found". setLexMasmStrings(bool V)174 void setLexMasmStrings(bool V) { LexMasmStrings = V; } 175 176 /// Set whether to lex Motorola-style integer literals, such as $deadbeef or 177 /// %01010110. setLexMotorolaIntegers(bool V)178 void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; } 179 180 /// Set whether to lex HLASM-flavour integers. For now this is only [0-9]* setLexHLASMIntegers(bool V)181 void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; } 182 183 /// Set whether to "lex" HLASM-flavour character and string literals. For now, 184 /// setting this option to true, will disable lexing for character and string 185 /// literals. setLexHLASMStrings(bool V)186 void setLexHLASMStrings(bool V) { LexHLASMStrings = V; } 187 }; 188 189 } // end namespace llvm 190 191 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H 192