10b57cec5SDimitry Andric //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This class implements the lexer for assembly files. 100b57cec5SDimitry Andric // 110b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 120b57cec5SDimitry Andric 130b57cec5SDimitry Andric #include "llvm/MC/MCParser/AsmLexer.h" 140b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 160b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 170b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 180b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h" 190b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h" 200b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmLexer.h" 21e8d8bef9SDimitry Andric #include "llvm/Support/Compiler.h" 220b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h" 230b57cec5SDimitry Andric #include "llvm/Support/SaveAndRestore.h" 240b57cec5SDimitry Andric #include <cassert> 250b57cec5SDimitry Andric #include <cctype> 260b57cec5SDimitry Andric #include <cstdio> 270b57cec5SDimitry Andric #include <cstring> 280b57cec5SDimitry Andric #include <string> 290b57cec5SDimitry Andric #include <tuple> 300b57cec5SDimitry Andric #include <utility> 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric using namespace llvm; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { 350b57cec5SDimitry Andric AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); 36fe6060f1SDimitry Andric LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers(); 370b57cec5SDimitry Andric } 380b57cec5SDimitry Andric 390b57cec5SDimitry Andric AsmLexer::~AsmLexer() = default; 400b57cec5SDimitry Andric 415ffd83dbSDimitry Andric void AsmLexer::setBuffer(StringRef Buf, const char *ptr, 425ffd83dbSDimitry Andric bool EndStatementAtEOF) { 430b57cec5SDimitry Andric CurBuf = Buf; 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric if (ptr) 460b57cec5SDimitry Andric CurPtr = ptr; 470b57cec5SDimitry Andric else 480b57cec5SDimitry Andric CurPtr = CurBuf.begin(); 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric TokStart = nullptr; 515ffd83dbSDimitry Andric this->EndStatementAtEOF = EndStatementAtEOF; 520b57cec5SDimitry Andric } 530b57cec5SDimitry Andric 540b57cec5SDimitry Andric /// ReturnError - Set the error to the specified string at the specified 550b57cec5SDimitry Andric /// location. This is defined to always return AsmToken::Error. 560b57cec5SDimitry Andric AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 570b57cec5SDimitry Andric SetError(SMLoc::getFromPointer(Loc), Msg); 580b57cec5SDimitry Andric 590b57cec5SDimitry Andric return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); 600b57cec5SDimitry Andric } 610b57cec5SDimitry Andric 620b57cec5SDimitry Andric int AsmLexer::getNextChar() { 630b57cec5SDimitry Andric if (CurPtr == CurBuf.end()) 640b57cec5SDimitry Andric return EOF; 650b57cec5SDimitry Andric return (unsigned char)*CurPtr++; 660b57cec5SDimitry Andric } 670b57cec5SDimitry Andric 68e8d8bef9SDimitry Andric int AsmLexer::peekNextChar() { 69e8d8bef9SDimitry Andric if (CurPtr == CurBuf.end()) 70e8d8bef9SDimitry Andric return EOF; 71e8d8bef9SDimitry Andric return (unsigned char)*CurPtr; 72e8d8bef9SDimitry Andric } 73e8d8bef9SDimitry Andric 740b57cec5SDimitry Andric /// The leading integral digit sequence and dot should have already been 750b57cec5SDimitry Andric /// consumed, some or all of the fractional digit sequence *can* have been 760b57cec5SDimitry Andric /// consumed. 770b57cec5SDimitry Andric AsmToken AsmLexer::LexFloatLiteral() { 780b57cec5SDimitry Andric // Skip the fractional digit sequence. 790b57cec5SDimitry Andric while (isDigit(*CurPtr)) 800b57cec5SDimitry Andric ++CurPtr; 810b57cec5SDimitry Andric 820b57cec5SDimitry Andric if (*CurPtr == '-' || *CurPtr == '+') 83fe6060f1SDimitry Andric return ReturnError(CurPtr, "invalid sign in float literal"); 840b57cec5SDimitry Andric 850b57cec5SDimitry Andric // Check for exponent 860b57cec5SDimitry Andric if ((*CurPtr == 'e' || *CurPtr == 'E')) { 870b57cec5SDimitry Andric ++CurPtr; 880b57cec5SDimitry Andric 890b57cec5SDimitry Andric if (*CurPtr == '-' || *CurPtr == '+') 900b57cec5SDimitry Andric ++CurPtr; 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric while (isDigit(*CurPtr)) 930b57cec5SDimitry Andric ++CurPtr; 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric return AsmToken(AsmToken::Real, 970b57cec5SDimitry Andric StringRef(TokStart, CurPtr - TokStart)); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 1010b57cec5SDimitry Andric /// while making sure there are enough actual digits around for the constant to 1020b57cec5SDimitry Andric /// be valid. 1030b57cec5SDimitry Andric /// 1040b57cec5SDimitry Andric /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 1050b57cec5SDimitry Andric /// before we get here. 1060b57cec5SDimitry Andric AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 1070b57cec5SDimitry Andric assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 1080b57cec5SDimitry Andric "unexpected parse state in floating hex"); 1090b57cec5SDimitry Andric bool NoFracDigits = true; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric // Skip the fractional part if there is one 1120b57cec5SDimitry Andric if (*CurPtr == '.') { 1130b57cec5SDimitry Andric ++CurPtr; 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric const char *FracStart = CurPtr; 1160b57cec5SDimitry Andric while (isHexDigit(*CurPtr)) 1170b57cec5SDimitry Andric ++CurPtr; 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric NoFracDigits = CurPtr == FracStart; 1200b57cec5SDimitry Andric } 1210b57cec5SDimitry Andric 1220b57cec5SDimitry Andric if (NoIntDigits && NoFracDigits) 1230b57cec5SDimitry Andric return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 1240b57cec5SDimitry Andric "expected at least one significand digit"); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric // Make sure we do have some kind of proper exponent part 1270b57cec5SDimitry Andric if (*CurPtr != 'p' && *CurPtr != 'P') 1280b57cec5SDimitry Andric return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 1290b57cec5SDimitry Andric "expected exponent part 'p'"); 1300b57cec5SDimitry Andric ++CurPtr; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric if (*CurPtr == '+' || *CurPtr == '-') 1330b57cec5SDimitry Andric ++CurPtr; 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric // N.b. exponent digits are *not* hex 1360b57cec5SDimitry Andric const char *ExpStart = CurPtr; 1370b57cec5SDimitry Andric while (isDigit(*CurPtr)) 1380b57cec5SDimitry Andric ++CurPtr; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric if (CurPtr == ExpStart) 1410b57cec5SDimitry Andric return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 1420b57cec5SDimitry Andric "expected at least one exponent digit"); 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 1450b57cec5SDimitry Andric } 1460b57cec5SDimitry Andric 147fe6060f1SDimitry Andric /// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]* 148fe6060f1SDimitry Andric static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) { 149fe6060f1SDimitry Andric return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' || 150fe6060f1SDimitry Andric (AllowAt && C == '@') || (AllowHash && C == '#'); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric 1530b57cec5SDimitry Andric AsmToken AsmLexer::LexIdentifier() { 1540b57cec5SDimitry Andric // Check for floating point literals. 1550b57cec5SDimitry Andric if (CurPtr[-1] == '.' && isDigit(*CurPtr)) { 1560b57cec5SDimitry Andric // Disambiguate a .1243foo identifier from a floating literal. 1570b57cec5SDimitry Andric while (isDigit(*CurPtr)) 1580b57cec5SDimitry Andric ++CurPtr; 1590b57cec5SDimitry Andric 160fe6060f1SDimitry Andric if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier, 161fe6060f1SDimitry Andric AllowHashInIdentifier) || 1620b57cec5SDimitry Andric *CurPtr == 'e' || *CurPtr == 'E') 1630b57cec5SDimitry Andric return LexFloatLiteral(); 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric 166fe6060f1SDimitry Andric while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier)) 1670b57cec5SDimitry Andric ++CurPtr; 1680b57cec5SDimitry Andric 1690b57cec5SDimitry Andric // Handle . as a special case. 1700b57cec5SDimitry Andric if (CurPtr == TokStart+1 && TokStart[0] == '.') 1710b57cec5SDimitry Andric return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 1740b57cec5SDimitry Andric } 1750b57cec5SDimitry Andric 1760b57cec5SDimitry Andric /// LexSlash: Slash: / 1770b57cec5SDimitry Andric /// C-Style Comment: /* ... */ 178fe6060f1SDimitry Andric /// C-style Comment: // ... 1790b57cec5SDimitry Andric AsmToken AsmLexer::LexSlash() { 180fe6060f1SDimitry Andric if (!MAI.shouldAllowAdditionalComments()) { 181fe6060f1SDimitry Andric IsAtStartOfStatement = false; 182fe6060f1SDimitry Andric return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); 183fe6060f1SDimitry Andric } 184fe6060f1SDimitry Andric 1850b57cec5SDimitry Andric switch (*CurPtr) { 1860b57cec5SDimitry Andric case '*': 1870b57cec5SDimitry Andric IsAtStartOfStatement = false; 1880b57cec5SDimitry Andric break; // C style comment. 1890b57cec5SDimitry Andric case '/': 1900b57cec5SDimitry Andric ++CurPtr; 1910b57cec5SDimitry Andric return LexLineComment(); 1920b57cec5SDimitry Andric default: 1930b57cec5SDimitry Andric IsAtStartOfStatement = false; 1940b57cec5SDimitry Andric return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); 1950b57cec5SDimitry Andric } 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric // C Style comment. 1980b57cec5SDimitry Andric ++CurPtr; // skip the star. 1990b57cec5SDimitry Andric const char *CommentTextStart = CurPtr; 2000b57cec5SDimitry Andric while (CurPtr != CurBuf.end()) { 2010b57cec5SDimitry Andric switch (*CurPtr++) { 2020b57cec5SDimitry Andric case '*': 2030b57cec5SDimitry Andric // End of the comment? 2040b57cec5SDimitry Andric if (*CurPtr != '/') 2050b57cec5SDimitry Andric break; 2060b57cec5SDimitry Andric // If we have a CommentConsumer, notify it about the comment. 2070b57cec5SDimitry Andric if (CommentConsumer) { 2080b57cec5SDimitry Andric CommentConsumer->HandleComment( 2090b57cec5SDimitry Andric SMLoc::getFromPointer(CommentTextStart), 2100b57cec5SDimitry Andric StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric ++CurPtr; // End the */. 2130b57cec5SDimitry Andric return AsmToken(AsmToken::Comment, 2140b57cec5SDimitry Andric StringRef(TokStart, CurPtr - TokStart)); 2150b57cec5SDimitry Andric } 2160b57cec5SDimitry Andric } 2170b57cec5SDimitry Andric return ReturnError(TokStart, "unterminated comment"); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric /// LexLineComment: Comment: #[^\n]* 2210b57cec5SDimitry Andric /// : //[^\n]* 2220b57cec5SDimitry Andric AsmToken AsmLexer::LexLineComment() { 2230b57cec5SDimitry Andric // Mark This as an end of statement with a body of the 2240b57cec5SDimitry Andric // comment. While it would be nicer to leave this two tokens, 2250b57cec5SDimitry Andric // backwards compatability with TargetParsers makes keeping this in this form 2260b57cec5SDimitry Andric // better. 2270b57cec5SDimitry Andric const char *CommentTextStart = CurPtr; 2280b57cec5SDimitry Andric int CurChar = getNextChar(); 2290b57cec5SDimitry Andric while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 2300b57cec5SDimitry Andric CurChar = getNextChar(); 231349cc55cSDimitry Andric const char *NewlinePtr = CurPtr; 2320b57cec5SDimitry Andric if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n') 2330b57cec5SDimitry Andric ++CurPtr; 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric // If we have a CommentConsumer, notify it about the comment. 2360b57cec5SDimitry Andric if (CommentConsumer) { 2370b57cec5SDimitry Andric CommentConsumer->HandleComment( 2380b57cec5SDimitry Andric SMLoc::getFromPointer(CommentTextStart), 239349cc55cSDimitry Andric StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart)); 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric IsAtStartOfLine = true; 2430b57cec5SDimitry Andric // This is a whole line comment. leave newline 2440b57cec5SDimitry Andric if (IsAtStartOfStatement) 2450b57cec5SDimitry Andric return AsmToken(AsmToken::EndOfStatement, 2460b57cec5SDimitry Andric StringRef(TokStart, CurPtr - TokStart)); 2470b57cec5SDimitry Andric IsAtStartOfStatement = true; 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric return AsmToken(AsmToken::EndOfStatement, 2500b57cec5SDimitry Andric StringRef(TokStart, CurPtr - 1 - TokStart)); 2510b57cec5SDimitry Andric } 2520b57cec5SDimitry Andric 2530b57cec5SDimitry Andric static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 25481ad6265SDimitry Andric // Skip case-insensitive ULL, UL, U, L and LL suffixes. 25581ad6265SDimitry Andric if (CurPtr[0] == 'U' || CurPtr[0] == 'u') 2560b57cec5SDimitry Andric ++CurPtr; 25781ad6265SDimitry Andric if (CurPtr[0] == 'L' || CurPtr[0] == 'l') 2580b57cec5SDimitry Andric ++CurPtr; 25981ad6265SDimitry Andric if (CurPtr[0] == 'L' || CurPtr[0] == 'l') 2600b57cec5SDimitry Andric ++CurPtr; 2610b57cec5SDimitry Andric } 2620b57cec5SDimitry Andric 2630b57cec5SDimitry Andric // Look ahead to search for first non-hex digit, if it's [hH], then we treat the 2640b57cec5SDimitry Andric // integer as a hexadecimal, possibly with leading zeroes. 2650b57cec5SDimitry Andric static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, 2660b57cec5SDimitry Andric bool LexHex) { 2670b57cec5SDimitry Andric const char *FirstNonDec = nullptr; 2680b57cec5SDimitry Andric const char *LookAhead = CurPtr; 2690b57cec5SDimitry Andric while (true) { 2700b57cec5SDimitry Andric if (isDigit(*LookAhead)) { 2710b57cec5SDimitry Andric ++LookAhead; 2720b57cec5SDimitry Andric } else { 2730b57cec5SDimitry Andric if (!FirstNonDec) 2740b57cec5SDimitry Andric FirstNonDec = LookAhead; 2750b57cec5SDimitry Andric 2760b57cec5SDimitry Andric // Keep going if we are looking for a 'h' suffix. 2770b57cec5SDimitry Andric if (LexHex && isHexDigit(*LookAhead)) 2780b57cec5SDimitry Andric ++LookAhead; 2790b57cec5SDimitry Andric else 2800b57cec5SDimitry Andric break; 2810b57cec5SDimitry Andric } 2820b57cec5SDimitry Andric } 2830b57cec5SDimitry Andric bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H'); 2840b57cec5SDimitry Andric CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec; 2850b57cec5SDimitry Andric if (isHex) 2860b57cec5SDimitry Andric return 16; 2870b57cec5SDimitry Andric return DefaultRadix; 2880b57cec5SDimitry Andric } 2890b57cec5SDimitry Andric 290e8d8bef9SDimitry Andric static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) { 291e8d8bef9SDimitry Andric while (hexDigitValue(*CurPtr) < DefaultRadix) { 292e8d8bef9SDimitry Andric ++CurPtr; 293e8d8bef9SDimitry Andric } 294e8d8bef9SDimitry Andric return CurPtr; 295e8d8bef9SDimitry Andric } 296e8d8bef9SDimitry Andric 297e8d8bef9SDimitry Andric static AsmToken intToken(StringRef Ref, APInt &Value) { 2980b57cec5SDimitry Andric if (Value.isIntN(64)) 2990b57cec5SDimitry Andric return AsmToken(AsmToken::Integer, Ref, Value); 3000b57cec5SDimitry Andric return AsmToken(AsmToken::BigNum, Ref, Value); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric 303e8d8bef9SDimitry Andric static std::string radixName(unsigned Radix) { 304e8d8bef9SDimitry Andric switch (Radix) { 305e8d8bef9SDimitry Andric case 2: 306e8d8bef9SDimitry Andric return "binary"; 307e8d8bef9SDimitry Andric case 8: 308e8d8bef9SDimitry Andric return "octal"; 309e8d8bef9SDimitry Andric case 10: 310e8d8bef9SDimitry Andric return "decimal"; 311e8d8bef9SDimitry Andric case 16: 312e8d8bef9SDimitry Andric return "hexadecimal"; 313e8d8bef9SDimitry Andric default: 314e8d8bef9SDimitry Andric return "base-" + std::to_string(Radix); 315e8d8bef9SDimitry Andric } 316e8d8bef9SDimitry Andric } 317e8d8bef9SDimitry Andric 3180b57cec5SDimitry Andric /// LexDigit: First character is [0-9]. 3190b57cec5SDimitry Andric /// Local Label: [0-9][:] 3200b57cec5SDimitry Andric /// Forward/Backward Label: [0-9][fb] 3210b57cec5SDimitry Andric /// Binary integer: 0b[01]+ 3220b57cec5SDimitry Andric /// Octal integer: 0[0-7]+ 3230b57cec5SDimitry Andric /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 3240b57cec5SDimitry Andric /// Decimal integer: [1-9][0-9]* 3250b57cec5SDimitry Andric AsmToken AsmLexer::LexDigit() { 326e8d8bef9SDimitry Andric // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY]) 327e8d8bef9SDimitry Andric // MASM-flavor octal integer: [0-7]+[oOqQ] 328e8d8bef9SDimitry Andric // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT]) 3290b57cec5SDimitry Andric // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] 3300b57cec5SDimitry Andric if (LexMasmIntegers && isdigit(CurPtr[-1])) { 331e8d8bef9SDimitry Andric const char *FirstNonBinary = 332e8d8bef9SDimitry Andric (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; 333e8d8bef9SDimitry Andric const char *FirstNonDecimal = 334e8d8bef9SDimitry Andric (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr; 3350b57cec5SDimitry Andric const char *OldCurPtr = CurPtr; 3360b57cec5SDimitry Andric while (isHexDigit(*CurPtr)) { 337e8d8bef9SDimitry Andric switch (*CurPtr) { 338e8d8bef9SDimitry Andric default: 339e8d8bef9SDimitry Andric if (!FirstNonDecimal) { 340e8d8bef9SDimitry Andric FirstNonDecimal = CurPtr; 341e8d8bef9SDimitry Andric } 342*bdd1243dSDimitry Andric [[fallthrough]]; 343e8d8bef9SDimitry Andric case '9': 344e8d8bef9SDimitry Andric case '8': 345e8d8bef9SDimitry Andric case '7': 346e8d8bef9SDimitry Andric case '6': 347e8d8bef9SDimitry Andric case '5': 348e8d8bef9SDimitry Andric case '4': 349e8d8bef9SDimitry Andric case '3': 350e8d8bef9SDimitry Andric case '2': 351e8d8bef9SDimitry Andric if (!FirstNonBinary) { 3520b57cec5SDimitry Andric FirstNonBinary = CurPtr; 353e8d8bef9SDimitry Andric } 354e8d8bef9SDimitry Andric break; 355e8d8bef9SDimitry Andric case '1': 356e8d8bef9SDimitry Andric case '0': 357e8d8bef9SDimitry Andric break; 358e8d8bef9SDimitry Andric } 3590b57cec5SDimitry Andric ++CurPtr; 3600b57cec5SDimitry Andric } 361e8d8bef9SDimitry Andric if (*CurPtr == '.') { 362e8d8bef9SDimitry Andric // MASM float literals (other than hex floats) always contain a ".", and 363e8d8bef9SDimitry Andric // are always written in decimal. 364e8d8bef9SDimitry Andric ++CurPtr; 365e8d8bef9SDimitry Andric return LexFloatLiteral(); 366e8d8bef9SDimitry Andric } 367e8d8bef9SDimitry Andric 368e8d8bef9SDimitry Andric if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) { 369e8d8bef9SDimitry Andric ++CurPtr; 370e8d8bef9SDimitry Andric return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 371e8d8bef9SDimitry Andric } 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric unsigned Radix = 0; 3740b57cec5SDimitry Andric if (*CurPtr == 'h' || *CurPtr == 'H') { 3750b57cec5SDimitry Andric // hexadecimal number 3760b57cec5SDimitry Andric ++CurPtr; 3770b57cec5SDimitry Andric Radix = 16; 378e8d8bef9SDimitry Andric } else if (*CurPtr == 't' || *CurPtr == 'T') { 379e8d8bef9SDimitry Andric // decimal number 380e8d8bef9SDimitry Andric ++CurPtr; 381e8d8bef9SDimitry Andric Radix = 10; 382e8d8bef9SDimitry Andric } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' || 383e8d8bef9SDimitry Andric *CurPtr == 'Q') { 384e8d8bef9SDimitry Andric // octal number 385e8d8bef9SDimitry Andric ++CurPtr; 386e8d8bef9SDimitry Andric Radix = 8; 387e8d8bef9SDimitry Andric } else if (*CurPtr == 'y' || *CurPtr == 'Y') { 388e8d8bef9SDimitry Andric // binary number 389e8d8bef9SDimitry Andric ++CurPtr; 3900b57cec5SDimitry Andric Radix = 2; 391e8d8bef9SDimitry Andric } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr && 392e8d8bef9SDimitry Andric DefaultRadix < 14 && 393e8d8bef9SDimitry Andric (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) { 394e8d8bef9SDimitry Andric Radix = 10; 395e8d8bef9SDimitry Andric } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && 396e8d8bef9SDimitry Andric DefaultRadix < 12 && 397e8d8bef9SDimitry Andric (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) { 398e8d8bef9SDimitry Andric Radix = 2; 399e8d8bef9SDimitry Andric } 4000b57cec5SDimitry Andric 401e8d8bef9SDimitry Andric if (Radix) { 4020b57cec5SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 4030b57cec5SDimitry Andric APInt Value(128, 0, true); 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric if (Result.drop_back().getAsInteger(Radix, Value)) 406e8d8bef9SDimitry Andric return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric // MSVC accepts and ignores type suffices on integer literals. 4090b57cec5SDimitry Andric SkipIgnoredIntegerSuffix(CurPtr); 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric return intToken(Result, Value); 4120b57cec5SDimitry Andric } 4130b57cec5SDimitry Andric 414e8d8bef9SDimitry Andric // default-radix integers, or floating point numbers, fall through 4150b57cec5SDimitry Andric CurPtr = OldCurPtr; 4160b57cec5SDimitry Andric } 4170b57cec5SDimitry Andric 418e8d8bef9SDimitry Andric // MASM default-radix integers: [0-9a-fA-F]+ 419e8d8bef9SDimitry Andric // (All other integer literals have a radix specifier.) 420e8d8bef9SDimitry Andric if (LexMasmIntegers && UseMasmDefaultRadix) { 421e8d8bef9SDimitry Andric CurPtr = findLastDigit(CurPtr, 16); 422e8d8bef9SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 423e8d8bef9SDimitry Andric 424e8d8bef9SDimitry Andric APInt Value(128, 0, true); 425e8d8bef9SDimitry Andric if (Result.getAsInteger(DefaultRadix, Value)) { 426e8d8bef9SDimitry Andric return ReturnError(TokStart, 427e8d8bef9SDimitry Andric "invalid " + radixName(DefaultRadix) + " number"); 428e8d8bef9SDimitry Andric } 429e8d8bef9SDimitry Andric 430e8d8bef9SDimitry Andric return intToken(Result, Value); 431e8d8bef9SDimitry Andric } 432e8d8bef9SDimitry Andric 433fe6060f1SDimitry Andric // Motorola hex integers: $[0-9a-fA-F]+ 434fe6060f1SDimitry Andric if (LexMotorolaIntegers && CurPtr[-1] == '$') { 435fe6060f1SDimitry Andric const char *NumStart = CurPtr; 436fe6060f1SDimitry Andric while (isHexDigit(CurPtr[0])) 437fe6060f1SDimitry Andric ++CurPtr; 438fe6060f1SDimitry Andric 439fe6060f1SDimitry Andric APInt Result(128, 0); 440fe6060f1SDimitry Andric if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result)) 441fe6060f1SDimitry Andric return ReturnError(TokStart, "invalid hexadecimal number"); 442fe6060f1SDimitry Andric 443fe6060f1SDimitry Andric return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 444fe6060f1SDimitry Andric } 445fe6060f1SDimitry Andric 446fe6060f1SDimitry Andric // Motorola binary integers: %[01]+ 447fe6060f1SDimitry Andric if (LexMotorolaIntegers && CurPtr[-1] == '%') { 448fe6060f1SDimitry Andric const char *NumStart = CurPtr; 449fe6060f1SDimitry Andric while (*CurPtr == '0' || *CurPtr == '1') 450fe6060f1SDimitry Andric ++CurPtr; 451fe6060f1SDimitry Andric 452fe6060f1SDimitry Andric APInt Result(128, 0); 453fe6060f1SDimitry Andric if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result)) 454fe6060f1SDimitry Andric return ReturnError(TokStart, "invalid binary number"); 455fe6060f1SDimitry Andric 456fe6060f1SDimitry Andric return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 457fe6060f1SDimitry Andric } 458fe6060f1SDimitry Andric 4590b57cec5SDimitry Andric // Decimal integer: [1-9][0-9]* 460fe6060f1SDimitry Andric // HLASM-flavour decimal integer: [0-9][0-9]* 461fe6060f1SDimitry Andric // FIXME: Later on, support for fb for HLASM has to be added in 462fe6060f1SDimitry Andric // as they probably would be needed for asm goto 463fe6060f1SDimitry Andric if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') { 4640b57cec5SDimitry Andric unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); 465fe6060f1SDimitry Andric 466fe6060f1SDimitry Andric if (!LexHLASMIntegers) { 467fe6060f1SDimitry Andric bool IsHex = Radix == 16; 4680b57cec5SDimitry Andric // Check for floating point literals. 469fe6060f1SDimitry Andric if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { 4700b57cec5SDimitry Andric if (*CurPtr == '.') 4710b57cec5SDimitry Andric ++CurPtr; 4720b57cec5SDimitry Andric return LexFloatLiteral(); 4730b57cec5SDimitry Andric } 474fe6060f1SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric APInt Value(128, 0, true); 479fe6060f1SDimitry Andric if (Result.getAsInteger(Radix, Value)) 480e8d8bef9SDimitry Andric return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); 4810b57cec5SDimitry Andric 482fe6060f1SDimitry Andric if (!LexHLASMIntegers) 4830b57cec5SDimitry Andric // The darwin/x86 (and x86-64) assembler accepts and ignores type 4840b57cec5SDimitry Andric // suffices on integer literals. 4850b57cec5SDimitry Andric SkipIgnoredIntegerSuffix(CurPtr); 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric return intToken(Result, Value); 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { 4910b57cec5SDimitry Andric ++CurPtr; 4920b57cec5SDimitry Andric // See if we actually have "0b" as part of something like "jmp 0b\n" 4930b57cec5SDimitry Andric if (!isDigit(CurPtr[0])) { 4940b57cec5SDimitry Andric --CurPtr; 4950b57cec5SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 4960b57cec5SDimitry Andric return AsmToken(AsmToken::Integer, Result, 0); 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric const char *NumStart = CurPtr; 4990b57cec5SDimitry Andric while (CurPtr[0] == '0' || CurPtr[0] == '1') 5000b57cec5SDimitry Andric ++CurPtr; 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric // Requires at least one binary digit. 5030b57cec5SDimitry Andric if (CurPtr == NumStart) 5040b57cec5SDimitry Andric return ReturnError(TokStart, "invalid binary number"); 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 5070b57cec5SDimitry Andric 5080b57cec5SDimitry Andric APInt Value(128, 0, true); 5090b57cec5SDimitry Andric if (Result.substr(2).getAsInteger(2, Value)) 5100b57cec5SDimitry Andric return ReturnError(TokStart, "invalid binary number"); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 5130b57cec5SDimitry Andric // suffixes on integer literals. 5140b57cec5SDimitry Andric SkipIgnoredIntegerSuffix(CurPtr); 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric return intToken(Result, Value); 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric if ((*CurPtr == 'x') || (*CurPtr == 'X')) { 5200b57cec5SDimitry Andric ++CurPtr; 5210b57cec5SDimitry Andric const char *NumStart = CurPtr; 5220b57cec5SDimitry Andric while (isHexDigit(CurPtr[0])) 5230b57cec5SDimitry Andric ++CurPtr; 5240b57cec5SDimitry Andric 5250b57cec5SDimitry Andric // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 5260b57cec5SDimitry Andric // diagnosed by LexHexFloatLiteral). 5270b57cec5SDimitry Andric if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 5280b57cec5SDimitry Andric return LexHexFloatLiteral(NumStart == CurPtr); 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric // Otherwise requires at least one hex digit. 5310b57cec5SDimitry Andric if (CurPtr == NumStart) 5320b57cec5SDimitry Andric return ReturnError(CurPtr-2, "invalid hexadecimal number"); 5330b57cec5SDimitry Andric 5340b57cec5SDimitry Andric APInt Result(128, 0); 5350b57cec5SDimitry Andric if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 5360b57cec5SDimitry Andric return ReturnError(TokStart, "invalid hexadecimal number"); 5370b57cec5SDimitry Andric 5380b57cec5SDimitry Andric // Consume the optional [hH]. 5390b57cec5SDimitry Andric if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H')) 5400b57cec5SDimitry Andric ++CurPtr; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 5430b57cec5SDimitry Andric // suffixes on integer literals. 5440b57cec5SDimitry Andric SkipIgnoredIntegerSuffix(CurPtr); 5450b57cec5SDimitry Andric 5460b57cec5SDimitry Andric return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 5470b57cec5SDimitry Andric } 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric // Either octal or hexadecimal. 5500b57cec5SDimitry Andric APInt Value(128, 0, true); 5510b57cec5SDimitry Andric unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); 5520b57cec5SDimitry Andric StringRef Result(TokStart, CurPtr - TokStart); 5530b57cec5SDimitry Andric if (Result.getAsInteger(Radix, Value)) 554e8d8bef9SDimitry Andric return ReturnError(TokStart, "invalid " + radixName(Radix) + " number"); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric // Consume the [hH]. 5570b57cec5SDimitry Andric if (Radix == 16) 5580b57cec5SDimitry Andric ++CurPtr; 5590b57cec5SDimitry Andric 5600b57cec5SDimitry Andric // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 5610b57cec5SDimitry Andric // suffixes on integer literals. 5620b57cec5SDimitry Andric SkipIgnoredIntegerSuffix(CurPtr); 5630b57cec5SDimitry Andric 5640b57cec5SDimitry Andric return intToken(Result, Value); 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric /// LexSingleQuote: Integer: 'b' 5680b57cec5SDimitry Andric AsmToken AsmLexer::LexSingleQuote() { 5690b57cec5SDimitry Andric int CurChar = getNextChar(); 5700b57cec5SDimitry Andric 571fe6060f1SDimitry Andric if (LexHLASMStrings) 572fe6060f1SDimitry Andric return ReturnError(TokStart, "invalid usage of character literals"); 573fe6060f1SDimitry Andric 574e8d8bef9SDimitry Andric if (LexMasmStrings) { 575e8d8bef9SDimitry Andric while (CurChar != EOF) { 576e8d8bef9SDimitry Andric if (CurChar != '\'') { 577e8d8bef9SDimitry Andric CurChar = getNextChar(); 578e8d8bef9SDimitry Andric } else if (peekNextChar() == '\'') { 579e8d8bef9SDimitry Andric // In MASM single-quote strings, doubled single-quotes mean an escaped 580e8d8bef9SDimitry Andric // single quote, so should be lexed in. 581e8d8bef9SDimitry Andric getNextChar(); 582e8d8bef9SDimitry Andric CurChar = getNextChar(); 583e8d8bef9SDimitry Andric } else { 584e8d8bef9SDimitry Andric break; 585e8d8bef9SDimitry Andric } 586e8d8bef9SDimitry Andric } 587e8d8bef9SDimitry Andric if (CurChar == EOF) 588e8d8bef9SDimitry Andric return ReturnError(TokStart, "unterminated string constant"); 589e8d8bef9SDimitry Andric return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 590e8d8bef9SDimitry Andric } 591e8d8bef9SDimitry Andric 5920b57cec5SDimitry Andric if (CurChar == '\\') 5930b57cec5SDimitry Andric CurChar = getNextChar(); 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric if (CurChar == EOF) 5960b57cec5SDimitry Andric return ReturnError(TokStart, "unterminated single quote"); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric CurChar = getNextChar(); 5990b57cec5SDimitry Andric 6000b57cec5SDimitry Andric if (CurChar != '\'') 6010b57cec5SDimitry Andric return ReturnError(TokStart, "single quote way too long"); 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric // The idea here being that 'c' is basically just an integral 6040b57cec5SDimitry Andric // constant. 6050b57cec5SDimitry Andric StringRef Res = StringRef(TokStart,CurPtr - TokStart); 6060b57cec5SDimitry Andric long long Value; 6070b57cec5SDimitry Andric 6080b57cec5SDimitry Andric if (Res.startswith("\'\\")) { 6090b57cec5SDimitry Andric char theChar = Res[2]; 6100b57cec5SDimitry Andric switch (theChar) { 6110b57cec5SDimitry Andric default: Value = theChar; break; 6120b57cec5SDimitry Andric case '\'': Value = '\''; break; 6130b57cec5SDimitry Andric case 't': Value = '\t'; break; 6140b57cec5SDimitry Andric case 'n': Value = '\n'; break; 6150b57cec5SDimitry Andric case 'b': Value = '\b'; break; 616fe6060f1SDimitry Andric case 'f': Value = '\f'; break; 617fe6060f1SDimitry Andric case 'r': Value = '\r'; break; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric } else 6200b57cec5SDimitry Andric Value = TokStart[1]; 6210b57cec5SDimitry Andric 6220b57cec5SDimitry Andric return AsmToken(AsmToken::Integer, Res, Value); 6230b57cec5SDimitry Andric } 6240b57cec5SDimitry Andric 6250b57cec5SDimitry Andric /// LexQuote: String: "..." 6260b57cec5SDimitry Andric AsmToken AsmLexer::LexQuote() { 6270b57cec5SDimitry Andric int CurChar = getNextChar(); 628fe6060f1SDimitry Andric if (LexHLASMStrings) 629fe6060f1SDimitry Andric return ReturnError(TokStart, "invalid usage of string literals"); 630fe6060f1SDimitry Andric 631e8d8bef9SDimitry Andric if (LexMasmStrings) { 632e8d8bef9SDimitry Andric while (CurChar != EOF) { 633e8d8bef9SDimitry Andric if (CurChar != '"') { 634e8d8bef9SDimitry Andric CurChar = getNextChar(); 635e8d8bef9SDimitry Andric } else if (peekNextChar() == '"') { 636e8d8bef9SDimitry Andric // In MASM double-quoted strings, doubled double-quotes mean an escaped 637e8d8bef9SDimitry Andric // double quote, so should be lexed in. 638e8d8bef9SDimitry Andric getNextChar(); 639e8d8bef9SDimitry Andric CurChar = getNextChar(); 640e8d8bef9SDimitry Andric } else { 641e8d8bef9SDimitry Andric break; 642e8d8bef9SDimitry Andric } 643e8d8bef9SDimitry Andric } 644e8d8bef9SDimitry Andric if (CurChar == EOF) 645e8d8bef9SDimitry Andric return ReturnError(TokStart, "unterminated string constant"); 646e8d8bef9SDimitry Andric return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 647e8d8bef9SDimitry Andric } 648e8d8bef9SDimitry Andric 6490b57cec5SDimitry Andric // TODO: does gas allow multiline string constants? 6500b57cec5SDimitry Andric while (CurChar != '"') { 6510b57cec5SDimitry Andric if (CurChar == '\\') { 6520b57cec5SDimitry Andric // Allow \", etc. 6530b57cec5SDimitry Andric CurChar = getNextChar(); 6540b57cec5SDimitry Andric } 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric if (CurChar == EOF) 6570b57cec5SDimitry Andric return ReturnError(TokStart, "unterminated string constant"); 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric CurChar = getNextChar(); 6600b57cec5SDimitry Andric } 6610b57cec5SDimitry Andric 6620b57cec5SDimitry Andric return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric StringRef AsmLexer::LexUntilEndOfStatement() { 6660b57cec5SDimitry Andric TokStart = CurPtr; 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric while (!isAtStartOfComment(CurPtr) && // Start of line comment. 6690b57cec5SDimitry Andric !isAtStatementSeparator(CurPtr) && // End of statement marker. 6700b57cec5SDimitry Andric *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 6710b57cec5SDimitry Andric ++CurPtr; 6720b57cec5SDimitry Andric } 6730b57cec5SDimitry Andric return StringRef(TokStart, CurPtr-TokStart); 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric StringRef AsmLexer::LexUntilEndOfLine() { 6770b57cec5SDimitry Andric TokStart = CurPtr; 6780b57cec5SDimitry Andric 6790b57cec5SDimitry Andric while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 6800b57cec5SDimitry Andric ++CurPtr; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric return StringRef(TokStart, CurPtr-TokStart); 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric 6850b57cec5SDimitry Andric size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, 6860b57cec5SDimitry Andric bool ShouldSkipSpace) { 687*bdd1243dSDimitry Andric SaveAndRestore SavedTokenStart(TokStart); 688*bdd1243dSDimitry Andric SaveAndRestore SavedCurPtr(CurPtr); 689*bdd1243dSDimitry Andric SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); 690*bdd1243dSDimitry Andric SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); 691*bdd1243dSDimitry Andric SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); 692*bdd1243dSDimitry Andric SaveAndRestore SavedIsPeeking(IsPeeking, true); 6930b57cec5SDimitry Andric std::string SavedErr = getErr(); 6940b57cec5SDimitry Andric SMLoc SavedErrLoc = getErrLoc(); 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric size_t ReadCount; 6970b57cec5SDimitry Andric for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { 6980b57cec5SDimitry Andric AsmToken Token = LexToken(); 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric Buf[ReadCount] = Token; 7010b57cec5SDimitry Andric 7020b57cec5SDimitry Andric if (Token.is(AsmToken::Eof)) 7030b57cec5SDimitry Andric break; 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric 7060b57cec5SDimitry Andric SetError(SavedErrLoc, SavedErr); 7070b57cec5SDimitry Andric return ReadCount; 7080b57cec5SDimitry Andric } 7090b57cec5SDimitry Andric 7100b57cec5SDimitry Andric bool AsmLexer::isAtStartOfComment(const char *Ptr) { 711fe6060f1SDimitry Andric if (MAI.getRestrictCommentStringToStartOfStatement() && !IsAtStartOfStatement) 712fe6060f1SDimitry Andric return false; 713fe6060f1SDimitry Andric 7140b57cec5SDimitry Andric StringRef CommentString = MAI.getCommentString(); 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric if (CommentString.size() == 1) 7170b57cec5SDimitry Andric return CommentString[0] == Ptr[0]; 7180b57cec5SDimitry Andric 719*bdd1243dSDimitry Andric // Allow # preprocessor comments also be counted as comments for "##" cases 7200b57cec5SDimitry Andric if (CommentString[1] == '#') 7210b57cec5SDimitry Andric return CommentString[0] == Ptr[0]; 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric 7260b57cec5SDimitry Andric bool AsmLexer::isAtStatementSeparator(const char *Ptr) { 7270b57cec5SDimitry Andric return strncmp(Ptr, MAI.getSeparatorString(), 7280b57cec5SDimitry Andric strlen(MAI.getSeparatorString())) == 0; 7290b57cec5SDimitry Andric } 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric AsmToken AsmLexer::LexToken() { 7320b57cec5SDimitry Andric TokStart = CurPtr; 7330b57cec5SDimitry Andric // This always consumes at least one character. 7340b57cec5SDimitry Andric int CurChar = getNextChar(); 7350b57cec5SDimitry Andric 7360b57cec5SDimitry Andric if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { 7370b57cec5SDimitry Andric // If this starts with a '#', this may be a cpp 7380b57cec5SDimitry Andric // hash directive and otherwise a line comment. 7390b57cec5SDimitry Andric AsmToken TokenBuf[2]; 7400b57cec5SDimitry Andric MutableArrayRef<AsmToken> Buf(TokenBuf, 2); 7410b57cec5SDimitry Andric size_t num = peekTokens(Buf, true); 7420b57cec5SDimitry Andric // There cannot be a space preceding this 7430b57cec5SDimitry Andric if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && 7440b57cec5SDimitry Andric TokenBuf[1].is(AsmToken::String)) { 7450b57cec5SDimitry Andric CurPtr = TokStart; // reset curPtr; 7460b57cec5SDimitry Andric StringRef s = LexUntilEndOfLine(); 7470b57cec5SDimitry Andric UnLex(TokenBuf[1]); 7480b57cec5SDimitry Andric UnLex(TokenBuf[0]); 7490b57cec5SDimitry Andric return AsmToken(AsmToken::HashDirective, s); 7500b57cec5SDimitry Andric } 751fe6060f1SDimitry Andric 752fe6060f1SDimitry Andric if (MAI.shouldAllowAdditionalComments()) 7530b57cec5SDimitry Andric return LexLineComment(); 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric if (isAtStartOfComment(TokStart)) 7570b57cec5SDimitry Andric return LexLineComment(); 7580b57cec5SDimitry Andric 7590b57cec5SDimitry Andric if (isAtStatementSeparator(TokStart)) { 7600b57cec5SDimitry Andric CurPtr += strlen(MAI.getSeparatorString()) - 1; 7610b57cec5SDimitry Andric IsAtStartOfLine = true; 7620b57cec5SDimitry Andric IsAtStartOfStatement = true; 7630b57cec5SDimitry Andric return AsmToken(AsmToken::EndOfStatement, 7640b57cec5SDimitry Andric StringRef(TokStart, strlen(MAI.getSeparatorString()))); 7650b57cec5SDimitry Andric } 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric // If we're missing a newline at EOF, make sure we still get an 7680b57cec5SDimitry Andric // EndOfStatement token before the Eof token. 7695ffd83dbSDimitry Andric if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) { 7700b57cec5SDimitry Andric IsAtStartOfLine = true; 7710b57cec5SDimitry Andric IsAtStartOfStatement = true; 772e8d8bef9SDimitry Andric return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); 7730b57cec5SDimitry Andric } 7740b57cec5SDimitry Andric IsAtStartOfLine = false; 7750b57cec5SDimitry Andric bool OldIsAtStartOfStatement = IsAtStartOfStatement; 7760b57cec5SDimitry Andric IsAtStartOfStatement = false; 7770b57cec5SDimitry Andric switch (CurChar) { 7780b57cec5SDimitry Andric default: 779fe6060f1SDimitry Andric // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]* 780fe6060f1SDimitry Andric if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' || 781fe6060f1SDimitry Andric (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?')) 7825ffd83dbSDimitry Andric return LexIdentifier(); 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric // Unknown character, emit an error. 7850b57cec5SDimitry Andric return ReturnError(TokStart, "invalid character in input"); 7860b57cec5SDimitry Andric case EOF: 7875ffd83dbSDimitry Andric if (EndStatementAtEOF) { 7880b57cec5SDimitry Andric IsAtStartOfLine = true; 7890b57cec5SDimitry Andric IsAtStartOfStatement = true; 7905ffd83dbSDimitry Andric } 7910b57cec5SDimitry Andric return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 7920b57cec5SDimitry Andric case 0: 7930b57cec5SDimitry Andric case ' ': 7940b57cec5SDimitry Andric case '\t': 7950b57cec5SDimitry Andric IsAtStartOfStatement = OldIsAtStartOfStatement; 7960b57cec5SDimitry Andric while (*CurPtr == ' ' || *CurPtr == '\t') 7970b57cec5SDimitry Andric CurPtr++; 7980b57cec5SDimitry Andric if (SkipSpace) 7990b57cec5SDimitry Andric return LexToken(); // Ignore whitespace. 8000b57cec5SDimitry Andric else 8010b57cec5SDimitry Andric return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); 8020b57cec5SDimitry Andric case '\r': { 8030b57cec5SDimitry Andric IsAtStartOfLine = true; 8040b57cec5SDimitry Andric IsAtStartOfStatement = true; 8050b57cec5SDimitry Andric // If this is a CR followed by LF, treat that as one token. 8060b57cec5SDimitry Andric if (CurPtr != CurBuf.end() && *CurPtr == '\n') 8070b57cec5SDimitry Andric ++CurPtr; 8080b57cec5SDimitry Andric return AsmToken(AsmToken::EndOfStatement, 8090b57cec5SDimitry Andric StringRef(TokStart, CurPtr - TokStart)); 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric case '\n': 8120b57cec5SDimitry Andric IsAtStartOfLine = true; 8130b57cec5SDimitry Andric IsAtStartOfStatement = true; 8140b57cec5SDimitry Andric return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 8150b57cec5SDimitry Andric case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 8160b57cec5SDimitry Andric case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 8170b57cec5SDimitry Andric case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 8180b57cec5SDimitry Andric case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 8190b57cec5SDimitry Andric case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 8200b57cec5SDimitry Andric case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 8210b57cec5SDimitry Andric case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 8220b57cec5SDimitry Andric case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 8230b57cec5SDimitry Andric case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 8240b57cec5SDimitry Andric case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 8250b57cec5SDimitry Andric case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 826fe6060f1SDimitry Andric case '$': { 827fe6060f1SDimitry Andric if (LexMotorolaIntegers && isHexDigit(*CurPtr)) 828fe6060f1SDimitry Andric return LexDigit(); 829fe6060f1SDimitry Andric if (MAI.doesAllowDollarAtStartOfIdentifier()) 830fe6060f1SDimitry Andric return LexIdentifier(); 831fe6060f1SDimitry Andric return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 832fe6060f1SDimitry Andric } 833fe6060f1SDimitry Andric case '@': { 834fe6060f1SDimitry Andric if (MAI.doesAllowAtAtStartOfIdentifier()) 835fe6060f1SDimitry Andric return LexIdentifier(); 836fe6060f1SDimitry Andric return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 837fe6060f1SDimitry Andric } 8380b57cec5SDimitry Andric case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 8390b57cec5SDimitry Andric case '=': 8400b57cec5SDimitry Andric if (*CurPtr == '=') { 8410b57cec5SDimitry Andric ++CurPtr; 8420b57cec5SDimitry Andric return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 8450b57cec5SDimitry Andric case '-': 8460b57cec5SDimitry Andric if (*CurPtr == '>') { 8470b57cec5SDimitry Andric ++CurPtr; 8480b57cec5SDimitry Andric return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2)); 8490b57cec5SDimitry Andric } 8500b57cec5SDimitry Andric return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 8510b57cec5SDimitry Andric case '|': 8520b57cec5SDimitry Andric if (*CurPtr == '|') { 8530b57cec5SDimitry Andric ++CurPtr; 8540b57cec5SDimitry Andric return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 8550b57cec5SDimitry Andric } 8560b57cec5SDimitry Andric return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 8570b57cec5SDimitry Andric case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 8580b57cec5SDimitry Andric case '&': 8590b57cec5SDimitry Andric if (*CurPtr == '&') { 8600b57cec5SDimitry Andric ++CurPtr; 8610b57cec5SDimitry Andric return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 8640b57cec5SDimitry Andric case '!': 8650b57cec5SDimitry Andric if (*CurPtr == '=') { 8660b57cec5SDimitry Andric ++CurPtr; 8670b57cec5SDimitry Andric return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 8680b57cec5SDimitry Andric } 8690b57cec5SDimitry Andric return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 8700b57cec5SDimitry Andric case '%': 871fe6060f1SDimitry Andric if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) { 872fe6060f1SDimitry Andric return LexDigit(); 873fe6060f1SDimitry Andric } 874fe6060f1SDimitry Andric 8750b57cec5SDimitry Andric if (MAI.hasMipsExpressions()) { 8760b57cec5SDimitry Andric AsmToken::TokenKind Operator; 8770b57cec5SDimitry Andric unsigned OperatorLength; 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric std::tie(Operator, OperatorLength) = 8800b57cec5SDimitry Andric StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( 8810b57cec5SDimitry Andric StringRef(CurPtr)) 8820b57cec5SDimitry Andric .StartsWith("call16", {AsmToken::PercentCall16, 7}) 8830b57cec5SDimitry Andric .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) 8840b57cec5SDimitry Andric .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) 8850b57cec5SDimitry Andric .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) 8860b57cec5SDimitry Andric .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) 8870b57cec5SDimitry Andric .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) 8880b57cec5SDimitry Andric .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) 8890b57cec5SDimitry Andric .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) 8900b57cec5SDimitry Andric .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) 8910b57cec5SDimitry Andric .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) 8920b57cec5SDimitry Andric .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) 8930b57cec5SDimitry Andric .StartsWith("got", {AsmToken::PercentGot, 4}) 8940b57cec5SDimitry Andric .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) 8950b57cec5SDimitry Andric .StartsWith("higher", {AsmToken::PercentHigher, 7}) 8960b57cec5SDimitry Andric .StartsWith("highest", {AsmToken::PercentHighest, 8}) 8970b57cec5SDimitry Andric .StartsWith("hi", {AsmToken::PercentHi, 3}) 8980b57cec5SDimitry Andric .StartsWith("lo", {AsmToken::PercentLo, 3}) 8990b57cec5SDimitry Andric .StartsWith("neg", {AsmToken::PercentNeg, 4}) 9000b57cec5SDimitry Andric .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) 9010b57cec5SDimitry Andric .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) 9020b57cec5SDimitry Andric .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) 9030b57cec5SDimitry Andric .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) 9040b57cec5SDimitry Andric .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) 9050b57cec5SDimitry Andric .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) 9060b57cec5SDimitry Andric .Default({AsmToken::Percent, 1}); 9070b57cec5SDimitry Andric 9080b57cec5SDimitry Andric if (Operator != AsmToken::Percent) { 9090b57cec5SDimitry Andric CurPtr += OperatorLength - 1; 9100b57cec5SDimitry Andric return AsmToken(Operator, StringRef(TokStart, OperatorLength)); 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric } 9130b57cec5SDimitry Andric return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 9140b57cec5SDimitry Andric case '/': 9150b57cec5SDimitry Andric IsAtStartOfStatement = OldIsAtStartOfStatement; 9160b57cec5SDimitry Andric return LexSlash(); 917fe6060f1SDimitry Andric case '#': { 918fe6060f1SDimitry Andric if (MAI.doesAllowHashAtStartOfIdentifier()) 919fe6060f1SDimitry Andric return LexIdentifier(); 920fe6060f1SDimitry Andric return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 921fe6060f1SDimitry Andric } 9220b57cec5SDimitry Andric case '\'': return LexSingleQuote(); 9230b57cec5SDimitry Andric case '"': return LexQuote(); 9240b57cec5SDimitry Andric case '0': case '1': case '2': case '3': case '4': 9250b57cec5SDimitry Andric case '5': case '6': case '7': case '8': case '9': 9260b57cec5SDimitry Andric return LexDigit(); 9270b57cec5SDimitry Andric case '<': 9280b57cec5SDimitry Andric switch (*CurPtr) { 9290b57cec5SDimitry Andric case '<': 9300b57cec5SDimitry Andric ++CurPtr; 9310b57cec5SDimitry Andric return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); 9320b57cec5SDimitry Andric case '=': 9330b57cec5SDimitry Andric ++CurPtr; 9340b57cec5SDimitry Andric return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); 9350b57cec5SDimitry Andric case '>': 9360b57cec5SDimitry Andric ++CurPtr; 9370b57cec5SDimitry Andric return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); 9380b57cec5SDimitry Andric default: 9390b57cec5SDimitry Andric return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 9400b57cec5SDimitry Andric } 9410b57cec5SDimitry Andric case '>': 9420b57cec5SDimitry Andric switch (*CurPtr) { 9430b57cec5SDimitry Andric case '>': 9440b57cec5SDimitry Andric ++CurPtr; 9450b57cec5SDimitry Andric return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); 9460b57cec5SDimitry Andric case '=': 9470b57cec5SDimitry Andric ++CurPtr; 9480b57cec5SDimitry Andric return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); 9490b57cec5SDimitry Andric default: 9500b57cec5SDimitry Andric return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 9510b57cec5SDimitry Andric } 9520b57cec5SDimitry Andric 9530b57cec5SDimitry Andric // TODO: Quoted identifiers (objc methods etc) 9540b57cec5SDimitry Andric // local labels: [0-9][:] 9550b57cec5SDimitry Andric // Forward/backward labels: [0-9][fb] 9560b57cec5SDimitry Andric // Integers, fp constants, character constants. 9570b57cec5SDimitry Andric } 9580b57cec5SDimitry Andric } 959