xref: /freebsd/contrib/llvm-project/llvm/lib/MC/MCParser/AsmLexer.cpp (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
10b57cec5SDimitry Andric //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This class implements the lexer for assembly files.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "llvm/MC/MCParser/AsmLexer.h"
140b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
160b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
170b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
180b57cec5SDimitry Andric #include "llvm/ADT/StringSwitch.h"
190b57cec5SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
200b57cec5SDimitry Andric #include "llvm/MC/MCParser/MCAsmLexer.h"
21e8d8bef9SDimitry Andric #include "llvm/Support/Compiler.h"
220b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h"
230b57cec5SDimitry Andric #include "llvm/Support/SaveAndRestore.h"
240b57cec5SDimitry Andric #include <cassert>
250b57cec5SDimitry Andric #include <cctype>
260b57cec5SDimitry Andric #include <cstdio>
270b57cec5SDimitry Andric #include <cstring>
280b57cec5SDimitry Andric #include <string>
290b57cec5SDimitry Andric #include <tuple>
300b57cec5SDimitry Andric #include <utility>
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric using namespace llvm;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
350b57cec5SDimitry Andric   AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
36fe6060f1SDimitry Andric   LexMotorolaIntegers = MAI.shouldUseMotorolaIntegers();
370b57cec5SDimitry Andric }
380b57cec5SDimitry Andric 
// Defaulted destructor, defined out of line in this translation unit.
390b57cec5SDimitry Andric AsmLexer::~AsmLexer() = default;
400b57cec5SDimitry Andric 
415ffd83dbSDimitry Andric void AsmLexer::setBuffer(StringRef Buf, const char *ptr,
425ffd83dbSDimitry Andric                          bool EndStatementAtEOF) {
430b57cec5SDimitry Andric   CurBuf = Buf;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   if (ptr)
460b57cec5SDimitry Andric     CurPtr = ptr;
470b57cec5SDimitry Andric   else
480b57cec5SDimitry Andric     CurPtr = CurBuf.begin();
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   TokStart = nullptr;
515ffd83dbSDimitry Andric   this->EndStatementAtEOF = EndStatementAtEOF;
520b57cec5SDimitry Andric }
530b57cec5SDimitry Andric 
540b57cec5SDimitry Andric /// ReturnError - Set the error to the specified string at the specified
550b57cec5SDimitry Andric /// location.  This is defined to always return AsmToken::Error.
560b57cec5SDimitry Andric AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
570b57cec5SDimitry Andric   SetError(SMLoc::getFromPointer(Loc), Msg);
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric   return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
600b57cec5SDimitry Andric }
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric int AsmLexer::getNextChar() {
630b57cec5SDimitry Andric   if (CurPtr == CurBuf.end())
640b57cec5SDimitry Andric     return EOF;
650b57cec5SDimitry Andric   return (unsigned char)*CurPtr++;
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric 
68e8d8bef9SDimitry Andric int AsmLexer::peekNextChar() {
69e8d8bef9SDimitry Andric   if (CurPtr == CurBuf.end())
70e8d8bef9SDimitry Andric     return EOF;
71e8d8bef9SDimitry Andric   return (unsigned char)*CurPtr;
72e8d8bef9SDimitry Andric }
73e8d8bef9SDimitry Andric 
740b57cec5SDimitry Andric /// The leading integral digit sequence and dot should have already been
750b57cec5SDimitry Andric /// consumed, some or all of the fractional digit sequence *can* have been
760b57cec5SDimitry Andric /// consumed.
770b57cec5SDimitry Andric AsmToken AsmLexer::LexFloatLiteral() {
780b57cec5SDimitry Andric   // Skip the fractional digit sequence.
790b57cec5SDimitry Andric   while (isDigit(*CurPtr))
800b57cec5SDimitry Andric     ++CurPtr;
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   if (*CurPtr == '-' || *CurPtr == '+')
83fe6060f1SDimitry Andric     return ReturnError(CurPtr, "invalid sign in float literal");
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   // Check for exponent
860b57cec5SDimitry Andric   if ((*CurPtr == 'e' || *CurPtr == 'E')) {
870b57cec5SDimitry Andric     ++CurPtr;
880b57cec5SDimitry Andric 
890b57cec5SDimitry Andric     if (*CurPtr == '-' || *CurPtr == '+')
900b57cec5SDimitry Andric       ++CurPtr;
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric     while (isDigit(*CurPtr))
930b57cec5SDimitry Andric       ++CurPtr;
940b57cec5SDimitry Andric   }
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   return AsmToken(AsmToken::Real,
970b57cec5SDimitry Andric                   StringRef(TokStart, CurPtr - TokStart));
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
1010b57cec5SDimitry Andric /// while making sure there are enough actual digits around for the constant to
1020b57cec5SDimitry Andric /// be valid.
1030b57cec5SDimitry Andric ///
1040b57cec5SDimitry Andric /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
1050b57cec5SDimitry Andric /// before we get here.
1060b57cec5SDimitry Andric AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
1070b57cec5SDimitry Andric   assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
1080b57cec5SDimitry Andric          "unexpected parse state in floating hex");
1090b57cec5SDimitry Andric   bool NoFracDigits = true;
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric   // Skip the fractional part if there is one
1120b57cec5SDimitry Andric   if (*CurPtr == '.') {
1130b57cec5SDimitry Andric     ++CurPtr;
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric     const char *FracStart = CurPtr;
1160b57cec5SDimitry Andric     while (isHexDigit(*CurPtr))
1170b57cec5SDimitry Andric       ++CurPtr;
1180b57cec5SDimitry Andric 
1190b57cec5SDimitry Andric     NoFracDigits = CurPtr == FracStart;
1200b57cec5SDimitry Andric   }
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric   if (NoIntDigits && NoFracDigits)
1230b57cec5SDimitry Andric     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
1240b57cec5SDimitry Andric                                  "expected at least one significand digit");
1250b57cec5SDimitry Andric 
1260b57cec5SDimitry Andric   // Make sure we do have some kind of proper exponent part
1270b57cec5SDimitry Andric   if (*CurPtr != 'p' && *CurPtr != 'P')
1280b57cec5SDimitry Andric     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
1290b57cec5SDimitry Andric                                  "expected exponent part 'p'");
1300b57cec5SDimitry Andric   ++CurPtr;
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric   if (*CurPtr == '+' || *CurPtr == '-')
1330b57cec5SDimitry Andric     ++CurPtr;
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric   // N.b. exponent digits are *not* hex
1360b57cec5SDimitry Andric   const char *ExpStart = CurPtr;
1370b57cec5SDimitry Andric   while (isDigit(*CurPtr))
1380b57cec5SDimitry Andric     ++CurPtr;
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   if (CurPtr == ExpStart)
1410b57cec5SDimitry Andric     return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
1420b57cec5SDimitry Andric                                  "expected at least one exponent digit");
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
1450b57cec5SDimitry Andric }
1460b57cec5SDimitry Andric 
147fe6060f1SDimitry Andric /// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
148fe6060f1SDimitry Andric static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
149fe6060f1SDimitry Andric   return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
150fe6060f1SDimitry Andric          (AllowAt && C == '@') || (AllowHash && C == '#');
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric AsmToken AsmLexer::LexIdentifier() {
1540b57cec5SDimitry Andric   // Check for floating point literals.
1550b57cec5SDimitry Andric   if (CurPtr[-1] == '.' && isDigit(*CurPtr)) {
1560b57cec5SDimitry Andric     // Disambiguate a .1243foo identifier from a floating literal.
1570b57cec5SDimitry Andric     while (isDigit(*CurPtr))
1580b57cec5SDimitry Andric       ++CurPtr;
1590b57cec5SDimitry Andric 
160fe6060f1SDimitry Andric     if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier,
161fe6060f1SDimitry Andric                           AllowHashInIdentifier) ||
1620b57cec5SDimitry Andric         *CurPtr == 'e' || *CurPtr == 'E')
1630b57cec5SDimitry Andric       return LexFloatLiteral();
1640b57cec5SDimitry Andric   }
1650b57cec5SDimitry Andric 
166fe6060f1SDimitry Andric   while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier))
1670b57cec5SDimitry Andric     ++CurPtr;
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   // Handle . as a special case.
1700b57cec5SDimitry Andric   if (CurPtr == TokStart+1 && TokStart[0] == '.')
1710b57cec5SDimitry Andric     return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
1740b57cec5SDimitry Andric }
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric /// LexSlash: Slash: /
1770b57cec5SDimitry Andric ///           C-Style Comment: /* ... */
178fe6060f1SDimitry Andric ///           C-style Comment: // ...
1790b57cec5SDimitry Andric AsmToken AsmLexer::LexSlash() {
180fe6060f1SDimitry Andric   if (!MAI.shouldAllowAdditionalComments()) {
181fe6060f1SDimitry Andric     IsAtStartOfStatement = false;
182fe6060f1SDimitry Andric     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
183fe6060f1SDimitry Andric   }
184fe6060f1SDimitry Andric 
1850b57cec5SDimitry Andric   switch (*CurPtr) {
1860b57cec5SDimitry Andric   case '*':
1870b57cec5SDimitry Andric     IsAtStartOfStatement = false;
1880b57cec5SDimitry Andric     break; // C style comment.
1890b57cec5SDimitry Andric   case '/':
1900b57cec5SDimitry Andric     ++CurPtr;
1910b57cec5SDimitry Andric     return LexLineComment();
1920b57cec5SDimitry Andric   default:
1930b57cec5SDimitry Andric     IsAtStartOfStatement = false;
1940b57cec5SDimitry Andric     return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
1950b57cec5SDimitry Andric   }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric   // C Style comment.
1980b57cec5SDimitry Andric   ++CurPtr;  // skip the star.
1990b57cec5SDimitry Andric   const char *CommentTextStart = CurPtr;
2000b57cec5SDimitry Andric   while (CurPtr != CurBuf.end()) {
2010b57cec5SDimitry Andric     switch (*CurPtr++) {
2020b57cec5SDimitry Andric     case '*':
2030b57cec5SDimitry Andric       // End of the comment?
2040b57cec5SDimitry Andric       if (*CurPtr != '/')
2050b57cec5SDimitry Andric         break;
2060b57cec5SDimitry Andric       // If we have a CommentConsumer, notify it about the comment.
2070b57cec5SDimitry Andric       if (CommentConsumer) {
2080b57cec5SDimitry Andric         CommentConsumer->HandleComment(
2090b57cec5SDimitry Andric             SMLoc::getFromPointer(CommentTextStart),
2100b57cec5SDimitry Andric             StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
2110b57cec5SDimitry Andric       }
2120b57cec5SDimitry Andric       ++CurPtr;   // End the */.
2130b57cec5SDimitry Andric       return AsmToken(AsmToken::Comment,
2140b57cec5SDimitry Andric                       StringRef(TokStart, CurPtr - TokStart));
2150b57cec5SDimitry Andric     }
2160b57cec5SDimitry Andric   }
2170b57cec5SDimitry Andric   return ReturnError(TokStart, "unterminated comment");
2180b57cec5SDimitry Andric }
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric /// LexLineComment: Comment: #[^\n]*
2210b57cec5SDimitry Andric ///                        : //[^\n]*
2220b57cec5SDimitry Andric AsmToken AsmLexer::LexLineComment() {
2230b57cec5SDimitry Andric   // Mark This as an end of statement with a body of the
2240b57cec5SDimitry Andric   // comment. While it would be nicer to leave this two tokens,
2250b57cec5SDimitry Andric   // backwards compatability with TargetParsers makes keeping this in this form
2260b57cec5SDimitry Andric   // better.
2270b57cec5SDimitry Andric   const char *CommentTextStart = CurPtr;
2280b57cec5SDimitry Andric   int CurChar = getNextChar();
2290b57cec5SDimitry Andric   while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
2300b57cec5SDimitry Andric     CurChar = getNextChar();
231349cc55cSDimitry Andric   const char *NewlinePtr = CurPtr;
2320b57cec5SDimitry Andric   if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
2330b57cec5SDimitry Andric     ++CurPtr;
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   // If we have a CommentConsumer, notify it about the comment.
2360b57cec5SDimitry Andric   if (CommentConsumer) {
2370b57cec5SDimitry Andric     CommentConsumer->HandleComment(
2380b57cec5SDimitry Andric         SMLoc::getFromPointer(CommentTextStart),
239349cc55cSDimitry Andric         StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
2400b57cec5SDimitry Andric   }
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric   IsAtStartOfLine = true;
2430b57cec5SDimitry Andric   // This is a whole line comment. leave newline
2440b57cec5SDimitry Andric   if (IsAtStartOfStatement)
2450b57cec5SDimitry Andric     return AsmToken(AsmToken::EndOfStatement,
2460b57cec5SDimitry Andric                     StringRef(TokStart, CurPtr - TokStart));
2470b57cec5SDimitry Andric   IsAtStartOfStatement = true;
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   return AsmToken(AsmToken::EndOfStatement,
2500b57cec5SDimitry Andric                   StringRef(TokStart, CurPtr - 1 - TokStart));
2510b57cec5SDimitry Andric }
2520b57cec5SDimitry Andric 
/// Advance \p CurPtr past an optional, case-insensitive integer type suffix:
/// at most one 'U' followed by at most two 'L's (U, L, LL, UL, ULL).
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const auto SkipIfEither = [&CurPtr](char Upper, char Lower) {
    if (*CurPtr == Upper || *CurPtr == Lower)
      ++CurPtr;
  };

  SkipIfEither('U', 'u');
  // Up to two 'L's may follow.
  for (int Remaining = 2; Remaining != 0; --Remaining)
    SkipIfEither('L', 'l');
}
2620b57cec5SDimitry Andric 
2630b57cec5SDimitry Andric // Look ahead to search for first non-hex digit, if it's [hH], then we treat the
2640b57cec5SDimitry Andric // integer as a hexadecimal, possibly with leading zeroes.
2650b57cec5SDimitry Andric static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
2660b57cec5SDimitry Andric                                bool LexHex) {
2670b57cec5SDimitry Andric   const char *FirstNonDec = nullptr;
2680b57cec5SDimitry Andric   const char *LookAhead = CurPtr;
2690b57cec5SDimitry Andric   while (true) {
2700b57cec5SDimitry Andric     if (isDigit(*LookAhead)) {
2710b57cec5SDimitry Andric       ++LookAhead;
2720b57cec5SDimitry Andric     } else {
2730b57cec5SDimitry Andric       if (!FirstNonDec)
2740b57cec5SDimitry Andric         FirstNonDec = LookAhead;
2750b57cec5SDimitry Andric 
2760b57cec5SDimitry Andric       // Keep going if we are looking for a 'h' suffix.
2770b57cec5SDimitry Andric       if (LexHex && isHexDigit(*LookAhead))
2780b57cec5SDimitry Andric         ++LookAhead;
2790b57cec5SDimitry Andric       else
2800b57cec5SDimitry Andric         break;
2810b57cec5SDimitry Andric     }
2820b57cec5SDimitry Andric   }
2830b57cec5SDimitry Andric   bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
2840b57cec5SDimitry Andric   CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
2850b57cec5SDimitry Andric   if (isHex)
2860b57cec5SDimitry Andric     return 16;
2870b57cec5SDimitry Andric   return DefaultRadix;
2880b57cec5SDimitry Andric }
2890b57cec5SDimitry Andric 
290e8d8bef9SDimitry Andric static const char *findLastDigit(const char *CurPtr, unsigned DefaultRadix) {
291e8d8bef9SDimitry Andric   while (hexDigitValue(*CurPtr) < DefaultRadix) {
292e8d8bef9SDimitry Andric     ++CurPtr;
293e8d8bef9SDimitry Andric   }
294e8d8bef9SDimitry Andric   return CurPtr;
295e8d8bef9SDimitry Andric }
296e8d8bef9SDimitry Andric 
297e8d8bef9SDimitry Andric static AsmToken intToken(StringRef Ref, APInt &Value) {
2980b57cec5SDimitry Andric   if (Value.isIntN(64))
2990b57cec5SDimitry Andric     return AsmToken(AsmToken::Integer, Ref, Value);
3000b57cec5SDimitry Andric   return AsmToken(AsmToken::BigNum, Ref, Value);
3010b57cec5SDimitry Andric }
3020b57cec5SDimitry Andric 
/// Human-readable name of \p Radix for diagnostics: "binary", "octal",
/// "decimal" or "hexadecimal" for the common bases, "base-N" for any other.
static std::string radixName(unsigned Radix) {
  if (Radix == 2)
    return "binary";
  if (Radix == 8)
    return "octal";
  if (Radix == 10)
    return "decimal";
  if (Radix == 16)
    return "hexadecimal";

  std::string Name = "base-";
  Name += std::to_string(Radix);
  return Name;
}
317e8d8bef9SDimitry Andric 
3180b57cec5SDimitry Andric /// LexDigit: First character is [0-9].
3190b57cec5SDimitry Andric ///   Local Label: [0-9][:]
3200b57cec5SDimitry Andric ///   Forward/Backward Label: [0-9][fb]
3210b57cec5SDimitry Andric ///   Binary integer: 0b[01]+
3220b57cec5SDimitry Andric ///   Octal integer: 0[0-7]+
3230b57cec5SDimitry Andric ///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
3240b57cec5SDimitry Andric ///   Decimal integer: [1-9][0-9]*
///
/// The first character of the literal has already been consumed when this
/// runs (the code inspects CurPtr[-1]). Besides the forms listed above, the
/// body also handles MASM radix suffixes, MASM default-radix integers and
/// Motorola '$' / '%' prefixed integers, each gated on the matching lexer
/// flag. Integer results are produced through intToken(); floating-point
/// forms yield an AsmToken::Real; malformed literals yield an error token via
/// ReturnError().
3250b57cec5SDimitry Andric AsmToken AsmLexer::LexDigit() {
326e8d8bef9SDimitry Andric   // MASM-flavor binary integer: [01]+[yY] (if DefaultRadix < 16, [bByY])
327e8d8bef9SDimitry Andric   // MASM-flavor octal integer: [0-7]+[oOqQ]
328e8d8bef9SDimitry Andric   // MASM-flavor decimal integer: [0-9]+[tT] (if DefaultRadix < 16, [dDtT])
3290b57cec5SDimitry Andric   // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
3300b57cec5SDimitry Andric   if (LexMasmIntegers && isdigit(CurPtr[-1])) {
    // Track where the first non-binary and first non-decimal characters occur,
    // starting with the already-consumed first character, so that a trailing
    // 'b' / 'd' can later be told apart from an ordinary hex digit.
331e8d8bef9SDimitry Andric     const char *FirstNonBinary =
332e8d8bef9SDimitry Andric         (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr;
333e8d8bef9SDimitry Andric     const char *FirstNonDecimal =
334e8d8bef9SDimitry Andric         (CurPtr[-1] < '0' || CurPtr[-1] > '9') ? CurPtr - 1 : nullptr;
    // Saved so the scan below can be undone if no MASM radix suffix is found.
3350b57cec5SDimitry Andric     const char *OldCurPtr = CurPtr;
3360b57cec5SDimitry Andric     while (isHexDigit(*CurPtr)) {
337e8d8bef9SDimitry Andric       switch (*CurPtr) {
338e8d8bef9SDimitry Andric       default:
        // Not '0'-'9'; given the loop condition this is a hex letter, which is
        // both non-decimal and (via the fallthrough) non-binary.
339e8d8bef9SDimitry Andric         if (!FirstNonDecimal) {
340e8d8bef9SDimitry Andric           FirstNonDecimal = CurPtr;
341e8d8bef9SDimitry Andric         }
342*bdd1243dSDimitry Andric         [[fallthrough]];
343e8d8bef9SDimitry Andric       case '9':
344e8d8bef9SDimitry Andric       case '8':
345e8d8bef9SDimitry Andric       case '7':
346e8d8bef9SDimitry Andric       case '6':
347e8d8bef9SDimitry Andric       case '5':
348e8d8bef9SDimitry Andric       case '4':
349e8d8bef9SDimitry Andric       case '3':
350e8d8bef9SDimitry Andric       case '2':
351e8d8bef9SDimitry Andric         if (!FirstNonBinary) {
3520b57cec5SDimitry Andric           FirstNonBinary = CurPtr;
353e8d8bef9SDimitry Andric         }
354e8d8bef9SDimitry Andric         break;
355e8d8bef9SDimitry Andric       case '1':
356e8d8bef9SDimitry Andric       case '0':
357e8d8bef9SDimitry Andric         break;
358e8d8bef9SDimitry Andric       }
3590b57cec5SDimitry Andric       ++CurPtr;
3600b57cec5SDimitry Andric     }
361e8d8bef9SDimitry Andric     if (*CurPtr == '.') {
362e8d8bef9SDimitry Andric       // MASM float literals (other than hex floats) always contain a ".", and
363e8d8bef9SDimitry Andric       // are always written in decimal.
364e8d8bef9SDimitry Andric       ++CurPtr;
365e8d8bef9SDimitry Andric       return LexFloatLiteral();
366e8d8bef9SDimitry Andric     }
367e8d8bef9SDimitry Andric 
    // With LexMasmHexFloats, a hex-digit run followed by r/R is returned as a
    // Real token covering the digits and the suffix.
368e8d8bef9SDimitry Andric     if (LexMasmHexFloats && (*CurPtr == 'r' || *CurPtr == 'R')) {
369e8d8bef9SDimitry Andric       ++CurPtr;
370e8d8bef9SDimitry Andric       return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
371e8d8bef9SDimitry Andric     }
3720b57cec5SDimitry Andric 
    // Radix stays 0 when no explicit MASM radix suffix is recognized.
3730b57cec5SDimitry Andric     unsigned Radix = 0;
3740b57cec5SDimitry Andric     if (*CurPtr == 'h' || *CurPtr == 'H') {
3750b57cec5SDimitry Andric       // hexadecimal number
3760b57cec5SDimitry Andric       ++CurPtr;
3770b57cec5SDimitry Andric       Radix = 16;
378e8d8bef9SDimitry Andric     } else if (*CurPtr == 't' || *CurPtr == 'T') {
379e8d8bef9SDimitry Andric       // decimal number
380e8d8bef9SDimitry Andric       ++CurPtr;
381e8d8bef9SDimitry Andric       Radix = 10;
382e8d8bef9SDimitry Andric     } else if (*CurPtr == 'o' || *CurPtr == 'O' || *CurPtr == 'q' ||
383e8d8bef9SDimitry Andric                *CurPtr == 'Q') {
384e8d8bef9SDimitry Andric       // octal number
385e8d8bef9SDimitry Andric       ++CurPtr;
386e8d8bef9SDimitry Andric       Radix = 8;
387e8d8bef9SDimitry Andric     } else if (*CurPtr == 'y' || *CurPtr == 'Y') {
388e8d8bef9SDimitry Andric       // binary number
389e8d8bef9SDimitry Andric       ++CurPtr;
3900b57cec5SDimitry Andric       Radix = 2;
    // A 'd'/'D' that was the last character of the scanned run (and the first
    // non-decimal one) acts as a decimal suffix. It was already consumed by
    // the hex-digit scan, so CurPtr is not advanced here. The radix check is
    // "< 14" because 'd' (digit value 13) is itself a digit from radix 14 up;
    // note that the header comment above says "< 16".
391e8d8bef9SDimitry Andric     } else if (FirstNonDecimal && FirstNonDecimal + 1 == CurPtr &&
392e8d8bef9SDimitry Andric                DefaultRadix < 14 &&
393e8d8bef9SDimitry Andric                (*FirstNonDecimal == 'd' || *FirstNonDecimal == 'D')) {
394e8d8bef9SDimitry Andric       Radix = 10;
    // Likewise for a trailing 'b'/'B' (digit value 11) as a binary suffix;
    // the check is "< 12" where the header comment says "< 16".
395e8d8bef9SDimitry Andric     } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
396e8d8bef9SDimitry Andric                DefaultRadix < 12 &&
397e8d8bef9SDimitry Andric                (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) {
398e8d8bef9SDimitry Andric       Radix = 2;
399e8d8bef9SDimitry Andric     }
4000b57cec5SDimitry Andric 
401e8d8bef9SDimitry Andric     if (Radix) {
4020b57cec5SDimitry Andric       StringRef Result(TokStart, CurPtr - TokStart);
4030b57cec5SDimitry Andric       APInt Value(128, 0, true);
4040b57cec5SDimitry Andric 
      // The last character of Result is the radix suffix; parse without it.
4050b57cec5SDimitry Andric       if (Result.drop_back().getAsInteger(Radix, Value))
406e8d8bef9SDimitry Andric         return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
4070b57cec5SDimitry Andric 
4080b57cec5SDimitry Andric       // MSVC accepts and ignores type suffices on integer literals.
4090b57cec5SDimitry Andric       SkipIgnoredIntegerSuffix(CurPtr);
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric       return intToken(Result, Value);
4120b57cec5SDimitry Andric     }
4130b57cec5SDimitry Andric 
414e8d8bef9SDimitry Andric     // default-radix integers, or floating point numbers, fall through
    // Rewind to just past the first character so the paths below rescan.
4150b57cec5SDimitry Andric     CurPtr = OldCurPtr;
4160b57cec5SDimitry Andric   }
4170b57cec5SDimitry Andric 
418e8d8bef9SDimitry Andric   // MASM default-radix integers: [0-9a-fA-F]+
419e8d8bef9SDimitry Andric   // (All other integer literals have a radix specifier.)
420e8d8bef9SDimitry Andric   if (LexMasmIntegers && UseMasmDefaultRadix) {
    // Consume every character that is a hex digit, then let the parse in
    // DefaultRadix reject digits that are out of range for that radix.
421e8d8bef9SDimitry Andric     CurPtr = findLastDigit(CurPtr, 16);
422e8d8bef9SDimitry Andric     StringRef Result(TokStart, CurPtr - TokStart);
423e8d8bef9SDimitry Andric 
424e8d8bef9SDimitry Andric     APInt Value(128, 0, true);
425e8d8bef9SDimitry Andric     if (Result.getAsInteger(DefaultRadix, Value)) {
426e8d8bef9SDimitry Andric       return ReturnError(TokStart,
427e8d8bef9SDimitry Andric                          "invalid " + radixName(DefaultRadix) + " number");
428e8d8bef9SDimitry Andric     }
429e8d8bef9SDimitry Andric 
430e8d8bef9SDimitry Andric     return intToken(Result, Value);
431e8d8bef9SDimitry Andric   }
432e8d8bef9SDimitry Andric 
433fe6060f1SDimitry Andric   // Motorola hex integers: $[0-9a-fA-F]+
434fe6060f1SDimitry Andric   if (LexMotorolaIntegers && CurPtr[-1] == '$') {
    // The '$' leader is already consumed; the digits start at CurPtr. The
    // value is parsed from the digits only, the token text includes the '$'.
435fe6060f1SDimitry Andric     const char *NumStart = CurPtr;
436fe6060f1SDimitry Andric     while (isHexDigit(CurPtr[0]))
437fe6060f1SDimitry Andric       ++CurPtr;
438fe6060f1SDimitry Andric 
439fe6060f1SDimitry Andric     APInt Result(128, 0);
440fe6060f1SDimitry Andric     if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(16, Result))
441fe6060f1SDimitry Andric       return ReturnError(TokStart, "invalid hexadecimal number");
442fe6060f1SDimitry Andric 
443fe6060f1SDimitry Andric     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
444fe6060f1SDimitry Andric   }
445fe6060f1SDimitry Andric 
446fe6060f1SDimitry Andric   // Motorola binary integers: %[01]+
447fe6060f1SDimitry Andric   if (LexMotorolaIntegers && CurPtr[-1] == '%') {
448fe6060f1SDimitry Andric     const char *NumStart = CurPtr;
449fe6060f1SDimitry Andric     while (*CurPtr == '0' || *CurPtr == '1')
450fe6060f1SDimitry Andric       ++CurPtr;
451fe6060f1SDimitry Andric 
452fe6060f1SDimitry Andric     APInt Result(128, 0);
453fe6060f1SDimitry Andric     if (StringRef(NumStart, CurPtr - NumStart).getAsInteger(2, Result))
454fe6060f1SDimitry Andric       return ReturnError(TokStart, "invalid binary number");
455fe6060f1SDimitry Andric 
456fe6060f1SDimitry Andric     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
457fe6060f1SDimitry Andric   }
458fe6060f1SDimitry Andric 
4590b57cec5SDimitry Andric   // Decimal integer: [1-9][0-9]*
460fe6060f1SDimitry Andric   // HLASM-flavour decimal integer: [0-9][0-9]*
461fe6060f1SDimitry Andric   // FIXME: Later on, support for fb for HLASM has to be added in
462fe6060f1SDimitry Andric   // as they probably would be needed for asm goto
  // Taken for HLASM integers, for a non-zero first digit, or for "0." (a
  // floating-point literal that starts with zero).
463fe6060f1SDimitry Andric   if (LexHLASMIntegers || CurPtr[-1] != '0' || CurPtr[0] == '.') {
    // Advances CurPtr over the digits; yields 16 instead of 10 when MASM
    // integers are enabled and the digits are followed by an h/H suffix.
4640b57cec5SDimitry Andric     unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
465fe6060f1SDimitry Andric 
466fe6060f1SDimitry Andric     if (!LexHLASMIntegers) {
467fe6060f1SDimitry Andric       bool IsHex = Radix == 16;
4680b57cec5SDimitry Andric       // Check for floating point literals.
469fe6060f1SDimitry Andric       if (!IsHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) {
4700b57cec5SDimitry Andric         if (*CurPtr == '.')
4710b57cec5SDimitry Andric           ++CurPtr;
4720b57cec5SDimitry Andric         return LexFloatLiteral();
4730b57cec5SDimitry Andric       }
474fe6060f1SDimitry Andric     }
4750b57cec5SDimitry Andric 
4760b57cec5SDimitry Andric     StringRef Result(TokStart, CurPtr - TokStart);
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric     APInt Value(128, 0, true);
479fe6060f1SDimitry Andric     if (Result.getAsInteger(Radix, Value))
480e8d8bef9SDimitry Andric       return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
4810b57cec5SDimitry Andric 
482fe6060f1SDimitry Andric     if (!LexHLASMIntegers)
4830b57cec5SDimitry Andric       // The darwin/x86 (and x86-64) assembler accepts and ignores type
4840b57cec5SDimitry Andric       // suffices on integer literals.
4850b57cec5SDimitry Andric       SkipIgnoredIntegerSuffix(CurPtr);
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric     return intToken(Result, Value);
4880b57cec5SDimitry Andric   }
4890b57cec5SDimitry Andric 
  // From here on the first character is '0'. "0b..." binary form (not used
  // when MASM integers are enabled).
4900b57cec5SDimitry Andric   if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
4910b57cec5SDimitry Andric     ++CurPtr;
4920b57cec5SDimitry Andric     // See if we actually have "0b" as part of something like "jmp 0b\n"
4930b57cec5SDimitry Andric     if (!isDigit(CurPtr[0])) {
      // Un-consume the 'b' and return just the "0" as an Integer token with
      // value 0.
4940b57cec5SDimitry Andric       --CurPtr;
4950b57cec5SDimitry Andric       StringRef Result(TokStart, CurPtr - TokStart);
4960b57cec5SDimitry Andric       return AsmToken(AsmToken::Integer, Result, 0);
4970b57cec5SDimitry Andric     }
4980b57cec5SDimitry Andric     const char *NumStart = CurPtr;
4990b57cec5SDimitry Andric     while (CurPtr[0] == '0' || CurPtr[0] == '1')
5000b57cec5SDimitry Andric       ++CurPtr;
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric     // Requires at least one binary digit.
5030b57cec5SDimitry Andric     if (CurPtr == NumStart)
5040b57cec5SDimitry Andric       return ReturnError(TokStart, "invalid binary number");
5050b57cec5SDimitry Andric 
5060b57cec5SDimitry Andric     StringRef Result(TokStart, CurPtr - TokStart);
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric     APInt Value(128, 0, true);
    // substr(2) skips the two-character "0b" prefix before parsing.
5090b57cec5SDimitry Andric     if (Result.substr(2).getAsInteger(2, Value))
5100b57cec5SDimitry Andric       return ReturnError(TokStart, "invalid binary number");
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
5130b57cec5SDimitry Andric     // suffixes on integer literals.
5140b57cec5SDimitry Andric     SkipIgnoredIntegerSuffix(CurPtr);
5150b57cec5SDimitry Andric 
5160b57cec5SDimitry Andric     return intToken(Result, Value);
5170b57cec5SDimitry Andric   }
5180b57cec5SDimitry Andric 
  // "0x..." hexadecimal integer or hexadecimal floating-point literal.
5190b57cec5SDimitry Andric   if ((*CurPtr == 'x') || (*CurPtr == 'X')) {
5200b57cec5SDimitry Andric     ++CurPtr;
5210b57cec5SDimitry Andric     const char *NumStart = CurPtr;
5220b57cec5SDimitry Andric     while (isHexDigit(CurPtr[0]))
5230b57cec5SDimitry Andric       ++CurPtr;
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric     // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
5260b57cec5SDimitry Andric     // diagnosed by LexHexFloatLiteral).
    // The argument is true when no hex digits followed the "0x".
5270b57cec5SDimitry Andric     if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P')
5280b57cec5SDimitry Andric       return LexHexFloatLiteral(NumStart == CurPtr);
5290b57cec5SDimitry Andric 
5300b57cec5SDimitry Andric     // Otherwise requires at least one hex digit.
5310b57cec5SDimitry Andric     if (CurPtr == NumStart)
5320b57cec5SDimitry Andric       return ReturnError(CurPtr-2, "invalid hexadecimal number");
5330b57cec5SDimitry Andric 
5340b57cec5SDimitry Andric     APInt Result(128, 0);
    // The whole token, "0x" included, is parsed with radix 0; presumably
    // getAsInteger then infers base 16 from the prefix -- confirm against the
    // StringRef documentation.
5350b57cec5SDimitry Andric     if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
5360b57cec5SDimitry Andric       return ReturnError(TokStart, "invalid hexadecimal number");
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric     // Consume the optional [hH].
5390b57cec5SDimitry Andric     if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
5400b57cec5SDimitry Andric       ++CurPtr;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric     // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
5430b57cec5SDimitry Andric     // suffixes on integer literals.
5440b57cec5SDimitry Andric     SkipIgnoredIntegerSuffix(CurPtr);
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric     return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
5470b57cec5SDimitry Andric   }
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric   // Either octal or hexadecimal.
5500b57cec5SDimitry Andric   APInt Value(128, 0, true);
5510b57cec5SDimitry Andric   unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
  // Result is formed while CurPtr still sits on any h/H suffix, so the token
  // text and the parsed digits exclude that suffix.
5520b57cec5SDimitry Andric   StringRef Result(TokStart, CurPtr - TokStart);
5530b57cec5SDimitry Andric   if (Result.getAsInteger(Radix, Value))
554e8d8bef9SDimitry Andric     return ReturnError(TokStart, "invalid " + radixName(Radix) + " number");
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric   // Consume the [hH].
5570b57cec5SDimitry Andric   if (Radix == 16)
5580b57cec5SDimitry Andric     ++CurPtr;
5590b57cec5SDimitry Andric 
5600b57cec5SDimitry Andric   // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
5610b57cec5SDimitry Andric   // suffixes on integer literals.
5620b57cec5SDimitry Andric   SkipIgnoredIntegerSuffix(CurPtr);
5630b57cec5SDimitry Andric 
5640b57cec5SDimitry Andric   return intToken(Result, Value);
5650b57cec5SDimitry Andric }
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric /// LexSingleQuote: Integer: 'b'
5680b57cec5SDimitry Andric AsmToken AsmLexer::LexSingleQuote() {
5690b57cec5SDimitry Andric   int CurChar = getNextChar();
5700b57cec5SDimitry Andric 
571fe6060f1SDimitry Andric   if (LexHLASMStrings)
572fe6060f1SDimitry Andric     return ReturnError(TokStart, "invalid usage of character literals");
573fe6060f1SDimitry Andric 
574e8d8bef9SDimitry Andric   if (LexMasmStrings) {
575e8d8bef9SDimitry Andric     while (CurChar != EOF) {
576e8d8bef9SDimitry Andric       if (CurChar != '\'') {
577e8d8bef9SDimitry Andric         CurChar = getNextChar();
578e8d8bef9SDimitry Andric       } else if (peekNextChar() == '\'') {
579e8d8bef9SDimitry Andric         // In MASM single-quote strings, doubled single-quotes mean an escaped
580e8d8bef9SDimitry Andric         // single quote, so should be lexed in.
581e8d8bef9SDimitry Andric         getNextChar();
582e8d8bef9SDimitry Andric         CurChar = getNextChar();
583e8d8bef9SDimitry Andric       } else {
584e8d8bef9SDimitry Andric         break;
585e8d8bef9SDimitry Andric       }
586e8d8bef9SDimitry Andric     }
587e8d8bef9SDimitry Andric     if (CurChar == EOF)
588e8d8bef9SDimitry Andric       return ReturnError(TokStart, "unterminated string constant");
589e8d8bef9SDimitry Andric     return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
590e8d8bef9SDimitry Andric   }
591e8d8bef9SDimitry Andric 
5920b57cec5SDimitry Andric   if (CurChar == '\\')
5930b57cec5SDimitry Andric     CurChar = getNextChar();
5940b57cec5SDimitry Andric 
5950b57cec5SDimitry Andric   if (CurChar == EOF)
5960b57cec5SDimitry Andric     return ReturnError(TokStart, "unterminated single quote");
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric   CurChar = getNextChar();
5990b57cec5SDimitry Andric 
6000b57cec5SDimitry Andric   if (CurChar != '\'')
6010b57cec5SDimitry Andric     return ReturnError(TokStart, "single quote way too long");
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric   // The idea here being that 'c' is basically just an integral
6040b57cec5SDimitry Andric   // constant.
6050b57cec5SDimitry Andric   StringRef Res = StringRef(TokStart,CurPtr - TokStart);
6060b57cec5SDimitry Andric   long long Value;
6070b57cec5SDimitry Andric 
6080b57cec5SDimitry Andric   if (Res.startswith("\'\\")) {
6090b57cec5SDimitry Andric     char theChar = Res[2];
6100b57cec5SDimitry Andric     switch (theChar) {
6110b57cec5SDimitry Andric       default: Value = theChar; break;
6120b57cec5SDimitry Andric       case '\'': Value = '\''; break;
6130b57cec5SDimitry Andric       case 't': Value = '\t'; break;
6140b57cec5SDimitry Andric       case 'n': Value = '\n'; break;
6150b57cec5SDimitry Andric       case 'b': Value = '\b'; break;
616fe6060f1SDimitry Andric       case 'f': Value = '\f'; break;
617fe6060f1SDimitry Andric       case 'r': Value = '\r'; break;
6180b57cec5SDimitry Andric     }
6190b57cec5SDimitry Andric   } else
6200b57cec5SDimitry Andric     Value = TokStart[1];
6210b57cec5SDimitry Andric 
6220b57cec5SDimitry Andric   return AsmToken(AsmToken::Integer, Res, Value);
6230b57cec5SDimitry Andric }
6240b57cec5SDimitry Andric 
6250b57cec5SDimitry Andric /// LexQuote: String: "..."
6260b57cec5SDimitry Andric AsmToken AsmLexer::LexQuote() {
6270b57cec5SDimitry Andric   int CurChar = getNextChar();
628fe6060f1SDimitry Andric   if (LexHLASMStrings)
629fe6060f1SDimitry Andric     return ReturnError(TokStart, "invalid usage of string literals");
630fe6060f1SDimitry Andric 
631e8d8bef9SDimitry Andric   if (LexMasmStrings) {
632e8d8bef9SDimitry Andric     while (CurChar != EOF) {
633e8d8bef9SDimitry Andric       if (CurChar != '"') {
634e8d8bef9SDimitry Andric         CurChar = getNextChar();
635e8d8bef9SDimitry Andric       } else if (peekNextChar() == '"') {
636e8d8bef9SDimitry Andric         // In MASM double-quoted strings, doubled double-quotes mean an escaped
637e8d8bef9SDimitry Andric         // double quote, so should be lexed in.
638e8d8bef9SDimitry Andric         getNextChar();
639e8d8bef9SDimitry Andric         CurChar = getNextChar();
640e8d8bef9SDimitry Andric       } else {
641e8d8bef9SDimitry Andric         break;
642e8d8bef9SDimitry Andric       }
643e8d8bef9SDimitry Andric     }
644e8d8bef9SDimitry Andric     if (CurChar == EOF)
645e8d8bef9SDimitry Andric       return ReturnError(TokStart, "unterminated string constant");
646e8d8bef9SDimitry Andric     return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
647e8d8bef9SDimitry Andric   }
648e8d8bef9SDimitry Andric 
6490b57cec5SDimitry Andric   // TODO: does gas allow multiline string constants?
6500b57cec5SDimitry Andric   while (CurChar != '"') {
6510b57cec5SDimitry Andric     if (CurChar == '\\') {
6520b57cec5SDimitry Andric       // Allow \", etc.
6530b57cec5SDimitry Andric       CurChar = getNextChar();
6540b57cec5SDimitry Andric     }
6550b57cec5SDimitry Andric 
6560b57cec5SDimitry Andric     if (CurChar == EOF)
6570b57cec5SDimitry Andric       return ReturnError(TokStart, "unterminated string constant");
6580b57cec5SDimitry Andric 
6590b57cec5SDimitry Andric     CurChar = getNextChar();
6600b57cec5SDimitry Andric   }
6610b57cec5SDimitry Andric 
6620b57cec5SDimitry Andric   return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
6630b57cec5SDimitry Andric }
6640b57cec5SDimitry Andric 
6650b57cec5SDimitry Andric StringRef AsmLexer::LexUntilEndOfStatement() {
6660b57cec5SDimitry Andric   TokStart = CurPtr;
6670b57cec5SDimitry Andric 
6680b57cec5SDimitry Andric   while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
6690b57cec5SDimitry Andric          !isAtStatementSeparator(CurPtr) && // End of statement marker.
6700b57cec5SDimitry Andric          *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
6710b57cec5SDimitry Andric     ++CurPtr;
6720b57cec5SDimitry Andric   }
6730b57cec5SDimitry Andric   return StringRef(TokStart, CurPtr-TokStart);
6740b57cec5SDimitry Andric }
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric StringRef AsmLexer::LexUntilEndOfLine() {
6770b57cec5SDimitry Andric   TokStart = CurPtr;
6780b57cec5SDimitry Andric 
6790b57cec5SDimitry Andric   while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
6800b57cec5SDimitry Andric     ++CurPtr;
6810b57cec5SDimitry Andric   }
6820b57cec5SDimitry Andric   return StringRef(TokStart, CurPtr-TokStart);
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric 
6850b57cec5SDimitry Andric size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
6860b57cec5SDimitry Andric                             bool ShouldSkipSpace) {
687*bdd1243dSDimitry Andric   SaveAndRestore SavedTokenStart(TokStart);
688*bdd1243dSDimitry Andric   SaveAndRestore SavedCurPtr(CurPtr);
689*bdd1243dSDimitry Andric   SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine);
690*bdd1243dSDimitry Andric   SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement);
691*bdd1243dSDimitry Andric   SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace);
692*bdd1243dSDimitry Andric   SaveAndRestore SavedIsPeeking(IsPeeking, true);
6930b57cec5SDimitry Andric   std::string SavedErr = getErr();
6940b57cec5SDimitry Andric   SMLoc SavedErrLoc = getErrLoc();
6950b57cec5SDimitry Andric 
6960b57cec5SDimitry Andric   size_t ReadCount;
6970b57cec5SDimitry Andric   for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
6980b57cec5SDimitry Andric     AsmToken Token = LexToken();
6990b57cec5SDimitry Andric 
7000b57cec5SDimitry Andric     Buf[ReadCount] = Token;
7010b57cec5SDimitry Andric 
7020b57cec5SDimitry Andric     if (Token.is(AsmToken::Eof))
7030b57cec5SDimitry Andric       break;
7040b57cec5SDimitry Andric   }
7050b57cec5SDimitry Andric 
7060b57cec5SDimitry Andric   SetError(SavedErrLoc, SavedErr);
7070b57cec5SDimitry Andric   return ReadCount;
7080b57cec5SDimitry Andric }
7090b57cec5SDimitry Andric 
7100b57cec5SDimitry Andric bool AsmLexer::isAtStartOfComment(const char *Ptr) {
711fe6060f1SDimitry Andric   if (MAI.getRestrictCommentStringToStartOfStatement() && !IsAtStartOfStatement)
712fe6060f1SDimitry Andric     return false;
713fe6060f1SDimitry Andric 
7140b57cec5SDimitry Andric   StringRef CommentString = MAI.getCommentString();
7150b57cec5SDimitry Andric 
7160b57cec5SDimitry Andric   if (CommentString.size() == 1)
7170b57cec5SDimitry Andric     return CommentString[0] == Ptr[0];
7180b57cec5SDimitry Andric 
719*bdd1243dSDimitry Andric   // Allow # preprocessor comments also be counted as comments for "##" cases
7200b57cec5SDimitry Andric   if (CommentString[1] == '#')
7210b57cec5SDimitry Andric     return CommentString[0] == Ptr[0];
7220b57cec5SDimitry Andric 
7230b57cec5SDimitry Andric   return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
7240b57cec5SDimitry Andric }
7250b57cec5SDimitry Andric 
7260b57cec5SDimitry Andric bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
7270b57cec5SDimitry Andric   return strncmp(Ptr, MAI.getSeparatorString(),
7280b57cec5SDimitry Andric                  strlen(MAI.getSeparatorString())) == 0;
7290b57cec5SDimitry Andric }
7300b57cec5SDimitry Andric 
7310b57cec5SDimitry Andric AsmToken AsmLexer::LexToken() {
7320b57cec5SDimitry Andric   TokStart = CurPtr;
7330b57cec5SDimitry Andric   // This always consumes at least one character.
7340b57cec5SDimitry Andric   int CurChar = getNextChar();
7350b57cec5SDimitry Andric 
7360b57cec5SDimitry Andric   if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
7370b57cec5SDimitry Andric     // If this starts with a '#', this may be a cpp
7380b57cec5SDimitry Andric     // hash directive and otherwise a line comment.
7390b57cec5SDimitry Andric     AsmToken TokenBuf[2];
7400b57cec5SDimitry Andric     MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
7410b57cec5SDimitry Andric     size_t num = peekTokens(Buf, true);
7420b57cec5SDimitry Andric     // There cannot be a space preceding this
7430b57cec5SDimitry Andric     if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
7440b57cec5SDimitry Andric         TokenBuf[1].is(AsmToken::String)) {
7450b57cec5SDimitry Andric       CurPtr = TokStart; // reset curPtr;
7460b57cec5SDimitry Andric       StringRef s = LexUntilEndOfLine();
7470b57cec5SDimitry Andric       UnLex(TokenBuf[1]);
7480b57cec5SDimitry Andric       UnLex(TokenBuf[0]);
7490b57cec5SDimitry Andric       return AsmToken(AsmToken::HashDirective, s);
7500b57cec5SDimitry Andric     }
751fe6060f1SDimitry Andric 
752fe6060f1SDimitry Andric     if (MAI.shouldAllowAdditionalComments())
7530b57cec5SDimitry Andric       return LexLineComment();
7540b57cec5SDimitry Andric   }
7550b57cec5SDimitry Andric 
7560b57cec5SDimitry Andric   if (isAtStartOfComment(TokStart))
7570b57cec5SDimitry Andric     return LexLineComment();
7580b57cec5SDimitry Andric 
7590b57cec5SDimitry Andric   if (isAtStatementSeparator(TokStart)) {
7600b57cec5SDimitry Andric     CurPtr += strlen(MAI.getSeparatorString()) - 1;
7610b57cec5SDimitry Andric     IsAtStartOfLine = true;
7620b57cec5SDimitry Andric     IsAtStartOfStatement = true;
7630b57cec5SDimitry Andric     return AsmToken(AsmToken::EndOfStatement,
7640b57cec5SDimitry Andric                     StringRef(TokStart, strlen(MAI.getSeparatorString())));
7650b57cec5SDimitry Andric   }
7660b57cec5SDimitry Andric 
7670b57cec5SDimitry Andric   // If we're missing a newline at EOF, make sure we still get an
7680b57cec5SDimitry Andric   // EndOfStatement token before the Eof token.
7695ffd83dbSDimitry Andric   if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) {
7700b57cec5SDimitry Andric     IsAtStartOfLine = true;
7710b57cec5SDimitry Andric     IsAtStartOfStatement = true;
772e8d8bef9SDimitry Andric     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
7730b57cec5SDimitry Andric   }
7740b57cec5SDimitry Andric   IsAtStartOfLine = false;
7750b57cec5SDimitry Andric   bool OldIsAtStartOfStatement = IsAtStartOfStatement;
7760b57cec5SDimitry Andric   IsAtStartOfStatement = false;
7770b57cec5SDimitry Andric   switch (CurChar) {
7780b57cec5SDimitry Andric   default:
779fe6060f1SDimitry Andric     // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
780fe6060f1SDimitry Andric     if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
781fe6060f1SDimitry Andric         (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
7825ffd83dbSDimitry Andric       return LexIdentifier();
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric     // Unknown character, emit an error.
7850b57cec5SDimitry Andric     return ReturnError(TokStart, "invalid character in input");
7860b57cec5SDimitry Andric   case EOF:
7875ffd83dbSDimitry Andric     if (EndStatementAtEOF) {
7880b57cec5SDimitry Andric       IsAtStartOfLine = true;
7890b57cec5SDimitry Andric       IsAtStartOfStatement = true;
7905ffd83dbSDimitry Andric     }
7910b57cec5SDimitry Andric     return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
7920b57cec5SDimitry Andric   case 0:
7930b57cec5SDimitry Andric   case ' ':
7940b57cec5SDimitry Andric   case '\t':
7950b57cec5SDimitry Andric     IsAtStartOfStatement = OldIsAtStartOfStatement;
7960b57cec5SDimitry Andric     while (*CurPtr == ' ' || *CurPtr == '\t')
7970b57cec5SDimitry Andric       CurPtr++;
7980b57cec5SDimitry Andric     if (SkipSpace)
7990b57cec5SDimitry Andric       return LexToken(); // Ignore whitespace.
8000b57cec5SDimitry Andric     else
8010b57cec5SDimitry Andric       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
8020b57cec5SDimitry Andric   case '\r': {
8030b57cec5SDimitry Andric     IsAtStartOfLine = true;
8040b57cec5SDimitry Andric     IsAtStartOfStatement = true;
8050b57cec5SDimitry Andric     // If this is a CR followed by LF, treat that as one token.
8060b57cec5SDimitry Andric     if (CurPtr != CurBuf.end() && *CurPtr == '\n')
8070b57cec5SDimitry Andric       ++CurPtr;
8080b57cec5SDimitry Andric     return AsmToken(AsmToken::EndOfStatement,
8090b57cec5SDimitry Andric                     StringRef(TokStart, CurPtr - TokStart));
8100b57cec5SDimitry Andric   }
8110b57cec5SDimitry Andric   case '\n':
8120b57cec5SDimitry Andric     IsAtStartOfLine = true;
8130b57cec5SDimitry Andric     IsAtStartOfStatement = true;
8140b57cec5SDimitry Andric     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
8150b57cec5SDimitry Andric   case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
8160b57cec5SDimitry Andric   case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
8170b57cec5SDimitry Andric   case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
8180b57cec5SDimitry Andric   case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
8190b57cec5SDimitry Andric   case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
8200b57cec5SDimitry Andric   case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
8210b57cec5SDimitry Andric   case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
8220b57cec5SDimitry Andric   case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
8230b57cec5SDimitry Andric   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
8240b57cec5SDimitry Andric   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
8250b57cec5SDimitry Andric   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
826fe6060f1SDimitry Andric   case '$': {
827fe6060f1SDimitry Andric     if (LexMotorolaIntegers && isHexDigit(*CurPtr))
828fe6060f1SDimitry Andric       return LexDigit();
829fe6060f1SDimitry Andric     if (MAI.doesAllowDollarAtStartOfIdentifier())
830fe6060f1SDimitry Andric       return LexIdentifier();
831fe6060f1SDimitry Andric     return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
832fe6060f1SDimitry Andric   }
833fe6060f1SDimitry Andric   case '@': {
834fe6060f1SDimitry Andric     if (MAI.doesAllowAtAtStartOfIdentifier())
835fe6060f1SDimitry Andric       return LexIdentifier();
836fe6060f1SDimitry Andric     return AsmToken(AsmToken::At, StringRef(TokStart, 1));
837fe6060f1SDimitry Andric   }
8380b57cec5SDimitry Andric   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
8390b57cec5SDimitry Andric   case '=':
8400b57cec5SDimitry Andric     if (*CurPtr == '=') {
8410b57cec5SDimitry Andric       ++CurPtr;
8420b57cec5SDimitry Andric       return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
8430b57cec5SDimitry Andric     }
8440b57cec5SDimitry Andric     return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
8450b57cec5SDimitry Andric   case '-':
8460b57cec5SDimitry Andric     if (*CurPtr == '>') {
8470b57cec5SDimitry Andric       ++CurPtr;
8480b57cec5SDimitry Andric       return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2));
8490b57cec5SDimitry Andric     }
8500b57cec5SDimitry Andric     return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
8510b57cec5SDimitry Andric   case '|':
8520b57cec5SDimitry Andric     if (*CurPtr == '|') {
8530b57cec5SDimitry Andric       ++CurPtr;
8540b57cec5SDimitry Andric       return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
8550b57cec5SDimitry Andric     }
8560b57cec5SDimitry Andric     return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
8570b57cec5SDimitry Andric   case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
8580b57cec5SDimitry Andric   case '&':
8590b57cec5SDimitry Andric     if (*CurPtr == '&') {
8600b57cec5SDimitry Andric       ++CurPtr;
8610b57cec5SDimitry Andric       return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
8620b57cec5SDimitry Andric     }
8630b57cec5SDimitry Andric     return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
8640b57cec5SDimitry Andric   case '!':
8650b57cec5SDimitry Andric     if (*CurPtr == '=') {
8660b57cec5SDimitry Andric       ++CurPtr;
8670b57cec5SDimitry Andric       return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
8680b57cec5SDimitry Andric     }
8690b57cec5SDimitry Andric     return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
8700b57cec5SDimitry Andric   case '%':
871fe6060f1SDimitry Andric     if (LexMotorolaIntegers && (*CurPtr == '0' || *CurPtr == '1')) {
872fe6060f1SDimitry Andric       return LexDigit();
873fe6060f1SDimitry Andric     }
874fe6060f1SDimitry Andric 
8750b57cec5SDimitry Andric     if (MAI.hasMipsExpressions()) {
8760b57cec5SDimitry Andric       AsmToken::TokenKind Operator;
8770b57cec5SDimitry Andric       unsigned OperatorLength;
8780b57cec5SDimitry Andric 
8790b57cec5SDimitry Andric       std::tie(Operator, OperatorLength) =
8800b57cec5SDimitry Andric           StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
8810b57cec5SDimitry Andric               StringRef(CurPtr))
8820b57cec5SDimitry Andric               .StartsWith("call16", {AsmToken::PercentCall16, 7})
8830b57cec5SDimitry Andric               .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
8840b57cec5SDimitry Andric               .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
8850b57cec5SDimitry Andric               .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
8860b57cec5SDimitry Andric               .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
8870b57cec5SDimitry Andric               .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
8880b57cec5SDimitry Andric               .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
8890b57cec5SDimitry Andric               .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
8900b57cec5SDimitry Andric               .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
8910b57cec5SDimitry Andric               .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
8920b57cec5SDimitry Andric               .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
8930b57cec5SDimitry Andric               .StartsWith("got", {AsmToken::PercentGot, 4})
8940b57cec5SDimitry Andric               .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
8950b57cec5SDimitry Andric               .StartsWith("higher", {AsmToken::PercentHigher, 7})
8960b57cec5SDimitry Andric               .StartsWith("highest", {AsmToken::PercentHighest, 8})
8970b57cec5SDimitry Andric               .StartsWith("hi", {AsmToken::PercentHi, 3})
8980b57cec5SDimitry Andric               .StartsWith("lo", {AsmToken::PercentLo, 3})
8990b57cec5SDimitry Andric               .StartsWith("neg", {AsmToken::PercentNeg, 4})
9000b57cec5SDimitry Andric               .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
9010b57cec5SDimitry Andric               .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
9020b57cec5SDimitry Andric               .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
9030b57cec5SDimitry Andric               .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
9040b57cec5SDimitry Andric               .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
9050b57cec5SDimitry Andric               .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
9060b57cec5SDimitry Andric               .Default({AsmToken::Percent, 1});
9070b57cec5SDimitry Andric 
9080b57cec5SDimitry Andric       if (Operator != AsmToken::Percent) {
9090b57cec5SDimitry Andric         CurPtr += OperatorLength - 1;
9100b57cec5SDimitry Andric         return AsmToken(Operator, StringRef(TokStart, OperatorLength));
9110b57cec5SDimitry Andric       }
9120b57cec5SDimitry Andric     }
9130b57cec5SDimitry Andric     return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
9140b57cec5SDimitry Andric   case '/':
9150b57cec5SDimitry Andric     IsAtStartOfStatement = OldIsAtStartOfStatement;
9160b57cec5SDimitry Andric     return LexSlash();
917fe6060f1SDimitry Andric   case '#': {
918fe6060f1SDimitry Andric     if (MAI.doesAllowHashAtStartOfIdentifier())
919fe6060f1SDimitry Andric       return LexIdentifier();
920fe6060f1SDimitry Andric     return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
921fe6060f1SDimitry Andric   }
9220b57cec5SDimitry Andric   case '\'': return LexSingleQuote();
9230b57cec5SDimitry Andric   case '"': return LexQuote();
9240b57cec5SDimitry Andric   case '0': case '1': case '2': case '3': case '4':
9250b57cec5SDimitry Andric   case '5': case '6': case '7': case '8': case '9':
9260b57cec5SDimitry Andric     return LexDigit();
9270b57cec5SDimitry Andric   case '<':
9280b57cec5SDimitry Andric     switch (*CurPtr) {
9290b57cec5SDimitry Andric     case '<':
9300b57cec5SDimitry Andric       ++CurPtr;
9310b57cec5SDimitry Andric       return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2));
9320b57cec5SDimitry Andric     case '=':
9330b57cec5SDimitry Andric       ++CurPtr;
9340b57cec5SDimitry Andric       return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2));
9350b57cec5SDimitry Andric     case '>':
9360b57cec5SDimitry Andric       ++CurPtr;
9370b57cec5SDimitry Andric       return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2));
9380b57cec5SDimitry Andric     default:
9390b57cec5SDimitry Andric       return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
9400b57cec5SDimitry Andric     }
9410b57cec5SDimitry Andric   case '>':
9420b57cec5SDimitry Andric     switch (*CurPtr) {
9430b57cec5SDimitry Andric     case '>':
9440b57cec5SDimitry Andric       ++CurPtr;
9450b57cec5SDimitry Andric       return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2));
9460b57cec5SDimitry Andric     case '=':
9470b57cec5SDimitry Andric       ++CurPtr;
9480b57cec5SDimitry Andric       return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2));
9490b57cec5SDimitry Andric     default:
9500b57cec5SDimitry Andric       return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
9510b57cec5SDimitry Andric     }
9520b57cec5SDimitry Andric 
9530b57cec5SDimitry Andric   // TODO: Quoted identifiers (objc methods etc)
9540b57cec5SDimitry Andric   // local labels: [0-9][:]
9550b57cec5SDimitry Andric   // Forward/backward labels: [0-9][fb]
9560b57cec5SDimitry Andric   // Integers, fp constants, character constants.
9570b57cec5SDimitry Andric   }
9580b57cec5SDimitry Andric }
959