1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class implements the lexer for assembly files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/MC/MCParser/AsmLexer.h" 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/MC/MCAsmInfo.h" 20 #include "llvm/MC/MCParser/MCAsmLexer.h" 21 #include "llvm/Support/SMLoc.h" 22 #include "llvm/Support/SaveAndRestore.h" 23 #include <cassert> 24 #include <cctype> 25 #include <cstdio> 26 #include <cstring> 27 #include <string> 28 #include <tuple> 29 #include <utility> 30 31 using namespace llvm; 32 33 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { 34 AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); 35 } 36 37 AsmLexer::~AsmLexer() = default; 38 39 void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { 40 CurBuf = Buf; 41 42 if (ptr) 43 CurPtr = ptr; 44 else 45 CurPtr = CurBuf.begin(); 46 47 TokStart = nullptr; 48 } 49 50 /// ReturnError - Set the error to the specified string at the specified 51 /// location. This is defined to always return AsmToken::Error. 52 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 53 SetError(SMLoc::getFromPointer(Loc), Msg); 54 55 return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); 56 } 57 58 int AsmLexer::getNextChar() { 59 if (CurPtr == CurBuf.end()) 60 return EOF; 61 return (unsigned char)*CurPtr++; 62 } 63 64 /// The leading integral digit sequence and dot should have already been 65 /// consumed, some or all of the fractional digit sequence *can* have been 66 /// consumed. 67 AsmToken AsmLexer::LexFloatLiteral() { 68 // Skip the fractional digit sequence. 69 while (isDigit(*CurPtr)) 70 ++CurPtr; 71 72 if (*CurPtr == '-' || *CurPtr == '+') 73 return ReturnError(CurPtr, "Invalid sign in float literal"); 74 75 // Check for exponent 76 if ((*CurPtr == 'e' || *CurPtr == 'E')) { 77 ++CurPtr; 78 79 if (*CurPtr == '-' || *CurPtr == '+') 80 ++CurPtr; 81 82 while (isDigit(*CurPtr)) 83 ++CurPtr; 84 } 85 86 return AsmToken(AsmToken::Real, 87 StringRef(TokStart, CurPtr - TokStart)); 88 } 89 90 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 91 /// while making sure there are enough actual digits around for the constant to 92 /// be valid. 93 /// 94 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 95 /// before we get here. 96 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 97 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 98 "unexpected parse state in floating hex"); 99 bool NoFracDigits = true; 100 101 // Skip the fractional part if there is one 102 if (*CurPtr == '.') { 103 ++CurPtr; 104 105 const char *FracStart = CurPtr; 106 while (isHexDigit(*CurPtr)) 107 ++CurPtr; 108 109 NoFracDigits = CurPtr == FracStart; 110 } 111 112 if (NoIntDigits && NoFracDigits) 113 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 114 "expected at least one significand digit"); 115 116 // Make sure we do have some kind of proper exponent part 117 if (*CurPtr != 'p' && *CurPtr != 'P') 118 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 119 "expected exponent part 'p'"); 120 ++CurPtr; 121 122 if (*CurPtr == '+' || *CurPtr == '-') 123 ++CurPtr; 124 125 // N.b. exponent digits are *not* hex 126 const char *ExpStart = CurPtr; 127 while (isDigit(*CurPtr)) 128 ++CurPtr; 129 130 if (CurPtr == ExpStart) 131 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 132 "expected at least one exponent digit"); 133 134 return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 135 } 136 137 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* 138 static bool IsIdentifierChar(char c, bool AllowAt) { 139 return isAlnum(c) || c == '_' || c == '$' || c == '.' || 140 (c == '@' && AllowAt) || c == '?'; 141 } 142 143 AsmToken AsmLexer::LexIdentifier() { 144 // Check for floating point literals. 145 if (CurPtr[-1] == '.' && isDigit(*CurPtr)) { 146 // Disambiguate a .1243foo identifier from a floating literal. 147 while (isDigit(*CurPtr)) 148 ++CurPtr; 149 150 if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) || 151 *CurPtr == 'e' || *CurPtr == 'E') 152 return LexFloatLiteral(); 153 } 154 155 while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 156 ++CurPtr; 157 158 // Handle . as a special case. 159 if (CurPtr == TokStart+1 && TokStart[0] == '.') 160 return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 161 162 return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 163 } 164 165 /// LexSlash: Slash: / 166 /// C-Style Comment: /* ... */ 167 AsmToken AsmLexer::LexSlash() { 168 switch (*CurPtr) { 169 case '*': 170 IsAtStartOfStatement = false; 171 break; // C style comment. 172 case '/': 173 ++CurPtr; 174 return LexLineComment(); 175 default: 176 IsAtStartOfStatement = false; 177 return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); 178 } 179 180 // C Style comment. 181 ++CurPtr; // skip the star. 182 const char *CommentTextStart = CurPtr; 183 while (CurPtr != CurBuf.end()) { 184 switch (*CurPtr++) { 185 case '*': 186 // End of the comment? 187 if (*CurPtr != '/') 188 break; 189 // If we have a CommentConsumer, notify it about the comment. 190 if (CommentConsumer) { 191 CommentConsumer->HandleComment( 192 SMLoc::getFromPointer(CommentTextStart), 193 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 194 } 195 ++CurPtr; // End the */. 196 return AsmToken(AsmToken::Comment, 197 StringRef(TokStart, CurPtr - TokStart)); 198 } 199 } 200 return ReturnError(TokStart, "unterminated comment"); 201 } 202 203 /// LexLineComment: Comment: #[^\n]* 204 /// : //[^\n]* 205 AsmToken AsmLexer::LexLineComment() { 206 // Mark This as an end of statement with a body of the 207 // comment. While it would be nicer to leave this two tokens, 208 // backwards compatability with TargetParsers makes keeping this in this form 209 // better. 210 const char *CommentTextStart = CurPtr; 211 int CurChar = getNextChar(); 212 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 213 CurChar = getNextChar(); 214 if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n') 215 ++CurPtr; 216 217 // If we have a CommentConsumer, notify it about the comment. 218 if (CommentConsumer) { 219 CommentConsumer->HandleComment( 220 SMLoc::getFromPointer(CommentTextStart), 221 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 222 } 223 224 IsAtStartOfLine = true; 225 // This is a whole line comment. leave newline 226 if (IsAtStartOfStatement) 227 return AsmToken(AsmToken::EndOfStatement, 228 StringRef(TokStart, CurPtr - TokStart)); 229 IsAtStartOfStatement = true; 230 231 return AsmToken(AsmToken::EndOfStatement, 232 StringRef(TokStart, CurPtr - 1 - TokStart)); 233 } 234 235 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 236 // Skip ULL, UL, U, L and LL suffices. 237 if (CurPtr[0] == 'U') 238 ++CurPtr; 239 if (CurPtr[0] == 'L') 240 ++CurPtr; 241 if (CurPtr[0] == 'L') 242 ++CurPtr; 243 } 244 245 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the 246 // integer as a hexadecimal, possibly with leading zeroes. 247 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, 248 bool LexHex) { 249 const char *FirstNonDec = nullptr; 250 const char *LookAhead = CurPtr; 251 while (true) { 252 if (isDigit(*LookAhead)) { 253 ++LookAhead; 254 } else { 255 if (!FirstNonDec) 256 FirstNonDec = LookAhead; 257 258 // Keep going if we are looking for a 'h' suffix. 259 if (LexHex && isHexDigit(*LookAhead)) 260 ++LookAhead; 261 else 262 break; 263 } 264 } 265 bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H'); 266 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec; 267 if (isHex) 268 return 16; 269 return DefaultRadix; 270 } 271 272 static AsmToken intToken(StringRef Ref, APInt &Value) 273 { 274 if (Value.isIntN(64)) 275 return AsmToken(AsmToken::Integer, Ref, Value); 276 return AsmToken(AsmToken::BigNum, Ref, Value); 277 } 278 279 /// LexDigit: First character is [0-9]. 280 /// Local Label: [0-9][:] 281 /// Forward/Backward Label: [0-9][fb] 282 /// Binary integer: 0b[01]+ 283 /// Octal integer: 0[0-7]+ 284 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 285 /// Decimal integer: [1-9][0-9]* 286 AsmToken AsmLexer::LexDigit() { 287 // MASM-flavor binary integer: [01]+[bB] 288 // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] 289 if (LexMasmIntegers && isdigit(CurPtr[-1])) { 290 const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? 291 CurPtr - 1 : nullptr; 292 const char *OldCurPtr = CurPtr; 293 while (isHexDigit(*CurPtr)) { 294 if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) 295 FirstNonBinary = CurPtr; 296 ++CurPtr; 297 } 298 299 unsigned Radix = 0; 300 if (*CurPtr == 'h' || *CurPtr == 'H') { 301 // hexadecimal number 302 ++CurPtr; 303 Radix = 16; 304 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && 305 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) 306 Radix = 2; 307 308 if (Radix == 2 || Radix == 16) { 309 StringRef Result(TokStart, CurPtr - TokStart); 310 APInt Value(128, 0, true); 311 312 if (Result.drop_back().getAsInteger(Radix, Value)) 313 return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : 314 "invalid hexdecimal number"); 315 316 // MSVC accepts and ignores type suffices on integer literals. 317 SkipIgnoredIntegerSuffix(CurPtr); 318 319 return intToken(Result, Value); 320 } 321 322 // octal/decimal integers, or floating point numbers, fall through 323 CurPtr = OldCurPtr; 324 } 325 326 // Decimal integer: [1-9][0-9]* 327 if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 328 unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); 329 bool isHex = Radix == 16; 330 // Check for floating point literals. 331 if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { 332 if (*CurPtr == '.') 333 ++CurPtr; 334 return LexFloatLiteral(); 335 } 336 337 StringRef Result(TokStart, CurPtr - TokStart); 338 339 APInt Value(128, 0, true); 340 if (Result.getAsInteger(Radix, Value)) 341 return ReturnError(TokStart, !isHex ? "invalid decimal number" : 342 "invalid hexdecimal number"); 343 344 // Consume the [hH]. 345 if (LexMasmIntegers && Radix == 16) 346 ++CurPtr; 347 348 // The darwin/x86 (and x86-64) assembler accepts and ignores type 349 // suffices on integer literals. 350 SkipIgnoredIntegerSuffix(CurPtr); 351 352 return intToken(Result, Value); 353 } 354 355 if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { 356 ++CurPtr; 357 // See if we actually have "0b" as part of something like "jmp 0b\n" 358 if (!isDigit(CurPtr[0])) { 359 --CurPtr; 360 StringRef Result(TokStart, CurPtr - TokStart); 361 return AsmToken(AsmToken::Integer, Result, 0); 362 } 363 const char *NumStart = CurPtr; 364 while (CurPtr[0] == '0' || CurPtr[0] == '1') 365 ++CurPtr; 366 367 // Requires at least one binary digit. 368 if (CurPtr == NumStart) 369 return ReturnError(TokStart, "invalid binary number"); 370 371 StringRef Result(TokStart, CurPtr - TokStart); 372 373 APInt Value(128, 0, true); 374 if (Result.substr(2).getAsInteger(2, Value)) 375 return ReturnError(TokStart, "invalid binary number"); 376 377 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 378 // suffixes on integer literals. 379 SkipIgnoredIntegerSuffix(CurPtr); 380 381 return intToken(Result, Value); 382 } 383 384 if ((*CurPtr == 'x') || (*CurPtr == 'X')) { 385 ++CurPtr; 386 const char *NumStart = CurPtr; 387 while (isHexDigit(CurPtr[0])) 388 ++CurPtr; 389 390 // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 391 // diagnosed by LexHexFloatLiteral). 392 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 393 return LexHexFloatLiteral(NumStart == CurPtr); 394 395 // Otherwise requires at least one hex digit. 396 if (CurPtr == NumStart) 397 return ReturnError(CurPtr-2, "invalid hexadecimal number"); 398 399 APInt Result(128, 0); 400 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 401 return ReturnError(TokStart, "invalid hexadecimal number"); 402 403 // Consume the optional [hH]. 404 if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H')) 405 ++CurPtr; 406 407 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 408 // suffixes on integer literals. 409 SkipIgnoredIntegerSuffix(CurPtr); 410 411 return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 412 } 413 414 // Either octal or hexadecimal. 415 APInt Value(128, 0, true); 416 unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); 417 bool isHex = Radix == 16; 418 StringRef Result(TokStart, CurPtr - TokStart); 419 if (Result.getAsInteger(Radix, Value)) 420 return ReturnError(TokStart, !isHex ? "invalid octal number" : 421 "invalid hexdecimal number"); 422 423 // Consume the [hH]. 424 if (Radix == 16) 425 ++CurPtr; 426 427 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 428 // suffixes on integer literals. 429 SkipIgnoredIntegerSuffix(CurPtr); 430 431 return intToken(Result, Value); 432 } 433 434 /// LexSingleQuote: Integer: 'b' 435 AsmToken AsmLexer::LexSingleQuote() { 436 int CurChar = getNextChar(); 437 438 if (CurChar == '\\') 439 CurChar = getNextChar(); 440 441 if (CurChar == EOF) 442 return ReturnError(TokStart, "unterminated single quote"); 443 444 CurChar = getNextChar(); 445 446 if (CurChar != '\'') 447 return ReturnError(TokStart, "single quote way too long"); 448 449 // The idea here being that 'c' is basically just an integral 450 // constant. 451 StringRef Res = StringRef(TokStart,CurPtr - TokStart); 452 long long Value; 453 454 if (Res.startswith("\'\\")) { 455 char theChar = Res[2]; 456 switch (theChar) { 457 default: Value = theChar; break; 458 case '\'': Value = '\''; break; 459 case 't': Value = '\t'; break; 460 case 'n': Value = '\n'; break; 461 case 'b': Value = '\b'; break; 462 } 463 } else 464 Value = TokStart[1]; 465 466 return AsmToken(AsmToken::Integer, Res, Value); 467 } 468 469 /// LexQuote: String: "..." 470 AsmToken AsmLexer::LexQuote() { 471 int CurChar = getNextChar(); 472 // TODO: does gas allow multiline string constants? 473 while (CurChar != '"') { 474 if (CurChar == '\\') { 475 // Allow \", etc. 476 CurChar = getNextChar(); 477 } 478 479 if (CurChar == EOF) 480 return ReturnError(TokStart, "unterminated string constant"); 481 482 CurChar = getNextChar(); 483 } 484 485 return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 486 } 487 488 StringRef AsmLexer::LexUntilEndOfStatement() { 489 TokStart = CurPtr; 490 491 while (!isAtStartOfComment(CurPtr) && // Start of line comment. 492 !isAtStatementSeparator(CurPtr) && // End of statement marker. 493 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 494 ++CurPtr; 495 } 496 return StringRef(TokStart, CurPtr-TokStart); 497 } 498 499 StringRef AsmLexer::LexUntilEndOfLine() { 500 TokStart = CurPtr; 501 502 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 503 ++CurPtr; 504 } 505 return StringRef(TokStart, CurPtr-TokStart); 506 } 507 508 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, 509 bool ShouldSkipSpace) { 510 SaveAndRestore<const char *> SavedTokenStart(TokStart); 511 SaveAndRestore<const char *> SavedCurPtr(CurPtr); 512 SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine); 513 SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement); 514 SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace); 515 SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true); 516 std::string SavedErr = getErr(); 517 SMLoc SavedErrLoc = getErrLoc(); 518 519 size_t ReadCount; 520 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { 521 AsmToken Token = LexToken(); 522 523 Buf[ReadCount] = Token; 524 525 if (Token.is(AsmToken::Eof)) 526 break; 527 } 528 529 SetError(SavedErrLoc, SavedErr); 530 return ReadCount; 531 } 532 533 bool AsmLexer::isAtStartOfComment(const char *Ptr) { 534 StringRef CommentString = MAI.getCommentString(); 535 536 if (CommentString.size() == 1) 537 return CommentString[0] == Ptr[0]; 538 539 // Allow # preprocessor commments also be counted as comments for "##" cases 540 if (CommentString[1] == '#') 541 return CommentString[0] == Ptr[0]; 542 543 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; 544 } 545 546 bool AsmLexer::isAtStatementSeparator(const char *Ptr) { 547 return strncmp(Ptr, MAI.getSeparatorString(), 548 strlen(MAI.getSeparatorString())) == 0; 549 } 550 551 AsmToken AsmLexer::LexToken() { 552 TokStart = CurPtr; 553 // This always consumes at least one character. 554 int CurChar = getNextChar(); 555 556 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { 557 // If this starts with a '#', this may be a cpp 558 // hash directive and otherwise a line comment. 559 AsmToken TokenBuf[2]; 560 MutableArrayRef<AsmToken> Buf(TokenBuf, 2); 561 size_t num = peekTokens(Buf, true); 562 // There cannot be a space preceding this 563 if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && 564 TokenBuf[1].is(AsmToken::String)) { 565 CurPtr = TokStart; // reset curPtr; 566 StringRef s = LexUntilEndOfLine(); 567 UnLex(TokenBuf[1]); 568 UnLex(TokenBuf[0]); 569 return AsmToken(AsmToken::HashDirective, s); 570 } 571 return LexLineComment(); 572 } 573 574 if (isAtStartOfComment(TokStart)) 575 return LexLineComment(); 576 577 if (isAtStatementSeparator(TokStart)) { 578 CurPtr += strlen(MAI.getSeparatorString()) - 1; 579 IsAtStartOfLine = true; 580 IsAtStartOfStatement = true; 581 return AsmToken(AsmToken::EndOfStatement, 582 StringRef(TokStart, strlen(MAI.getSeparatorString()))); 583 } 584 585 // If we're missing a newline at EOF, make sure we still get an 586 // EndOfStatement token before the Eof token. 587 if (CurChar == EOF && !IsAtStartOfStatement) { 588 IsAtStartOfLine = true; 589 IsAtStartOfStatement = true; 590 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 591 } 592 IsAtStartOfLine = false; 593 bool OldIsAtStartOfStatement = IsAtStartOfStatement; 594 IsAtStartOfStatement = false; 595 switch (CurChar) { 596 default: 597 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 598 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 599 return LexIdentifier(); 600 601 // Unknown character, emit an error. 602 return ReturnError(TokStart, "invalid character in input"); 603 case EOF: 604 IsAtStartOfLine = true; 605 IsAtStartOfStatement = true; 606 return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 607 case 0: 608 case ' ': 609 case '\t': 610 IsAtStartOfStatement = OldIsAtStartOfStatement; 611 while (*CurPtr == ' ' || *CurPtr == '\t') 612 CurPtr++; 613 if (SkipSpace) 614 return LexToken(); // Ignore whitespace. 615 else 616 return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); 617 case '\r': { 618 IsAtStartOfLine = true; 619 IsAtStartOfStatement = true; 620 // If this is a CR followed by LF, treat that as one token. 621 if (CurPtr != CurBuf.end() && *CurPtr == '\n') 622 ++CurPtr; 623 return AsmToken(AsmToken::EndOfStatement, 624 StringRef(TokStart, CurPtr - TokStart)); 625 } 626 case '\n': 627 IsAtStartOfLine = true; 628 IsAtStartOfStatement = true; 629 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 630 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 631 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 632 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 633 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 634 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 635 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 636 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 637 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 638 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 639 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 640 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 641 case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 642 case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 643 case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 644 case '=': 645 if (*CurPtr == '=') { 646 ++CurPtr; 647 return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 648 } 649 return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 650 case '-': 651 if (*CurPtr == '>') { 652 ++CurPtr; 653 return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2)); 654 } 655 return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 656 case '|': 657 if (*CurPtr == '|') { 658 ++CurPtr; 659 return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 660 } 661 return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 662 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 663 case '&': 664 if (*CurPtr == '&') { 665 ++CurPtr; 666 return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 667 } 668 return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 669 case '!': 670 if (*CurPtr == '=') { 671 ++CurPtr; 672 return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 673 } 674 return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 675 case '%': 676 if (MAI.hasMipsExpressions()) { 677 AsmToken::TokenKind Operator; 678 unsigned OperatorLength; 679 680 std::tie(Operator, OperatorLength) = 681 StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( 682 StringRef(CurPtr)) 683 .StartsWith("call16", {AsmToken::PercentCall16, 7}) 684 .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) 685 .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) 686 .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) 687 .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) 688 .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) 689 .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) 690 .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) 691 .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) 692 .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) 693 .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) 694 .StartsWith("got", {AsmToken::PercentGot, 4}) 695 .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) 696 .StartsWith("higher", {AsmToken::PercentHigher, 7}) 697 .StartsWith("highest", {AsmToken::PercentHighest, 8}) 698 .StartsWith("hi", {AsmToken::PercentHi, 3}) 699 .StartsWith("lo", {AsmToken::PercentLo, 3}) 700 .StartsWith("neg", {AsmToken::PercentNeg, 4}) 701 .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) 702 .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) 703 .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) 704 .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) 705 .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) 706 .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) 707 .Default({AsmToken::Percent, 1}); 708 709 if (Operator != AsmToken::Percent) { 710 CurPtr += OperatorLength - 1; 711 return AsmToken(Operator, StringRef(TokStart, OperatorLength)); 712 } 713 } 714 return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 715 case '/': 716 IsAtStartOfStatement = OldIsAtStartOfStatement; 717 return LexSlash(); 718 case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 719 case '\'': return LexSingleQuote(); 720 case '"': return LexQuote(); 721 case '0': case '1': case '2': case '3': case '4': 722 case '5': case '6': case '7': case '8': case '9': 723 return LexDigit(); 724 case '<': 725 switch (*CurPtr) { 726 case '<': 727 ++CurPtr; 728 return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); 729 case '=': 730 ++CurPtr; 731 return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); 732 case '>': 733 ++CurPtr; 734 return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); 735 default: 736 return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 737 } 738 case '>': 739 switch (*CurPtr) { 740 case '>': 741 ++CurPtr; 742 return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); 743 case '=': 744 ++CurPtr; 745 return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); 746 default: 747 return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 748 } 749 750 // TODO: Quoted identifiers (objc methods etc) 751 // local labels: [0-9][:] 752 // Forward/backward labels: [0-9][fb] 753 // Integers, fp constants, character constants. 754 } 755 } 756