1 //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This class implements the lexer for assembly files. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/MC/MCParser/AsmLexer.h" 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/StringExtras.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/MC/MCAsmInfo.h" 20 #include "llvm/MC/MCParser/MCAsmLexer.h" 21 #include "llvm/Support/SMLoc.h" 22 #include "llvm/Support/SaveAndRestore.h" 23 #include <cassert> 24 #include <cctype> 25 #include <cstdio> 26 #include <cstring> 27 #include <string> 28 #include <tuple> 29 #include <utility> 30 31 using namespace llvm; 32 33 AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { 34 AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); 35 } 36 37 AsmLexer::~AsmLexer() = default; 38 39 void AsmLexer::setBuffer(StringRef Buf, const char *ptr, 40 bool EndStatementAtEOF) { 41 CurBuf = Buf; 42 43 if (ptr) 44 CurPtr = ptr; 45 else 46 CurPtr = CurBuf.begin(); 47 48 TokStart = nullptr; 49 this->EndStatementAtEOF = EndStatementAtEOF; 50 } 51 52 /// ReturnError - Set the error to the specified string at the specified 53 /// location. This is defined to always return AsmToken::Error. 54 AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { 55 SetError(SMLoc::getFromPointer(Loc), Msg); 56 57 return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); 58 } 59 60 int AsmLexer::getNextChar() { 61 if (CurPtr == CurBuf.end()) 62 return EOF; 63 return (unsigned char)*CurPtr++; 64 } 65 66 /// The leading integral digit sequence and dot should have already been 67 /// consumed, some or all of the fractional digit sequence *can* have been 68 /// consumed. 69 AsmToken AsmLexer::LexFloatLiteral() { 70 // Skip the fractional digit sequence. 71 while (isDigit(*CurPtr)) 72 ++CurPtr; 73 74 if (*CurPtr == '-' || *CurPtr == '+') 75 return ReturnError(CurPtr, "Invalid sign in float literal"); 76 77 // Check for exponent 78 if ((*CurPtr == 'e' || *CurPtr == 'E')) { 79 ++CurPtr; 80 81 if (*CurPtr == '-' || *CurPtr == '+') 82 ++CurPtr; 83 84 while (isDigit(*CurPtr)) 85 ++CurPtr; 86 } 87 88 return AsmToken(AsmToken::Real, 89 StringRef(TokStart, CurPtr - TokStart)); 90 } 91 92 /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ 93 /// while making sure there are enough actual digits around for the constant to 94 /// be valid. 95 /// 96 /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed 97 /// before we get here. 98 AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { 99 assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && 100 "unexpected parse state in floating hex"); 101 bool NoFracDigits = true; 102 103 // Skip the fractional part if there is one 104 if (*CurPtr == '.') { 105 ++CurPtr; 106 107 const char *FracStart = CurPtr; 108 while (isHexDigit(*CurPtr)) 109 ++CurPtr; 110 111 NoFracDigits = CurPtr == FracStart; 112 } 113 114 if (NoIntDigits && NoFracDigits) 115 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 116 "expected at least one significand digit"); 117 118 // Make sure we do have some kind of proper exponent part 119 if (*CurPtr != 'p' && *CurPtr != 'P') 120 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 121 "expected exponent part 'p'"); 122 ++CurPtr; 123 124 if (*CurPtr == '+' || *CurPtr == '-') 125 ++CurPtr; 126 127 // N.b. exponent digits are *not* hex 128 const char *ExpStart = CurPtr; 129 while (isDigit(*CurPtr)) 130 ++CurPtr; 131 132 if (CurPtr == ExpStart) 133 return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " 134 "expected at least one exponent digit"); 135 136 return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); 137 } 138 139 /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* 140 static bool IsIdentifierChar(char c, bool AllowAt) { 141 return isAlnum(c) || c == '_' || c == '$' || c == '.' || 142 (c == '@' && AllowAt) || c == '?'; 143 } 144 145 AsmToken AsmLexer::LexIdentifier() { 146 // Check for floating point literals. 147 if (CurPtr[-1] == '.' && isDigit(*CurPtr)) { 148 // Disambiguate a .1243foo identifier from a floating literal. 149 while (isDigit(*CurPtr)) 150 ++CurPtr; 151 152 if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) || 153 *CurPtr == 'e' || *CurPtr == 'E') 154 return LexFloatLiteral(); 155 } 156 157 while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) 158 ++CurPtr; 159 160 // Handle . as a special case. 161 if (CurPtr == TokStart+1 && TokStart[0] == '.') 162 return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); 163 164 return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); 165 } 166 167 /// LexSlash: Slash: / 168 /// C-Style Comment: /* ... */ 169 AsmToken AsmLexer::LexSlash() { 170 switch (*CurPtr) { 171 case '*': 172 IsAtStartOfStatement = false; 173 break; // C style comment. 174 case '/': 175 ++CurPtr; 176 return LexLineComment(); 177 default: 178 IsAtStartOfStatement = false; 179 return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); 180 } 181 182 // C Style comment. 183 ++CurPtr; // skip the star. 184 const char *CommentTextStart = CurPtr; 185 while (CurPtr != CurBuf.end()) { 186 switch (*CurPtr++) { 187 case '*': 188 // End of the comment? 189 if (*CurPtr != '/') 190 break; 191 // If we have a CommentConsumer, notify it about the comment. 192 if (CommentConsumer) { 193 CommentConsumer->HandleComment( 194 SMLoc::getFromPointer(CommentTextStart), 195 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 196 } 197 ++CurPtr; // End the */. 198 return AsmToken(AsmToken::Comment, 199 StringRef(TokStart, CurPtr - TokStart)); 200 } 201 } 202 return ReturnError(TokStart, "unterminated comment"); 203 } 204 205 /// LexLineComment: Comment: #[^\n]* 206 /// : //[^\n]* 207 AsmToken AsmLexer::LexLineComment() { 208 // Mark This as an end of statement with a body of the 209 // comment. While it would be nicer to leave this two tokens, 210 // backwards compatability with TargetParsers makes keeping this in this form 211 // better. 212 const char *CommentTextStart = CurPtr; 213 int CurChar = getNextChar(); 214 while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) 215 CurChar = getNextChar(); 216 if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n') 217 ++CurPtr; 218 219 // If we have a CommentConsumer, notify it about the comment. 220 if (CommentConsumer) { 221 CommentConsumer->HandleComment( 222 SMLoc::getFromPointer(CommentTextStart), 223 StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); 224 } 225 226 IsAtStartOfLine = true; 227 // This is a whole line comment. leave newline 228 if (IsAtStartOfStatement) 229 return AsmToken(AsmToken::EndOfStatement, 230 StringRef(TokStart, CurPtr - TokStart)); 231 IsAtStartOfStatement = true; 232 233 return AsmToken(AsmToken::EndOfStatement, 234 StringRef(TokStart, CurPtr - 1 - TokStart)); 235 } 236 237 static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { 238 // Skip ULL, UL, U, L and LL suffices. 239 if (CurPtr[0] == 'U') 240 ++CurPtr; 241 if (CurPtr[0] == 'L') 242 ++CurPtr; 243 if (CurPtr[0] == 'L') 244 ++CurPtr; 245 } 246 247 // Look ahead to search for first non-hex digit, if it's [hH], then we treat the 248 // integer as a hexadecimal, possibly with leading zeroes. 249 static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix, 250 bool LexHex) { 251 const char *FirstNonDec = nullptr; 252 const char *LookAhead = CurPtr; 253 while (true) { 254 if (isDigit(*LookAhead)) { 255 ++LookAhead; 256 } else { 257 if (!FirstNonDec) 258 FirstNonDec = LookAhead; 259 260 // Keep going if we are looking for a 'h' suffix. 261 if (LexHex && isHexDigit(*LookAhead)) 262 ++LookAhead; 263 else 264 break; 265 } 266 } 267 bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H'); 268 CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec; 269 if (isHex) 270 return 16; 271 return DefaultRadix; 272 } 273 274 static AsmToken intToken(StringRef Ref, APInt &Value) 275 { 276 if (Value.isIntN(64)) 277 return AsmToken(AsmToken::Integer, Ref, Value); 278 return AsmToken(AsmToken::BigNum, Ref, Value); 279 } 280 281 /// LexDigit: First character is [0-9]. 282 /// Local Label: [0-9][:] 283 /// Forward/Backward Label: [0-9][fb] 284 /// Binary integer: 0b[01]+ 285 /// Octal integer: 0[0-7]+ 286 /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] 287 /// Decimal integer: [1-9][0-9]* 288 AsmToken AsmLexer::LexDigit() { 289 // MASM-flavor binary integer: [01]+[bB] 290 // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] 291 if (LexMasmIntegers && isdigit(CurPtr[-1])) { 292 const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? 293 CurPtr - 1 : nullptr; 294 const char *OldCurPtr = CurPtr; 295 while (isHexDigit(*CurPtr)) { 296 if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) 297 FirstNonBinary = CurPtr; 298 ++CurPtr; 299 } 300 301 unsigned Radix = 0; 302 if (*CurPtr == 'h' || *CurPtr == 'H') { 303 // hexadecimal number 304 ++CurPtr; 305 Radix = 16; 306 } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && 307 (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) 308 Radix = 2; 309 310 if (Radix == 2 || Radix == 16) { 311 StringRef Result(TokStart, CurPtr - TokStart); 312 APInt Value(128, 0, true); 313 314 if (Result.drop_back().getAsInteger(Radix, Value)) 315 return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : 316 "invalid hexdecimal number"); 317 318 // MSVC accepts and ignores type suffices on integer literals. 319 SkipIgnoredIntegerSuffix(CurPtr); 320 321 return intToken(Result, Value); 322 } 323 324 // octal/decimal integers, or floating point numbers, fall through 325 CurPtr = OldCurPtr; 326 } 327 328 // Decimal integer: [1-9][0-9]* 329 if (CurPtr[-1] != '0' || CurPtr[0] == '.') { 330 unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers); 331 bool isHex = Radix == 16; 332 // Check for floating point literals. 333 if (!isHex && (*CurPtr == '.' || *CurPtr == 'e' || *CurPtr == 'E')) { 334 if (*CurPtr == '.') 335 ++CurPtr; 336 return LexFloatLiteral(); 337 } 338 339 StringRef Result(TokStart, CurPtr - TokStart); 340 341 APInt Value(128, 0, true); 342 if (Result.getAsInteger(Radix, Value)) 343 return ReturnError(TokStart, !isHex ? "invalid decimal number" : 344 "invalid hexdecimal number"); 345 346 // Consume the [hH]. 347 if (LexMasmIntegers && Radix == 16) 348 ++CurPtr; 349 350 // The darwin/x86 (and x86-64) assembler accepts and ignores type 351 // suffices on integer literals. 352 SkipIgnoredIntegerSuffix(CurPtr); 353 354 return intToken(Result, Value); 355 } 356 357 if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { 358 ++CurPtr; 359 // See if we actually have "0b" as part of something like "jmp 0b\n" 360 if (!isDigit(CurPtr[0])) { 361 --CurPtr; 362 StringRef Result(TokStart, CurPtr - TokStart); 363 return AsmToken(AsmToken::Integer, Result, 0); 364 } 365 const char *NumStart = CurPtr; 366 while (CurPtr[0] == '0' || CurPtr[0] == '1') 367 ++CurPtr; 368 369 // Requires at least one binary digit. 370 if (CurPtr == NumStart) 371 return ReturnError(TokStart, "invalid binary number"); 372 373 StringRef Result(TokStart, CurPtr - TokStart); 374 375 APInt Value(128, 0, true); 376 if (Result.substr(2).getAsInteger(2, Value)) 377 return ReturnError(TokStart, "invalid binary number"); 378 379 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 380 // suffixes on integer literals. 381 SkipIgnoredIntegerSuffix(CurPtr); 382 383 return intToken(Result, Value); 384 } 385 386 if ((*CurPtr == 'x') || (*CurPtr == 'X')) { 387 ++CurPtr; 388 const char *NumStart = CurPtr; 389 while (isHexDigit(CurPtr[0])) 390 ++CurPtr; 391 392 // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be 393 // diagnosed by LexHexFloatLiteral). 394 if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') 395 return LexHexFloatLiteral(NumStart == CurPtr); 396 397 // Otherwise requires at least one hex digit. 398 if (CurPtr == NumStart) 399 return ReturnError(CurPtr-2, "invalid hexadecimal number"); 400 401 APInt Result(128, 0); 402 if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) 403 return ReturnError(TokStart, "invalid hexadecimal number"); 404 405 // Consume the optional [hH]. 406 if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H')) 407 ++CurPtr; 408 409 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 410 // suffixes on integer literals. 411 SkipIgnoredIntegerSuffix(CurPtr); 412 413 return intToken(StringRef(TokStart, CurPtr - TokStart), Result); 414 } 415 416 // Either octal or hexadecimal. 417 APInt Value(128, 0, true); 418 unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers); 419 bool isHex = Radix == 16; 420 StringRef Result(TokStart, CurPtr - TokStart); 421 if (Result.getAsInteger(Radix, Value)) 422 return ReturnError(TokStart, !isHex ? "invalid octal number" : 423 "invalid hexdecimal number"); 424 425 // Consume the [hH]. 426 if (Radix == 16) 427 ++CurPtr; 428 429 // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL 430 // suffixes on integer literals. 431 SkipIgnoredIntegerSuffix(CurPtr); 432 433 return intToken(Result, Value); 434 } 435 436 /// LexSingleQuote: Integer: 'b' 437 AsmToken AsmLexer::LexSingleQuote() { 438 int CurChar = getNextChar(); 439 440 if (CurChar == '\\') 441 CurChar = getNextChar(); 442 443 if (CurChar == EOF) 444 return ReturnError(TokStart, "unterminated single quote"); 445 446 CurChar = getNextChar(); 447 448 if (CurChar != '\'') 449 return ReturnError(TokStart, "single quote way too long"); 450 451 // The idea here being that 'c' is basically just an integral 452 // constant. 453 StringRef Res = StringRef(TokStart,CurPtr - TokStart); 454 long long Value; 455 456 if (Res.startswith("\'\\")) { 457 char theChar = Res[2]; 458 switch (theChar) { 459 default: Value = theChar; break; 460 case '\'': Value = '\''; break; 461 case 't': Value = '\t'; break; 462 case 'n': Value = '\n'; break; 463 case 'b': Value = '\b'; break; 464 } 465 } else 466 Value = TokStart[1]; 467 468 return AsmToken(AsmToken::Integer, Res, Value); 469 } 470 471 /// LexQuote: String: "..." 472 AsmToken AsmLexer::LexQuote() { 473 int CurChar = getNextChar(); 474 // TODO: does gas allow multiline string constants? 475 while (CurChar != '"') { 476 if (CurChar == '\\') { 477 // Allow \", etc. 478 CurChar = getNextChar(); 479 } 480 481 if (CurChar == EOF) 482 return ReturnError(TokStart, "unterminated string constant"); 483 484 CurChar = getNextChar(); 485 } 486 487 return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); 488 } 489 490 StringRef AsmLexer::LexUntilEndOfStatement() { 491 TokStart = CurPtr; 492 493 while (!isAtStartOfComment(CurPtr) && // Start of line comment. 494 !isAtStatementSeparator(CurPtr) && // End of statement marker. 495 *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 496 ++CurPtr; 497 } 498 return StringRef(TokStart, CurPtr-TokStart); 499 } 500 501 StringRef AsmLexer::LexUntilEndOfLine() { 502 TokStart = CurPtr; 503 504 while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { 505 ++CurPtr; 506 } 507 return StringRef(TokStart, CurPtr-TokStart); 508 } 509 510 size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, 511 bool ShouldSkipSpace) { 512 SaveAndRestore<const char *> SavedTokenStart(TokStart); 513 SaveAndRestore<const char *> SavedCurPtr(CurPtr); 514 SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine); 515 SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement); 516 SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace); 517 SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true); 518 std::string SavedErr = getErr(); 519 SMLoc SavedErrLoc = getErrLoc(); 520 521 size_t ReadCount; 522 for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { 523 AsmToken Token = LexToken(); 524 525 Buf[ReadCount] = Token; 526 527 if (Token.is(AsmToken::Eof)) 528 break; 529 } 530 531 SetError(SavedErrLoc, SavedErr); 532 return ReadCount; 533 } 534 535 bool AsmLexer::isAtStartOfComment(const char *Ptr) { 536 StringRef CommentString = MAI.getCommentString(); 537 538 if (CommentString.size() == 1) 539 return CommentString[0] == Ptr[0]; 540 541 // Allow # preprocessor commments also be counted as comments for "##" cases 542 if (CommentString[1] == '#') 543 return CommentString[0] == Ptr[0]; 544 545 return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; 546 } 547 548 bool AsmLexer::isAtStatementSeparator(const char *Ptr) { 549 return strncmp(Ptr, MAI.getSeparatorString(), 550 strlen(MAI.getSeparatorString())) == 0; 551 } 552 553 AsmToken AsmLexer::LexToken() { 554 TokStart = CurPtr; 555 // This always consumes at least one character. 556 int CurChar = getNextChar(); 557 558 if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { 559 // If this starts with a '#', this may be a cpp 560 // hash directive and otherwise a line comment. 561 AsmToken TokenBuf[2]; 562 MutableArrayRef<AsmToken> Buf(TokenBuf, 2); 563 size_t num = peekTokens(Buf, true); 564 // There cannot be a space preceding this 565 if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && 566 TokenBuf[1].is(AsmToken::String)) { 567 CurPtr = TokStart; // reset curPtr; 568 StringRef s = LexUntilEndOfLine(); 569 UnLex(TokenBuf[1]); 570 UnLex(TokenBuf[0]); 571 return AsmToken(AsmToken::HashDirective, s); 572 } 573 return LexLineComment(); 574 } 575 576 if (isAtStartOfComment(TokStart)) 577 return LexLineComment(); 578 579 if (isAtStatementSeparator(TokStart)) { 580 CurPtr += strlen(MAI.getSeparatorString()) - 1; 581 IsAtStartOfLine = true; 582 IsAtStartOfStatement = true; 583 return AsmToken(AsmToken::EndOfStatement, 584 StringRef(TokStart, strlen(MAI.getSeparatorString()))); 585 } 586 587 // If we're missing a newline at EOF, make sure we still get an 588 // EndOfStatement token before the Eof token. 589 if (CurChar == EOF && !IsAtStartOfStatement && EndStatementAtEOF) { 590 IsAtStartOfLine = true; 591 IsAtStartOfStatement = true; 592 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 593 } 594 IsAtStartOfLine = false; 595 bool OldIsAtStartOfStatement = IsAtStartOfStatement; 596 IsAtStartOfStatement = false; 597 switch (CurChar) { 598 default: 599 if (MAI.doesAllowSymbolAtNameStart()) { 600 // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]* 601 if (!isDigit(CurChar) && 602 IsIdentifierChar(CurChar, MAI.doesAllowAtInName())) 603 return LexIdentifier(); 604 } else { 605 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* 606 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') 607 return LexIdentifier(); 608 } 609 610 // Unknown character, emit an error. 611 return ReturnError(TokStart, "invalid character in input"); 612 case EOF: 613 if (EndStatementAtEOF) { 614 IsAtStartOfLine = true; 615 IsAtStartOfStatement = true; 616 } 617 return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); 618 case 0: 619 case ' ': 620 case '\t': 621 IsAtStartOfStatement = OldIsAtStartOfStatement; 622 while (*CurPtr == ' ' || *CurPtr == '\t') 623 CurPtr++; 624 if (SkipSpace) 625 return LexToken(); // Ignore whitespace. 626 else 627 return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); 628 case '\r': { 629 IsAtStartOfLine = true; 630 IsAtStartOfStatement = true; 631 // If this is a CR followed by LF, treat that as one token. 632 if (CurPtr != CurBuf.end() && *CurPtr == '\n') 633 ++CurPtr; 634 return AsmToken(AsmToken::EndOfStatement, 635 StringRef(TokStart, CurPtr - TokStart)); 636 } 637 case '\n': 638 IsAtStartOfLine = true; 639 IsAtStartOfStatement = true; 640 return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); 641 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); 642 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); 643 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); 644 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); 645 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); 646 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); 647 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); 648 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); 649 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); 650 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); 651 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); 652 case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); 653 case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); 654 case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); 655 case '=': 656 if (*CurPtr == '=') { 657 ++CurPtr; 658 return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); 659 } 660 return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); 661 case '-': 662 if (*CurPtr == '>') { 663 ++CurPtr; 664 return AsmToken(AsmToken::MinusGreater, StringRef(TokStart, 2)); 665 } 666 return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); 667 case '|': 668 if (*CurPtr == '|') { 669 ++CurPtr; 670 return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); 671 } 672 return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); 673 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); 674 case '&': 675 if (*CurPtr == '&') { 676 ++CurPtr; 677 return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); 678 } 679 return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); 680 case '!': 681 if (*CurPtr == '=') { 682 ++CurPtr; 683 return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); 684 } 685 return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); 686 case '%': 687 if (MAI.hasMipsExpressions()) { 688 AsmToken::TokenKind Operator; 689 unsigned OperatorLength; 690 691 std::tie(Operator, OperatorLength) = 692 StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( 693 StringRef(CurPtr)) 694 .StartsWith("call16", {AsmToken::PercentCall16, 7}) 695 .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) 696 .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) 697 .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) 698 .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) 699 .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) 700 .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) 701 .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) 702 .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) 703 .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) 704 .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) 705 .StartsWith("got", {AsmToken::PercentGot, 4}) 706 .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) 707 .StartsWith("higher", {AsmToken::PercentHigher, 7}) 708 .StartsWith("highest", {AsmToken::PercentHighest, 8}) 709 .StartsWith("hi", {AsmToken::PercentHi, 3}) 710 .StartsWith("lo", {AsmToken::PercentLo, 3}) 711 .StartsWith("neg", {AsmToken::PercentNeg, 4}) 712 .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) 713 .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) 714 .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) 715 .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) 716 .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) 717 .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) 718 .Default({AsmToken::Percent, 1}); 719 720 if (Operator != AsmToken::Percent) { 721 CurPtr += OperatorLength - 1; 722 return AsmToken(Operator, StringRef(TokStart, OperatorLength)); 723 } 724 } 725 return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); 726 case '/': 727 IsAtStartOfStatement = OldIsAtStartOfStatement; 728 return LexSlash(); 729 case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); 730 case '\'': return LexSingleQuote(); 731 case '"': return LexQuote(); 732 case '0': case '1': case '2': case '3': case '4': 733 case '5': case '6': case '7': case '8': case '9': 734 return LexDigit(); 735 case '<': 736 switch (*CurPtr) { 737 case '<': 738 ++CurPtr; 739 return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); 740 case '=': 741 ++CurPtr; 742 return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); 743 case '>': 744 ++CurPtr; 745 return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); 746 default: 747 return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); 748 } 749 case '>': 750 switch (*CurPtr) { 751 case '>': 752 ++CurPtr; 753 return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); 754 case '=': 755 ++CurPtr; 756 return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); 757 default: 758 return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); 759 } 760 761 // TODO: Quoted identifiers (objc methods etc) 762 // local labels: [0-9][:] 763 // Forward/backward labels: [0-9][fb] 764 // Integers, fp constants, character constants. 765 } 766 } 767