1 //===- MILexer.cpp - Machine instructions lexer implementation ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the lexing of machine instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MILexer.h" 14 #include "llvm/ADT/None.h" 15 #include "llvm/ADT/StringExtras.h" 16 #include "llvm/ADT/StringSwitch.h" 17 #include "llvm/ADT/Twine.h" 18 #include <cassert> 19 #include <cctype> 20 #include <string> 21 22 using namespace llvm; 23 24 namespace { 25 26 using ErrorCallbackType = 27 function_ref<void(StringRef::iterator Loc, const Twine &)>; 28 29 /// This class provides a way to iterate and get characters from the source 30 /// string. 31 class Cursor { 32 const char *Ptr = nullptr; 33 const char *End = nullptr; 34 35 public: 36 Cursor(NoneType) {} 37 38 explicit Cursor(StringRef Str) { 39 Ptr = Str.data(); 40 End = Ptr + Str.size(); 41 } 42 43 bool isEOF() const { return Ptr == End; } 44 45 char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } 46 47 void advance(unsigned I = 1) { Ptr += I; } 48 49 StringRef remaining() const { return StringRef(Ptr, End - Ptr); } 50 51 StringRef upto(Cursor C) const { 52 assert(C.Ptr >= Ptr && C.Ptr <= End); 53 return StringRef(Ptr, C.Ptr - Ptr); 54 } 55 56 StringRef::iterator location() const { return Ptr; } 57 58 operator bool() const { return Ptr != nullptr; } 59 }; 60 61 } // end anonymous namespace 62 63 MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { 64 this->Kind = Kind; 65 this->Range = Range; 66 return *this; 67 } 68 69 MIToken &MIToken::setStringValue(StringRef StrVal) { 70 StringValue = StrVal; 71 return *this; 72 } 73 74 MIToken &MIToken::setOwnedStringValue(std::string StrVal) { 75 StringValueStorage = std::move(StrVal); 76 StringValue = StringValueStorage; 77 return *this; 78 } 79 80 MIToken &MIToken::setIntegerValue(APSInt IntVal) { 81 this->IntVal = std::move(IntVal); 82 return *this; 83 } 84 85 /// Skip the leading whitespace characters and return the updated cursor. 86 static Cursor skipWhitespace(Cursor C) { 87 while (isblank(C.peek())) 88 C.advance(); 89 return C; 90 } 91 92 static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } 93 94 /// Skip a line comment and return the updated cursor. 95 static Cursor skipComment(Cursor C) { 96 if (C.peek() != ';') 97 return C; 98 while (!isNewlineChar(C.peek()) && !C.isEOF()) 99 C.advance(); 100 return C; 101 } 102 103 /// Machine operands can have comments, enclosed between /* and */. 104 /// This eats up all tokens, including /* and */. 105 static Cursor skipMachineOperandComment(Cursor C) { 106 if (C.peek() != '/' || C.peek(1) != '*') 107 return C; 108 109 while (C.peek() != '*' || C.peek(1) != '/') 110 C.advance(); 111 112 C.advance(); 113 C.advance(); 114 return C; 115 } 116 117 /// Return true if the given character satisfies the following regular 118 /// expression: [-a-zA-Z$._0-9] 119 static bool isIdentifierChar(char C) { 120 return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || 121 C == '$'; 122 } 123 124 /// Unescapes the given string value. 125 /// 126 /// Expects the string value to be quoted. 127 static std::string unescapeQuotedString(StringRef Value) { 128 assert(Value.front() == '"' && Value.back() == '"'); 129 Cursor C = Cursor(Value.substr(1, Value.size() - 2)); 130 131 std::string Str; 132 Str.reserve(C.remaining().size()); 133 while (!C.isEOF()) { 134 char Char = C.peek(); 135 if (Char == '\\') { 136 if (C.peek(1) == '\\') { 137 // Two '\' become one 138 Str += '\\'; 139 C.advance(2); 140 continue; 141 } 142 if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { 143 Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); 144 C.advance(3); 145 continue; 146 } 147 } 148 Str += Char; 149 C.advance(); 150 } 151 return Str; 152 } 153 154 /// Lex a string constant using the following regular expression: \"[^\"]*\" 155 static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { 156 assert(C.peek() == '"'); 157 for (C.advance(); C.peek() != '"'; C.advance()) { 158 if (C.isEOF() || isNewlineChar(C.peek())) { 159 ErrorCallback( 160 C.location(), 161 "end of machine instruction reached before the closing '\"'"); 162 return None; 163 } 164 } 165 C.advance(); 166 return C; 167 } 168 169 static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, 170 unsigned PrefixLength, ErrorCallbackType ErrorCallback) { 171 auto Range = C; 172 C.advance(PrefixLength); 173 if (C.peek() == '"') { 174 if (Cursor R = lexStringConstant(C, ErrorCallback)) { 175 StringRef String = Range.upto(R); 176 Token.reset(Type, String) 177 .setOwnedStringValue( 178 unescapeQuotedString(String.drop_front(PrefixLength))); 179 return R; 180 } 181 Token.reset(MIToken::Error, Range.remaining()); 182 return Range; 183 } 184 while (isIdentifierChar(C.peek())) 185 C.advance(); 186 Token.reset(Type, Range.upto(C)) 187 .setStringValue(Range.upto(C).drop_front(PrefixLength)); 188 return C; 189 } 190 191 static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { 192 return StringSwitch<MIToken::TokenKind>(Identifier) 193 .Case("_", MIToken::underscore) 194 .Case("implicit", MIToken::kw_implicit) 195 .Case("implicit-def", MIToken::kw_implicit_define) 196 .Case("def", MIToken::kw_def) 197 .Case("dead", MIToken::kw_dead) 198 .Case("killed", MIToken::kw_killed) 199 .Case("undef", MIToken::kw_undef) 200 .Case("internal", MIToken::kw_internal) 201 .Case("early-clobber", MIToken::kw_early_clobber) 202 .Case("debug-use", MIToken::kw_debug_use) 203 .Case("renamable", MIToken::kw_renamable) 204 .Case("tied-def", MIToken::kw_tied_def) 205 .Case("frame-setup", MIToken::kw_frame_setup) 206 .Case("frame-destroy", MIToken::kw_frame_destroy) 207 .Case("nnan", MIToken::kw_nnan) 208 .Case("ninf", MIToken::kw_ninf) 209 .Case("nsz", MIToken::kw_nsz) 210 .Case("arcp", MIToken::kw_arcp) 211 .Case("contract", MIToken::kw_contract) 212 .Case("afn", MIToken::kw_afn) 213 .Case("reassoc", MIToken::kw_reassoc) 214 .Case("nuw", MIToken::kw_nuw) 215 .Case("nsw", MIToken::kw_nsw) 216 .Case("exact", MIToken::kw_exact) 217 .Case("nofpexcept", MIToken::kw_nofpexcept) 218 .Case("debug-location", MIToken::kw_debug_location) 219 .Case("debug-instr-number", MIToken::kw_debug_instr_number) 220 .Case("same_value", MIToken::kw_cfi_same_value) 221 .Case("offset", MIToken::kw_cfi_offset) 222 .Case("rel_offset", MIToken::kw_cfi_rel_offset) 223 .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register) 224 .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) 225 .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset) 226 .Case("escape", MIToken::kw_cfi_escape) 227 .Case("def_cfa", MIToken::kw_cfi_def_cfa) 228 .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa) 229 .Case("remember_state", MIToken::kw_cfi_remember_state) 230 .Case("restore", MIToken::kw_cfi_restore) 231 .Case("restore_state", MIToken::kw_cfi_restore_state) 232 .Case("undefined", MIToken::kw_cfi_undefined) 233 .Case("register", MIToken::kw_cfi_register) 234 .Case("window_save", MIToken::kw_cfi_window_save) 235 .Case("negate_ra_sign_state", 236 MIToken::kw_cfi_aarch64_negate_ra_sign_state) 237 .Case("blockaddress", MIToken::kw_blockaddress) 238 .Case("intrinsic", MIToken::kw_intrinsic) 239 .Case("target-index", MIToken::kw_target_index) 240 .Case("half", MIToken::kw_half) 241 .Case("float", MIToken::kw_float) 242 .Case("double", MIToken::kw_double) 243 .Case("x86_fp80", MIToken::kw_x86_fp80) 244 .Case("fp128", MIToken::kw_fp128) 245 .Case("ppc_fp128", MIToken::kw_ppc_fp128) 246 .Case("target-flags", MIToken::kw_target_flags) 247 .Case("volatile", MIToken::kw_volatile) 248 .Case("non-temporal", MIToken::kw_non_temporal) 249 .Case("dereferenceable", MIToken::kw_dereferenceable) 250 .Case("invariant", MIToken::kw_invariant) 251 .Case("align", MIToken::kw_align) 252 .Case("basealign", MIToken::kw_basealign) 253 .Case("addrspace", MIToken::kw_addrspace) 254 .Case("stack", MIToken::kw_stack) 255 .Case("got", MIToken::kw_got) 256 .Case("jump-table", MIToken::kw_jump_table) 257 .Case("constant-pool", MIToken::kw_constant_pool) 258 .Case("call-entry", MIToken::kw_call_entry) 259 .Case("custom", MIToken::kw_custom) 260 .Case("liveout", MIToken::kw_liveout) 261 .Case("address-taken", MIToken::kw_address_taken) 262 .Case("landing-pad", MIToken::kw_landing_pad) 263 .Case("inlineasm-br-indirect-target", 264 MIToken::kw_inlineasm_br_indirect_target) 265 .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry) 266 .Case("liveins", MIToken::kw_liveins) 267 .Case("successors", MIToken::kw_successors) 268 .Case("floatpred", MIToken::kw_floatpred) 269 .Case("intpred", MIToken::kw_intpred) 270 .Case("shufflemask", MIToken::kw_shufflemask) 271 .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) 272 .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) 273 .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) 274 .Case("bbsections", MIToken::kw_bbsections) 275 .Case("unknown-size", MIToken::kw_unknown_size) 276 .Case("unknown-address", MIToken::kw_unknown_address) 277 .Case("distinct", MIToken::kw_distinct) 278 .Default(MIToken::Identifier); 279 } 280 281 static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { 282 if (!isalpha(C.peek()) && C.peek() != '_') 283 return None; 284 auto Range = C; 285 while (isIdentifierChar(C.peek())) 286 C.advance(); 287 auto Identifier = Range.upto(C); 288 Token.reset(getIdentifierKind(Identifier), Identifier) 289 .setStringValue(Identifier); 290 return C; 291 } 292 293 static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, 294 ErrorCallbackType ErrorCallback) { 295 bool IsReference = C.remaining().startswith("%bb."); 296 if (!IsReference && !C.remaining().startswith("bb.")) 297 return None; 298 auto Range = C; 299 unsigned PrefixLength = IsReference ? 4 : 3; 300 C.advance(PrefixLength); // Skip '%bb.' or 'bb.' 301 if (!isdigit(C.peek())) { 302 Token.reset(MIToken::Error, C.remaining()); 303 ErrorCallback(C.location(), "expected a number after '%bb.'"); 304 return C; 305 } 306 auto NumberRange = C; 307 while (isdigit(C.peek())) 308 C.advance(); 309 StringRef Number = NumberRange.upto(C); 310 unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' 311 // TODO: The format bb.<id>.<irname> is supported only when it's not a 312 // reference. Once we deprecate the format where the irname shows up, we 313 // should only lex forward if it is a reference. 314 if (C.peek() == '.') { 315 C.advance(); // Skip '.' 316 ++StringOffset; 317 while (isIdentifierChar(C.peek())) 318 C.advance(); 319 } 320 Token.reset(IsReference ? MIToken::MachineBasicBlock 321 : MIToken::MachineBasicBlockLabel, 322 Range.upto(C)) 323 .setIntegerValue(APSInt(Number)) 324 .setStringValue(Range.upto(C).drop_front(StringOffset)); 325 return C; 326 } 327 328 static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, 329 MIToken::TokenKind Kind) { 330 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 331 return None; 332 auto Range = C; 333 C.advance(Rule.size()); 334 auto NumberRange = C; 335 while (isdigit(C.peek())) 336 C.advance(); 337 Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); 338 return C; 339 } 340 341 static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, 342 MIToken::TokenKind Kind) { 343 if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) 344 return None; 345 auto Range = C; 346 C.advance(Rule.size()); 347 auto NumberRange = C; 348 while (isdigit(C.peek())) 349 C.advance(); 350 StringRef Number = NumberRange.upto(C); 351 unsigned StringOffset = Rule.size() + Number.size(); 352 if (C.peek() == '.') { 353 C.advance(); 354 ++StringOffset; 355 while (isIdentifierChar(C.peek())) 356 C.advance(); 357 } 358 Token.reset(Kind, Range.upto(C)) 359 .setIntegerValue(APSInt(Number)) 360 .setStringValue(Range.upto(C).drop_front(StringOffset)); 361 return C; 362 } 363 364 static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { 365 return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); 366 } 367 368 static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { 369 return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); 370 } 371 372 static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { 373 return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); 374 } 375 376 static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { 377 return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); 378 } 379 380 static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, 381 ErrorCallbackType ErrorCallback) { 382 const StringRef Rule = "%subreg."; 383 if (!C.remaining().startswith(Rule)) 384 return None; 385 return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), 386 ErrorCallback); 387 } 388 389 static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, 390 ErrorCallbackType ErrorCallback) { 391 const StringRef Rule = "%ir-block."; 392 if (!C.remaining().startswith(Rule)) 393 return None; 394 if (isdigit(C.peek(Rule.size()))) 395 return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); 396 return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); 397 } 398 399 static Cursor maybeLexIRValue(Cursor C, MIToken &Token, 400 ErrorCallbackType ErrorCallback) { 401 const StringRef Rule = "%ir."; 402 if (!C.remaining().startswith(Rule)) 403 return None; 404 if (isdigit(C.peek(Rule.size()))) 405 return maybeLexIndex(C, Token, Rule, MIToken::IRValue); 406 return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); 407 } 408 409 static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, 410 ErrorCallbackType ErrorCallback) { 411 if (C.peek() != '"') 412 return None; 413 return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, 414 ErrorCallback); 415 } 416 417 static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { 418 auto Range = C; 419 C.advance(); // Skip '%' 420 auto NumberRange = C; 421 while (isdigit(C.peek())) 422 C.advance(); 423 Token.reset(MIToken::VirtualRegister, Range.upto(C)) 424 .setIntegerValue(APSInt(NumberRange.upto(C))); 425 return C; 426 } 427 428 /// Returns true for a character allowed in a register name. 429 static bool isRegisterChar(char C) { 430 return isIdentifierChar(C) && C != '.'; 431 } 432 433 static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { 434 Cursor Range = C; 435 C.advance(); // Skip '%' 436 while (isRegisterChar(C.peek())) 437 C.advance(); 438 Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) 439 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' 440 return C; 441 } 442 443 static Cursor maybeLexRegister(Cursor C, MIToken &Token, 444 ErrorCallbackType ErrorCallback) { 445 if (C.peek() != '%' && C.peek() != '$') 446 return None; 447 448 if (C.peek() == '%') { 449 if (isdigit(C.peek(1))) 450 return lexVirtualRegister(C, Token); 451 452 if (isRegisterChar(C.peek(1))) 453 return lexNamedVirtualRegister(C, Token); 454 455 return None; 456 } 457 458 assert(C.peek() == '$'); 459 auto Range = C; 460 C.advance(); // Skip '$' 461 while (isRegisterChar(C.peek())) 462 C.advance(); 463 Token.reset(MIToken::NamedRegister, Range.upto(C)) 464 .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' 465 return C; 466 } 467 468 static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, 469 ErrorCallbackType ErrorCallback) { 470 if (C.peek() != '@') 471 return None; 472 if (!isdigit(C.peek(1))) 473 return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, 474 ErrorCallback); 475 auto Range = C; 476 C.advance(1); // Skip the '@' 477 auto NumberRange = C; 478 while (isdigit(C.peek())) 479 C.advance(); 480 Token.reset(MIToken::GlobalValue, Range.upto(C)) 481 .setIntegerValue(APSInt(NumberRange.upto(C))); 482 return C; 483 } 484 485 static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, 486 ErrorCallbackType ErrorCallback) { 487 if (C.peek() != '&') 488 return None; 489 return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, 490 ErrorCallback); 491 } 492 493 static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, 494 ErrorCallbackType ErrorCallback) { 495 const StringRef Rule = "<mcsymbol "; 496 if (!C.remaining().startswith(Rule)) 497 return None; 498 auto Start = C; 499 C.advance(Rule.size()); 500 501 // Try a simple unquoted name. 502 if (C.peek() != '"') { 503 while (isIdentifierChar(C.peek())) 504 C.advance(); 505 StringRef String = Start.upto(C).drop_front(Rule.size()); 506 if (C.peek() != '>') { 507 ErrorCallback(C.location(), 508 "expected the '<mcsymbol ...' to be closed by a '>'"); 509 Token.reset(MIToken::Error, Start.remaining()); 510 return Start; 511 } 512 C.advance(); 513 514 Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); 515 return C; 516 } 517 518 // Otherwise lex out a quoted name. 519 Cursor R = lexStringConstant(C, ErrorCallback); 520 if (!R) { 521 ErrorCallback(C.location(), 522 "unable to parse quoted string from opening quote"); 523 Token.reset(MIToken::Error, Start.remaining()); 524 return Start; 525 } 526 StringRef String = Start.upto(R).drop_front(Rule.size()); 527 if (R.peek() != '>') { 528 ErrorCallback(R.location(), 529 "expected the '<mcsymbol ...' to be closed by a '>'"); 530 Token.reset(MIToken::Error, Start.remaining()); 531 return Start; 532 } 533 R.advance(); 534 535 Token.reset(MIToken::MCSymbol, Start.upto(R)) 536 .setOwnedStringValue(unescapeQuotedString(String)); 537 return R; 538 } 539 540 static bool isValidHexFloatingPointPrefix(char C) { 541 return C == 'H' || C == 'K' || C == 'L' || C == 'M' || C == 'R'; 542 } 543 544 static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { 545 C.advance(); 546 // Skip over [0-9]*([eE][-+]?[0-9]+)? 547 while (isdigit(C.peek())) 548 C.advance(); 549 if ((C.peek() == 'e' || C.peek() == 'E') && 550 (isdigit(C.peek(1)) || 551 ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { 552 C.advance(2); 553 while (isdigit(C.peek())) 554 C.advance(); 555 } 556 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 557 return C; 558 } 559 560 static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { 561 if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X')) 562 return None; 563 Cursor Range = C; 564 C.advance(2); 565 unsigned PrefLen = 2; 566 if (isValidHexFloatingPointPrefix(C.peek())) { 567 C.advance(); 568 PrefLen++; 569 } 570 while (isxdigit(C.peek())) 571 C.advance(); 572 StringRef StrVal = Range.upto(C); 573 if (StrVal.size() <= PrefLen) 574 return None; 575 if (PrefLen == 2) 576 Token.reset(MIToken::HexLiteral, Range.upto(C)); 577 else // It must be 3, which means that there was a floating-point prefix. 578 Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); 579 return C; 580 } 581 582 static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { 583 if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) 584 return None; 585 auto Range = C; 586 C.advance(); 587 while (isdigit(C.peek())) 588 C.advance(); 589 if (C.peek() == '.') 590 return lexFloatingPointLiteral(Range, C, Token); 591 StringRef StrVal = Range.upto(C); 592 Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); 593 return C; 594 } 595 596 static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { 597 return StringSwitch<MIToken::TokenKind>(Identifier) 598 .Case("!tbaa", MIToken::md_tbaa) 599 .Case("!alias.scope", MIToken::md_alias_scope) 600 .Case("!noalias", MIToken::md_noalias) 601 .Case("!range", MIToken::md_range) 602 .Case("!DIExpression", MIToken::md_diexpr) 603 .Case("!DILocation", MIToken::md_dilocation) 604 .Default(MIToken::Error); 605 } 606 607 static Cursor maybeLexExclaim(Cursor C, MIToken &Token, 608 ErrorCallbackType ErrorCallback) { 609 if (C.peek() != '!') 610 return None; 611 auto Range = C; 612 C.advance(1); 613 if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { 614 Token.reset(MIToken::exclaim, Range.upto(C)); 615 return C; 616 } 617 while (isIdentifierChar(C.peek())) 618 C.advance(); 619 StringRef StrVal = Range.upto(C); 620 Token.reset(getMetadataKeywordKind(StrVal), StrVal); 621 if (Token.isError()) 622 ErrorCallback(Token.location(), 623 "use of unknown metadata keyword '" + StrVal + "'"); 624 return C; 625 } 626 627 static MIToken::TokenKind symbolToken(char C) { 628 switch (C) { 629 case ',': 630 return MIToken::comma; 631 case '.': 632 return MIToken::dot; 633 case '=': 634 return MIToken::equal; 635 case ':': 636 return MIToken::colon; 637 case '(': 638 return MIToken::lparen; 639 case ')': 640 return MIToken::rparen; 641 case '{': 642 return MIToken::lbrace; 643 case '}': 644 return MIToken::rbrace; 645 case '+': 646 return MIToken::plus; 647 case '-': 648 return MIToken::minus; 649 case '<': 650 return MIToken::less; 651 case '>': 652 return MIToken::greater; 653 default: 654 return MIToken::Error; 655 } 656 } 657 658 static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { 659 MIToken::TokenKind Kind; 660 unsigned Length = 1; 661 if (C.peek() == ':' && C.peek(1) == ':') { 662 Kind = MIToken::coloncolon; 663 Length = 2; 664 } else 665 Kind = symbolToken(C.peek()); 666 if (Kind == MIToken::Error) 667 return None; 668 auto Range = C; 669 C.advance(Length); 670 Token.reset(Kind, Range.upto(C)); 671 return C; 672 } 673 674 static Cursor maybeLexNewline(Cursor C, MIToken &Token) { 675 if (!isNewlineChar(C.peek())) 676 return None; 677 auto Range = C; 678 C.advance(); 679 Token.reset(MIToken::Newline, Range.upto(C)); 680 return C; 681 } 682 683 static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, 684 ErrorCallbackType ErrorCallback) { 685 if (C.peek() != '`') 686 return None; 687 auto Range = C; 688 C.advance(); 689 auto StrRange = C; 690 while (C.peek() != '`') { 691 if (C.isEOF() || isNewlineChar(C.peek())) { 692 ErrorCallback( 693 C.location(), 694 "end of machine instruction reached before the closing '`'"); 695 Token.reset(MIToken::Error, Range.remaining()); 696 return C; 697 } 698 C.advance(); 699 } 700 StringRef Value = StrRange.upto(C); 701 C.advance(); 702 Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); 703 return C; 704 } 705 706 StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, 707 ErrorCallbackType ErrorCallback) { 708 auto C = skipComment(skipWhitespace(Cursor(Source))); 709 if (C.isEOF()) { 710 Token.reset(MIToken::Eof, C.remaining()); 711 return C.remaining(); 712 } 713 714 C = skipMachineOperandComment(C); 715 716 if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) 717 return R.remaining(); 718 if (Cursor R = maybeLexIdentifier(C, Token)) 719 return R.remaining(); 720 if (Cursor R = maybeLexJumpTableIndex(C, Token)) 721 return R.remaining(); 722 if (Cursor R = maybeLexStackObject(C, Token)) 723 return R.remaining(); 724 if (Cursor R = maybeLexFixedStackObject(C, Token)) 725 return R.remaining(); 726 if (Cursor R = maybeLexConstantPoolItem(C, Token)) 727 return R.remaining(); 728 if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) 729 return R.remaining(); 730 if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) 731 return R.remaining(); 732 if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) 733 return R.remaining(); 734 if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) 735 return R.remaining(); 736 if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) 737 return R.remaining(); 738 if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) 739 return R.remaining(); 740 if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) 741 return R.remaining(); 742 if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) 743 return R.remaining(); 744 if (Cursor R = maybeLexNumericalLiteral(C, Token)) 745 return R.remaining(); 746 if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) 747 return R.remaining(); 748 if (Cursor R = maybeLexSymbol(C, Token)) 749 return R.remaining(); 750 if (Cursor R = maybeLexNewline(C, Token)) 751 return R.remaining(); 752 if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) 753 return R.remaining(); 754 if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) 755 return R.remaining(); 756 757 Token.reset(MIToken::Error, C.remaining()); 758 ErrorCallback(C.location(), 759 Twine("unexpected character '") + Twine(C.peek()) + "'"); 760 return C.remaining(); 761 } 762