1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86IntelInstPrinter.h" 11 #include "MCTargetDesc/X86MCExpr.h" 12 #include "MCTargetDesc/X86TargetStreamer.h" 13 #include "TargetInfo/X86TargetInfo.h" 14 #include "X86AsmParserCommon.h" 15 #include "X86Operand.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallString.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSwitch.h" 20 #include "llvm/ADT/Twine.h" 21 #include "llvm/MC/MCContext.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCParser/MCAsmLexer.h" 26 #include "llvm/MC/MCParser/MCAsmParser.h" 27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 28 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSection.h" 31 #include "llvm/MC/MCStreamer.h" 32 #include "llvm/MC/MCSubtargetInfo.h" 33 #include "llvm/MC/MCSymbol.h" 34 #include "llvm/Support/SourceMgr.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include <algorithm> 38 #include <memory> 39 40 using namespace llvm; 41 42 static bool checkScale(unsigned Scale, StringRef &ErrMsg) { 43 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) { 44 ErrMsg = "scale factor in address must be 1, 2, 4 or 8"; 45 return true; 46 } 47 return false; 48 } 49 50 namespace { 51 52 static const char OpPrecedence[] = { 53 0, // IC_OR 54 1, // IC_XOR 55 2, // IC_AND 56 3, // IC_LSHIFT 57 3, // IC_RSHIFT 58 4, // IC_PLUS 59 4, // IC_MINUS 60 5, // IC_MULTIPLY 61 5, // IC_DIVIDE 62 
5, // IC_MOD 63 6, // IC_NOT 64 7, // IC_NEG 65 8, // IC_RPAREN 66 9, // IC_LPAREN 67 0, // IC_IMM 68 0 // IC_REGISTER 69 }; 70 71 class X86AsmParser : public MCTargetAsmParser { 72 ParseInstructionInfo *InstInfo; 73 bool Code16GCC; 74 75 enum VEXEncoding { 76 VEXEncoding_Default, 77 VEXEncoding_VEX2, 78 VEXEncoding_VEX3, 79 VEXEncoding_EVEX, 80 }; 81 82 VEXEncoding ForcedVEXEncoding = VEXEncoding_Default; 83 84 private: 85 SMLoc consumeToken() { 86 MCAsmParser &Parser = getParser(); 87 SMLoc Result = Parser.getTok().getLoc(); 88 Parser.Lex(); 89 return Result; 90 } 91 92 X86TargetStreamer &getTargetStreamer() { 93 assert(getParser().getStreamer().getTargetStreamer() && 94 "do not have a target streamer"); 95 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 96 return static_cast<X86TargetStreamer &>(TS); 97 } 98 99 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, 100 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures, 101 bool matchingInlineAsm, unsigned VariantID = 0) { 102 // In Code16GCC mode, match as 32-bit. 
103 if (Code16GCC) 104 SwitchMode(X86::Mode32Bit); 105 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo, 106 MissingFeatures, matchingInlineAsm, 107 VariantID); 108 if (Code16GCC) 109 SwitchMode(X86::Mode16Bit); 110 return rv; 111 } 112 113 enum InfixCalculatorTok { 114 IC_OR = 0, 115 IC_XOR, 116 IC_AND, 117 IC_LSHIFT, 118 IC_RSHIFT, 119 IC_PLUS, 120 IC_MINUS, 121 IC_MULTIPLY, 122 IC_DIVIDE, 123 IC_MOD, 124 IC_NOT, 125 IC_NEG, 126 IC_RPAREN, 127 IC_LPAREN, 128 IC_IMM, 129 IC_REGISTER 130 }; 131 132 enum IntelOperatorKind { 133 IOK_INVALID = 0, 134 IOK_LENGTH, 135 IOK_SIZE, 136 IOK_TYPE, 137 IOK_OFFSET 138 }; 139 140 class InfixCalculator { 141 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 142 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 143 SmallVector<ICToken, 4> PostfixStack; 144 145 bool isUnaryOperator(const InfixCalculatorTok Op) { 146 return Op == IC_NEG || Op == IC_NOT; 147 } 148 149 public: 150 int64_t popOperand() { 151 assert (!PostfixStack.empty() && "Poped an empty stack!"); 152 ICToken Op = PostfixStack.pop_back_val(); 153 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER)) 154 return -1; // The invalid Scale value will be caught later by checkScale 155 return Op.second; 156 } 157 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 158 assert ((Op == IC_IMM || Op == IC_REGISTER) && 159 "Unexpected operand!"); 160 PostfixStack.push_back(std::make_pair(Op, Val)); 161 } 162 163 void popOperator() { InfixOperatorStack.pop_back(); } 164 void pushOperator(InfixCalculatorTok Op) { 165 // Push the new operator if the stack is empty. 166 if (InfixOperatorStack.empty()) { 167 InfixOperatorStack.push_back(Op); 168 return; 169 } 170 171 // Push the new operator if it has a higher precedence than the operator 172 // on the top of the stack or the operator on the top of the stack is a 173 // left parentheses. 
174 unsigned Idx = InfixOperatorStack.size() - 1; 175 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 176 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 177 InfixOperatorStack.push_back(Op); 178 return; 179 } 180 181 // The operator on the top of the stack has higher precedence than the 182 // new operator. 183 unsigned ParenCount = 0; 184 while (1) { 185 // Nothing to process. 186 if (InfixOperatorStack.empty()) 187 break; 188 189 Idx = InfixOperatorStack.size() - 1; 190 StackOp = InfixOperatorStack[Idx]; 191 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 192 break; 193 194 // If we have an even parentheses count and we see a left parentheses, 195 // then stop processing. 196 if (!ParenCount && StackOp == IC_LPAREN) 197 break; 198 199 if (StackOp == IC_RPAREN) { 200 ++ParenCount; 201 InfixOperatorStack.pop_back(); 202 } else if (StackOp == IC_LPAREN) { 203 --ParenCount; 204 InfixOperatorStack.pop_back(); 205 } else { 206 InfixOperatorStack.pop_back(); 207 PostfixStack.push_back(std::make_pair(StackOp, 0)); 208 } 209 } 210 // Push the new operator. 211 InfixOperatorStack.push_back(Op); 212 } 213 214 int64_t execute() { 215 // Push any remaining operators onto the postfix stack. 
216 while (!InfixOperatorStack.empty()) { 217 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 218 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 219 PostfixStack.push_back(std::make_pair(StackOp, 0)); 220 } 221 222 if (PostfixStack.empty()) 223 return 0; 224 225 SmallVector<ICToken, 16> OperandStack; 226 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) { 227 ICToken Op = PostfixStack[i]; 228 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 229 OperandStack.push_back(Op); 230 } else if (isUnaryOperator(Op.first)) { 231 assert (OperandStack.size() > 0 && "Too few operands."); 232 ICToken Operand = OperandStack.pop_back_val(); 233 assert (Operand.first == IC_IMM && 234 "Unary operation with a register!"); 235 switch (Op.first) { 236 default: 237 report_fatal_error("Unexpected operator!"); 238 break; 239 case IC_NEG: 240 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); 241 break; 242 case IC_NOT: 243 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); 244 break; 245 } 246 } else { 247 assert (OperandStack.size() > 1 && "Too few operands."); 248 int64_t Val; 249 ICToken Op2 = OperandStack.pop_back_val(); 250 ICToken Op1 = OperandStack.pop_back_val(); 251 switch (Op.first) { 252 default: 253 report_fatal_error("Unexpected operator!"); 254 break; 255 case IC_PLUS: 256 Val = Op1.second + Op2.second; 257 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 258 break; 259 case IC_MINUS: 260 Val = Op1.second - Op2.second; 261 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 262 break; 263 case IC_MULTIPLY: 264 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 265 "Multiply operation with an immediate and a register!"); 266 Val = Op1.second * Op2.second; 267 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 268 break; 269 case IC_DIVIDE: 270 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 271 "Divide operation with an immediate and a register!"); 272 assert (Op2.second != 0 && "Division by zero!"); 
273 Val = Op1.second / Op2.second; 274 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 275 break; 276 case IC_MOD: 277 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 278 "Modulo operation with an immediate and a register!"); 279 Val = Op1.second % Op2.second; 280 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 281 break; 282 case IC_OR: 283 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 284 "Or operation with an immediate and a register!"); 285 Val = Op1.second | Op2.second; 286 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 287 break; 288 case IC_XOR: 289 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 290 "Xor operation with an immediate and a register!"); 291 Val = Op1.second ^ Op2.second; 292 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 293 break; 294 case IC_AND: 295 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 296 "And operation with an immediate and a register!"); 297 Val = Op1.second & Op2.second; 298 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 299 break; 300 case IC_LSHIFT: 301 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 302 "Left shift operation with an immediate and a register!"); 303 Val = Op1.second << Op2.second; 304 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 305 break; 306 case IC_RSHIFT: 307 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 308 "Right shift operation with an immediate and a register!"); 309 Val = Op1.second >> Op2.second; 310 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 311 break; 312 } 313 } 314 } 315 assert (OperandStack.size() == 1 && "Expected a single result."); 316 return OperandStack.pop_back_val().second; 317 } 318 }; 319 320 enum IntelExprState { 321 IES_INIT, 322 IES_OR, 323 IES_XOR, 324 IES_AND, 325 IES_LSHIFT, 326 IES_RSHIFT, 327 IES_PLUS, 328 IES_MINUS, 329 IES_NOT, 330 IES_MULTIPLY, 331 IES_DIVIDE, 332 IES_MOD, 333 IES_LBRAC, 334 IES_RBRAC, 335 IES_LPAREN, 336 IES_RPAREN, 337 IES_REGISTER, 338 IES_INTEGER, 339 IES_IDENTIFIER, 
340 IES_ERROR 341 }; 342 343 class IntelExprStateMachine { 344 IntelExprState State, PrevState; 345 unsigned BaseReg, IndexReg, TmpReg, Scale; 346 int64_t Imm; 347 const MCExpr *Sym; 348 StringRef SymName; 349 InfixCalculator IC; 350 InlineAsmIdentifierInfo Info; 351 short BracCount; 352 bool MemExpr; 353 354 public: 355 IntelExprStateMachine() 356 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), 357 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0), 358 MemExpr(false) {} 359 360 void addImm(int64_t imm) { Imm += imm; } 361 short getBracCount() { return BracCount; } 362 bool isMemExpr() { return MemExpr; } 363 unsigned getBaseReg() { return BaseReg; } 364 unsigned getIndexReg() { return IndexReg; } 365 unsigned getScale() { return Scale; } 366 const MCExpr *getSym() { return Sym; } 367 StringRef getSymName() { return SymName; } 368 int64_t getImm() { return Imm + IC.execute(); } 369 bool isValidEndState() { 370 return State == IES_RBRAC || State == IES_INTEGER; 371 } 372 bool hadError() { return State == IES_ERROR; } 373 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; } 374 375 void onOr() { 376 IntelExprState CurrState = State; 377 switch (State) { 378 default: 379 State = IES_ERROR; 380 break; 381 case IES_INTEGER: 382 case IES_RPAREN: 383 case IES_REGISTER: 384 State = IES_OR; 385 IC.pushOperator(IC_OR); 386 break; 387 } 388 PrevState = CurrState; 389 } 390 void onXor() { 391 IntelExprState CurrState = State; 392 switch (State) { 393 default: 394 State = IES_ERROR; 395 break; 396 case IES_INTEGER: 397 case IES_RPAREN: 398 case IES_REGISTER: 399 State = IES_XOR; 400 IC.pushOperator(IC_XOR); 401 break; 402 } 403 PrevState = CurrState; 404 } 405 void onAnd() { 406 IntelExprState CurrState = State; 407 switch (State) { 408 default: 409 State = IES_ERROR; 410 break; 411 case IES_INTEGER: 412 case IES_RPAREN: 413 case IES_REGISTER: 414 State = IES_AND; 415 IC.pushOperator(IC_AND); 416 break; 417 } 418 PrevState = CurrState; 419 } 420 
    // '<<' operator: only legal after a completed subexpression.
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    // '>>' operator: only legal after a completed subexpression.
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    // '+' operator. A register completed by '+' becomes the base register,
    // or the index register (scale 0) if a base is already set. Returns true
    // and sets ErrMsg on error.
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg) {
              ErrMsg = "BaseReg/IndexReg already set!";
              return true;
            }
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // '-' token: binary minus after a completed subexpression, otherwise a
    // unary negate. Rejects a negate applied to a scale. Returns true and
    // sets ErrMsg on error.
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg) {
              ErrMsg = "BaseReg/IndexReg already set!";
              return true;
            }
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // 'not' unary operator: legal wherever a new subexpression may start.
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }

    // Register token. In 'Scale * Register' position, pops the pending scale
    // immediate and records it against this index register. Returns true and
    // sets ErrMsg on error.
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg) {
            ErrMsg = "BaseReg/IndexReg already set!";
            return true;
          }
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Identifier token: enum values and symbolic constants fold to integers;
    // otherwise records the (single permitted) symbol reference. Returns true
    // and sets ErrMsg on error.
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          bool ParsingInlineAsm, StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      bool HasSymbol = Sym != nullptr;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
        MemExpr = true;
        State = IES_INTEGER;
        Sym = SymRef;
        SymName = SymRefName;
        IC.pushOperand(IC_IMM);
        if (ParsingInlineAsm)
          Info = IDInfo;
        break;
      }
      if (HasSymbol)
        ErrMsg = "cannot use more than one symbol in memory operand";
      return HasSymbol;
    }
    // Integer token. In 'Register * Scale' position, records the value as the
    // scale for the pending index register instead of pushing an operand.
    // Returns true and sets ErrMsg on error.
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg) {
            ErrMsg = "BaseReg/IndexReg already set!";
            return true;
          }
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // '*' operator: only legal after a completed subexpression.
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    // '/' operator: only legal after an integer or ')' (not a register).
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    // 'mod' operator: only legal after an integer or ')' (not a register).
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    // '[': marks a memory expression. Following a completed subexpression it
    // acts as an implicit '+' (e.g. 'sym[rax]'). Nested brackets are rejected
    // (returns true on error).
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        break;
      case IES_INIT:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracCount++;
      return false;
    }
    // ']': closes the bracketed part; a register completed here becomes the
    // base register, or the index register (scale 0) if a base is already
    // set. Returns true on unbalanced brackets.
    bool onRBrac() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1)
          return true;
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            assert (!IndexReg && "BaseReg/IndexReg already set!");
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // '(': legal wherever a new subexpression may start.
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    // ')': only legal after a completed subexpression.
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
  };

  // Report a parse error. When matching MS-style inline asm, the error is
  // swallowed (returns false) and the rest of the statement is consumed so
  // matching can continue.
  bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
             bool MatchingInlineAsm = false) {
    MCAsmParser &Parser = getParser();
    if (MatchingInlineAsm) {
      if (!getLexer().isAtStartOfStatement())
        Parser.eatToEndOfStatement();
      return false;
    }
    return Parser.Error(L, Msg, Range);
  }

  // Convenience: report an error and return nullptr for operand-returning
  // parse routines.
  std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
    Error(Loc, Msg, R);
    return nullptr;
  }

  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);
std::unique_ptr<X86Operand> ParseOperand(); 831 std::unique_ptr<X86Operand> ParseATTOperand(); 832 std::unique_ptr<X86Operand> ParseIntelOperand(); 833 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); 834 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); 835 unsigned IdentifyIntelInlineAsmOperator(StringRef Name); 836 unsigned ParseIntelInlineAsmOperator(unsigned OpKind); 837 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start); 838 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM); 839 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, 840 SMLoc End); 841 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 842 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier, 843 InlineAsmIdentifierInfo &Info, 844 bool IsUnevaluatedOperand, SMLoc &End); 845 846 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, 847 const MCExpr *&Disp, 848 const SMLoc &StartLoc, 849 SMLoc &EndLoc); 850 851 X86::CondCode ParseConditionCode(StringRef CCode); 852 853 bool ParseIntelMemoryOperandSize(unsigned &Size); 854 std::unique_ptr<X86Operand> 855 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, 856 unsigned IndexReg, unsigned Scale, SMLoc Start, 857 SMLoc End, unsigned Size, StringRef Identifier, 858 const InlineAsmIdentifierInfo &Info); 859 860 bool parseDirectiveEven(SMLoc L); 861 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 862 863 /// CodeView FPO data directives. 
864 bool parseDirectiveFPOProc(SMLoc L); 865 bool parseDirectiveFPOSetFrame(SMLoc L); 866 bool parseDirectiveFPOPushReg(SMLoc L); 867 bool parseDirectiveFPOStackAlloc(SMLoc L); 868 bool parseDirectiveFPOStackAlign(SMLoc L); 869 bool parseDirectiveFPOEndPrologue(SMLoc L); 870 bool parseDirectiveFPOEndProc(SMLoc L); 871 bool parseDirectiveFPOData(SMLoc L); 872 873 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 874 875 bool validateInstruction(MCInst &Inst, const OperandVector &Ops); 876 bool processInstruction(MCInst &Inst, const OperandVector &Ops); 877 878 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds 879 /// instrumentation around Inst. 880 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); 881 882 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 883 OperandVector &Operands, MCStreamer &Out, 884 uint64_t &ErrorInfo, 885 bool MatchingInlineAsm) override; 886 887 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands, 888 MCStreamer &Out, bool MatchingInlineAsm); 889 890 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures, 891 bool MatchingInlineAsm); 892 893 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, 894 OperandVector &Operands, MCStreamer &Out, 895 uint64_t &ErrorInfo, 896 bool MatchingInlineAsm); 897 898 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, 899 OperandVector &Operands, MCStreamer &Out, 900 uint64_t &ErrorInfo, 901 bool MatchingInlineAsm); 902 903 bool OmitRegisterFromClobberLists(unsigned RegNo) override; 904 905 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) 906 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. 907 /// return false if no parsing errors occurred, true otherwise. 
908 bool HandleAVX512Operand(OperandVector &Operands, 909 const MCParsedAsmOperand &Op); 910 911 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc); 912 913 bool is64BitMode() const { 914 // FIXME: Can tablegen auto-generate this? 915 return getSTI().getFeatureBits()[X86::Mode64Bit]; 916 } 917 bool is32BitMode() const { 918 // FIXME: Can tablegen auto-generate this? 919 return getSTI().getFeatureBits()[X86::Mode32Bit]; 920 } 921 bool is16BitMode() const { 922 // FIXME: Can tablegen auto-generate this? 923 return getSTI().getFeatureBits()[X86::Mode16Bit]; 924 } 925 void SwitchMode(unsigned mode) { 926 MCSubtargetInfo &STI = copySTI(); 927 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); 928 FeatureBitset OldMode = STI.getFeatureBits() & AllModes; 929 FeatureBitset FB = ComputeAvailableFeatures( 930 STI.ToggleFeature(OldMode.flip(mode))); 931 setAvailableFeatures(FB); 932 933 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); 934 } 935 936 unsigned getPointerWidth() { 937 if (is16BitMode()) return 16; 938 if (is32BitMode()) return 32; 939 if (is64BitMode()) return 64; 940 llvm_unreachable("invalid mode"); 941 } 942 943 bool isParsingIntelSyntax() { 944 return getParser().getAssemblerDialect(); 945 } 946 947 /// @name Auto-generated Matcher Functions 948 /// { 949 950 #define GET_ASSEMBLER_HEADER 951 #include "X86GenAsmMatcher.inc" 952 953 /// } 954 955 public: 956 enum X86MatchResultTy { 957 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY, 958 }; 959 960 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, 961 const MCInstrInfo &mii, const MCTargetOptions &Options) 962 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr), 963 Code16GCC(false) { 964 965 Parser.addAliasForDirective(".word", ".2byte"); 966 967 // Initialize the set of available features. 
968 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); 969 } 970 971 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 972 973 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override; 974 975 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 976 SMLoc NameLoc, OperandVector &Operands) override; 977 978 bool ParseDirective(AsmToken DirectiveID) override; 979 }; 980 } // end anonymous namespace 981 982 /// @name Auto-generated Match Functions 983 /// { 984 985 static unsigned MatchRegisterName(StringRef Name); 986 987 /// } 988 989 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg, 990 unsigned Scale, bool Is64BitMode, 991 StringRef &ErrMsg) { 992 // If we have both a base register and an index register make sure they are 993 // both 64-bit or 32-bit registers. 994 // To support VSIB, IndexReg can be 128-bit or 256-bit registers. 995 996 if (BaseReg != 0 && 997 !(BaseReg == X86::RIP || BaseReg == X86::EIP || 998 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) || 999 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) || 1000 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) { 1001 ErrMsg = "invalid base+index expression"; 1002 return true; 1003 } 1004 1005 if (IndexReg != 0 && 1006 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ || 1007 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1008 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1009 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1010 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) || 1011 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) || 1012 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) { 1013 ErrMsg = "invalid base+index expression"; 1014 return true; 1015 } 1016 1017 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) || 1018 IndexReg == X86::EIP || IndexReg 
== X86::RIP || 1019 IndexReg == X86::ESP || IndexReg == X86::RSP) { 1020 ErrMsg = "invalid base+index expression"; 1021 return true; 1022 } 1023 1024 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 1025 // and then only in non-64-bit modes. 1026 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1027 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP && 1028 BaseReg != X86::SI && BaseReg != X86::DI))) { 1029 ErrMsg = "invalid 16-bit base register"; 1030 return true; 1031 } 1032 1033 if (BaseReg == 0 && 1034 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 1035 ErrMsg = "16-bit memory operand may not include only index register"; 1036 return true; 1037 } 1038 1039 if (BaseReg != 0 && IndexReg != 0) { 1040 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1041 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1042 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1043 IndexReg == X86::EIZ)) { 1044 ErrMsg = "base register is 64-bit, but index register is not"; 1045 return true; 1046 } 1047 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1048 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1049 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1050 IndexReg == X86::RIZ)) { 1051 ErrMsg = "base register is 32-bit, but index register is not"; 1052 return true; 1053 } 1054 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 1055 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1056 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 1057 ErrMsg = "base register is 16-bit, but index register is not"; 1058 return true; 1059 } 1060 if ((BaseReg != X86::BX && BaseReg != X86::BP) || 1061 (IndexReg != X86::SI && IndexReg != X86::DI)) { 1062 ErrMsg = "invalid 16-bit base/index register combination"; 1063 return true; 1064 } 1065 } 1066 } 1067 1068 // RIP/EIP-relative 
// addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  // Finally, validate the scale factor (must be 1, 2, 4 or 8).
  return checkScale(Scale, ErrMsg);
}

/// Parse a register reference, with or without a leading '%' (AT&T syntax).
/// On success, RegNo holds the matched register and [StartLoc, EndLoc) covers
/// the consumed tokens. Returns true on error; in Intel syntax a failed match
/// returns true silently so the caller can retry the token as an identifier.
bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                 SMLoc &StartLoc, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  RegNo = 0;
  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix; unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
    Parser.Lex(); // Eat percent token.

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    // Intel syntax: not a register; let the caller reinterpret the token.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  RegNo = MatchRegisterName(Tok.getString());

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(Tok.getString().lower());

  // The "flags" register cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // REX prefix.
    // Reject registers that only exist in 64-bit mode.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      StringRef RegName = Tok.getString();
      Parser.Lex(); // Eat register name.
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (getLexer().isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    getParser().Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    }

    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && Tok.getString().startswith("db")) {
    if (Tok.getString().size() == 3) {
      switch (Tok.getString()[2]) {
      case '0': RegNo = X86::DR0; break;
      case '1': RegNo = X86::DR1; break;
      case '2': RegNo = X86::DR2; break;
      case '3': RegNo = X86::DR3; break;
      case '4': RegNo = X86::DR4; break;
      case '5': RegNo = X86::DR5; break;
      case '6': RegNo = X86::DR6; break;
      case '7': RegNo = X86::DR7; break;
      }
    } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
      switch (Tok.getString()[3]) {
      case '0': RegNo = X86::DR10; break;
      case '1': RegNo = X86::DR11; break;
      case '2': RegNo = X86::DR12; break;
      case '3': RegNo = X86::DR13; break;
      case '4': RegNo = X86::DR14; break;
      case '5': RegNo = X86::DR15; break;
      }
    }

    if (RegNo != 0) {
      EndLoc = Parser.getTok().getEndLoc();
      Parser.Lex(); // Eat it.
      return false;
    }
  }

  if (RegNo == 0) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

/// Build the implicit (R|E)SI-based memory operand used by string
/// instructions; the register width follows the current mode.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

/// Build the implicit (R|E)DI-based memory operand used by string
/// instructions; the register width follows the current mode.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ?
X86::EDI : X86::DI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

/// Return true if Reg is one of (R|E)SI, false for (R|E)DI.
bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}

/// Map (RegClassID, IsSIReg) to the concrete SI/DI register of that width.
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  switch (RegClassID) {
  default: llvm_unreachable("Unexpected register class");
  case X86::GR64RegClassID:
    return IsSIReg ? X86::RSI : X86::RDI;
  case X86::GR32RegClassID:
    return IsSIReg ? X86::ESI : X86::EDI;
  case X86::GR16RegClassID:
    return IsSIReg ? X86::SI : X86::DI;
  }
}

/// Append the implicit source/destination operands in the order required by
/// the active syntax (Intel: destination first, AT&T: source first).
void X86AsmParser::AddDefaultSrcDestOperands(
    OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
    std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
  if (isParsingIntelSyntax()) {
    Operands.push_back(std::move(Dst));
    Operands.push_back(std::move(Src));
  }
  else {
    Operands.push_back(std::move(Src));
    Operands.push_back(std::move(Dst));
  }
}

/// Check the operands the user wrote for a string instruction against the
/// implicit defaults in FinalOperands, warn where the written memory operand
/// only determines the size, and replace OrigOperands' operands with the
/// adjusted FinalOperands. Returns true only when an error was reported.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}

/// Dispatch to the Intel- or AT&T-syntax operand parser.
std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
  if (isParsingIntelSyntax())
    return ParseIntelOperand();
  return ParseATTOperand();
}

/// Create a memory operand for MS inline assembly, using the
/// frontend-provided identifier information (size, declaration, linkage).
std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
    unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
    unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
    const InlineAsmIdentifierInfo &Info) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Insert an explicit size if the user didn't have one.
    if (!Size) {
      Size = getPointerWidth();
      InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
                                          /*Len=*/0, Size);
    }
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
                                 Identifier, Info.Label.Decl);
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, and though inaccessible via rip/eip.
  if (IsGlobalLV && (BaseReg || IndexReg)) {
    return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
    // Otherwise, we set the base register to a non-zero value
    // if we don't know the actual value at this time. This is necessary to
    // get the matching correct in some cases.
  } else {
    BaseReg = BaseReg ? BaseReg : 1;
    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
                                 IndexReg, Scale, Start, End, Size, Identifier,
                                 Decl, FrontendSize);
  }
}

// Some binary bitwise operators have a named synonym.
// Query a candidate string for being such a named operator
// and if so - invoke the appropriate handler.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
  // A named operator should be either lower or upper case, but not a mix
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
    return false;
  if (Name.equals_lower("not"))
    SM.onNot();
  else if (Name.equals_lower("or"))
    SM.onOr();
  else if (Name.equals_lower("shl"))
    SM.onLShift();
  else if (Name.equals_lower("shr"))
    SM.onRShift();
  else if (Name.equals_lower("xor"))
    SM.onXor();
  else if (Name.equals_lower("and"))
    SM.onAnd();
  else if (Name.equals_lower("mod"))
    SM.onMod();
  else
    return false;
  return true;
}

/// Feed tokens to the Intel expression state machine until a valid end state
/// is reached (or an error is reported). End receives the location just past
/// the consumed expression. Returns true on error.
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  StringRef ErrMsg;

  AsmToken::TokenKind PrevTK = AsmToken::Error;
  bool Done = false;
  while (!Done) {
    // Most cases advance the lexer at the bottom of the loop; cases that
    // consume tokens themselves clear UpdateLocLex.
    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::EndOfStatement:
      Done = true;
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::At:
    case AsmToken::String:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
      // Register
      unsigned Reg;
      if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
        if (SM.onRegister(Reg, ErrMsg))
          return Error(Tok.getLoc(), ErrMsg);
        break;
      }
      // Operator synonym ("not", "or" etc.)
      if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
        break;
      // Symbol reference, when parsing assembly content
      InlineAsmIdentifierInfo Info;
      const MCExpr *Val;
      if (!isParsingInlineAsm()) {
        if (getParser().parsePrimaryExpr(Val, End)) {
          return Error(Tok.getLoc(), "Unexpected identifier!");
        } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
          return Error(IdentLoc, ErrMsg);
        } else
          break;
      }
      // MS InlineAsm operators (TYPE/LENGTH/SIZE)
      if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
        if (OpKind == IOK_OFFSET)
          return Error(IdentLoc, "Dealing OFFSET operator as part of"
            "a compound immediate expression is yet to be supported");
        if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
          if (SM.onInteger(Val, ErrMsg))
            return Error(IdentLoc, ErrMsg);
        } else
          return true;
        break;
      }
      // MS Dot Operator expression
      if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
        if (ParseIntelDotOperator(SM, End))
          return true;
        break;
      }
      // MS InlineAsm identifier
      // Call parseIdentifier() to combine @ with the identifier behind it.
      if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
        return Error(IdentLoc, "expected identifier");
      if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
        return true;
      else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
        return Error(IdentLoc, ErrMsg);
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          // A backward reference ("b") must name an already-defined label.
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          InlineAsmIdentifierInfo Info;
          if (SM.onIdentifierExpr(Val, Identifier, Info,
                                  isParsingInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde:   SM.onNot(); break;
    case AsmToken::Star:    SM.onStar(); break;
    case AsmToken::Slash:   SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe:    SM.onOr(); break;
    case AsmToken::Caret:   SM.onXor();
      break;
    case AsmToken::Amp:     SM.onAnd(); break;
    case AsmToken::LessLess:
      SM.onLShift(); break;
    case AsmToken::GreaterGreater:
      SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      break;
    case AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen:  SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

/// Emit MS inline-asm rewrites that replace the textual Intel expression in
/// [Start, End) with its canonical base/index/scale/displacement form.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(),
                 SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
                                                 StringRef &Identifier,
                                                 InlineAsmIdentifierInfo &Info,
                                                 bool IsUnevaluatedOperand,
                                                 SMLoc &End) {
  MCAsmParser &Parser = getParser();
  assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // Let the frontend resolve the (possibly qualified) identifier; it may
  // claim more text than the single token the lexer produced.
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
          "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
      SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                         Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name.
    InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                        InternalName);
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}

// ParseRoundingModeOp - Parse AVX-512 rounding mode operand:
// either "{rn|rd|ru|rz-sae}" (an immediate) or "{sae}" (a token).
std::unique_ptr<X86Operand>
X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  // Eat "{" and mark the current place.
  const SMLoc consumedToken = consumeToken();
  if (Tok.isNot(AsmToken::Identifier))
    return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
  if (Tok.getIdentifier().startswith("r")){
    int rndMode = StringSwitch<int>(Tok.getIdentifier())
      .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
      .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
      .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
      .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
      .Default(-1);
    if (-1 == rndMode)
      return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
    Parser.Lex();  // Eat "r*" of r*-sae
    if (!getLexer().is(AsmToken::Minus))
      return ErrorOperand(Tok.getLoc(), "Expected - at this point");
    Parser.Lex();  // Eat "-"
    Parser.Lex();  // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return ErrorOperand(Tok.getLoc(), "Expected } at this point");
    SMLoc End = Tok.getEndLoc();
    Parser.Lex();  // Eat "}"
    const MCExpr *RndModeOp =
      MCConstantExpr::create(rndMode, Parser.getContext());
    return X86Operand::CreateImm(RndModeOp, Start, End);
  }
  if(Tok.getIdentifier().equals("sae")){
    Parser.Lex();  // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return ErrorOperand(Tok.getLoc(), "Expected } at this point");
    Parser.Lex();  // Eat "}"
    return X86Operand::CreateToken("{sae}", consumedToken);
  }
  return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}

/// Parse the '.' operator (struct field access), adding the resolved field
/// offset to the state machine's immediate.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
  const AsmToken &Tok = getTok();
  unsigned Offset;

  // Drop the optional '.'.
  StringRef DotDispStr = Tok.getString();
  if (DotDispStr.startswith("."))
    DotDispStr = DotDispStr.drop_front(1);

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    DotDispStr.getAsInteger(10, DotDisp);
    Offset = DotDisp.getZExtValue();
  } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
    // Ask the frontend for the offset of "Base.Member".
    std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
                                           Offset))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
  } else
    return Error(Tok.getLoc(), "Unexpected token type!");

  // Eat the DotExpression and update End
  End = SMLoc::getFromPointer(DotDispStr.data());
  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
  while (Tok.getLoc().getPointer() < DotExprEndLoc)
    Lex();
  SM.addImm(Offset);
  return false;
}

/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc OffsetOfLoc = Tok.getLoc();
  Parser.Lex(); // Eat offset.
  const MCExpr *Val;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*Unevaluated=*/false, End))
    return nullptr;

  void *Decl = nullptr;
  // FIXME: MS evaluates "offset <Constant>" to the underlying integral
  if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    return ErrorOperand(Start, "offset operator cannot yet handle constants");
  else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
    Decl = Info.Var.Decl;
  // Don't emit the offset operator.
  InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);

  // The offset operator will have an 'r' constraint, thus we need to create
  // register operand to ensure proper matching. Just pick a GPR based on
  // the size of a pointer.
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);

  return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
                               OffsetOfLoc, Identifier, Decl);
}

// Query a candidate string for being an Intel assembly operator.
// Report back its kind, or IOK_INVALID if it does not evaluate as a known one.
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name)
    .Cases("TYPE","type",IOK_TYPE)
    .Cases("SIZE","size",IOK_SIZE)
    .Cases("LENGTH","length",IOK_LENGTH)
    .Cases("OFFSET","offset",IOK_OFFSET)
    .Default(IOK_INVALID);
}

/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
/// returns the number of elements in an array. It returns the value 1 for
/// non-array variables. The SIZE operator returns the size of a C or C++
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  Parser.Lex(); // Eat operator.

  const MCExpr *Val = nullptr;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*Unevaluated=*/true, End))
    return 0;

  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    Error(Start, "unable to lookup expression");
    return 0;
  }

  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Var.Length; break;
  case IOK_SIZE: CVal = Info.Var.Size; break;
  case IOK_TYPE: CVal = Info.Var.Type; break;
  }

  return CVal;
}

/// Recognize an optional "<size> PTR" prefix (e.g. "dword ptr") and report
/// the operand size in bits through Size (0 when no prefix is present).
/// Returns true on error ("<size>" not followed by PTR/ptr).
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
  Size = StringSwitch<unsigned>(getTok().getString())
    .Cases("BYTE", "byte", 8)
    .Cases("WORD", "word", 16)
    .Cases("DWORD", "dword", 32)
    .Cases("FLOAT", "float", 32)
    .Cases("LONG", "long", 32)
    .Cases("FWORD", "fword", 48)
    .Cases("DOUBLE", "double", 64)
    .Cases("QWORD", "qword", 64)
    .Cases("MMWORD","mmword", 64)
    .Cases("XWORD", "xword", 80)
    .Cases("TBYTE", "tbyte", 80)
    .Cases("XMMWORD", "xmmword", 128)
    .Cases("YMMWORD", "ymmword", 256)
    .Cases("ZMMWORD", "zmmword", 512)
    .Default(0);
  if (Size) {
    const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
    if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
      return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
    Lex(); // Eat ptr.
  }
  return false;
}

/// Parse a complete Intel-syntax operand: register, rounding-mode block,
/// immediate, or memory reference (with optional size directive and segment
/// override).
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // FIXME: Offset operator
  // Should be handled as part of immediate expression, as other operators
  // Currently, only supported as a stand-alone operand
  if (isParsingInlineAsm())
    if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
      return ParseIntelOffsetOfOperator();

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return nullptr;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start);

  // Register operand.
  unsigned RegNo = 0;
  if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return ErrorOperand(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon))
      return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
        ErrorOperand(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
    // An alleged segment override. Check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return ErrorOperand(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return nullptr;

  if (isParsingInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Combine the symbolic and constant parts of the displacement.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo)
    return X86Operand::CreateImm(Disp, Start, End);

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  unsigned Scale = SM.getScale();

  // (R|E)SP cannot be an index; with no explicit scale, swap it into the base.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
1911 if (Scale == 0 && 1912 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) || 1913 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) || 1914 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) && 1915 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) || 1916 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) || 1917 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg))) 1918 std::swap(BaseReg, IndexReg); 1919 1920 if (Scale != 0 && 1921 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) 1922 return ErrorOperand(Start, "16-bit addresses cannot have a scale"); 1923 1924 // If there was no explicit scale specified, change it to 1. 1925 if (Scale == 0) 1926 Scale = 1; 1927 1928 // If this is a 16-bit addressing mode with the base and index in the wrong 1929 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is 1930 // shared with att syntax where order matters. 1931 if ((BaseReg == X86::SI || BaseReg == X86::DI) && 1932 (IndexReg == X86::BX || IndexReg == X86::BP)) 1933 std::swap(BaseReg, IndexReg); 1934 1935 if ((BaseReg || IndexReg) && 1936 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 1937 ErrMsg)) 1938 return ErrorOperand(Start, ErrMsg); 1939 if (isParsingInlineAsm()) 1940 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg, 1941 Scale, Start, End, Size, SM.getSymName(), 1942 SM.getIdentifierInfo()); 1943 if (!(BaseReg || IndexReg || RegNo)) 1944 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size); 1945 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, 1946 BaseReg, IndexReg, Scale, Start, End, Size); 1947 } 1948 1949 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { 1950 MCAsmParser &Parser = getParser(); 1951 switch (getLexer().getKind()) { 1952 case AsmToken::Dollar: { 1953 // $42 or $ID -> immediate. 
1954 SMLoc Start = Parser.getTok().getLoc(), End; 1955 Parser.Lex(); 1956 const MCExpr *Val; 1957 // This is an immediate, so we should not parse a register. Do a precheck 1958 // for '%' to supercede intra-register parse errors. 1959 SMLoc L = Parser.getTok().getLoc(); 1960 if (check(getLexer().is(AsmToken::Percent), L, 1961 "expected immediate expression") || 1962 getParser().parseExpression(Val, End) || 1963 check(isa<X86MCExpr>(Val), L, "expected immediate expression")) 1964 return nullptr; 1965 return X86Operand::CreateImm(Val, Start, End); 1966 } 1967 case AsmToken::LCurly: { 1968 SMLoc Start = Parser.getTok().getLoc(); 1969 return ParseRoundingModeOp(Start); 1970 } 1971 default: { 1972 // This a memory operand or a register. We have some parsing complications 1973 // as a '(' may be part of an immediate expression or the addressing mode 1974 // block. This is complicated by the fact that an assembler-level variable 1975 // may refer either to a register or an immediate expression. 1976 1977 SMLoc Loc = Parser.getTok().getLoc(), EndLoc; 1978 const MCExpr *Expr = nullptr; 1979 unsigned Reg = 0; 1980 if (getLexer().isNot(AsmToken::LParen)) { 1981 // No '(' so this is either a displacement expression or a register. 1982 if (Parser.parseExpression(Expr, EndLoc)) 1983 return nullptr; 1984 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) { 1985 // Segment Register. Reset Expr and copy value to register. 1986 Expr = nullptr; 1987 Reg = RE->getRegNo(); 1988 1989 // Sanity check register. 1990 if (Reg == X86::EIZ || Reg == X86::RIZ) 1991 return ErrorOperand( 1992 Loc, "%eiz and %riz can only be used as index registers", 1993 SMRange(Loc, EndLoc)); 1994 if (Reg == X86::RIP) 1995 return ErrorOperand(Loc, "%rip can only be used as a base register", 1996 SMRange(Loc, EndLoc)); 1997 // Return register that are not segment prefixes immediately. 
        if (!Parser.parseOptionalToken(AsmToken::Colon))
          return X86Operand::CreateReg(Reg, Loc, EndLoc);
        // A register followed by ':' must be a segment override prefix.
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return ErrorOperand(Loc, "invalid segment register");
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc);
  }
  }
}

// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
  return StringSwitch<X86::CondCode>(CC)
      .Case("o", X86::COND_O)          // Overflow
      .Case("no", X86::COND_NO)        // No Overflow
      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
      .Cases("e", "z", X86::COND_E)    // Equal/Zero
      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
      .Case("s", X86::COND_S)          // Sign
      .Case("ns", X86::COND_NS)        // No Sign
      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
      .Default(X86::COND_INVALID);
}

// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
                          const SMLoc &StartLoc) {
  MCAsmParser &Parser = getParser();
  // Assuming we are just past the '{' mark, query the next token.
  // If we searched for {z} but none was found, return false, as no parsing
  // error was encountered.
  if (!(getLexer().is(AsmToken::Identifier) &&
        (getLexer().getTok().getIdentifier() == "z")))
    return false;
  Parser.Lex(); // Eat z
  // Query and eat the '}' mark
  if (!getLexer().is(AsmToken::RCurly))
    return Error(getLexer().getLoc(), "Expected } at this point");
  Parser.Lex(); // Eat '}'
  // Assign Z with the {z} mark operand.
  Z = X86Operand::CreateToken("{z}", StartLoc);
  return false;
}

// true on failure, false otherwise
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
                                       const MCParsedAsmOperand &Op) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      Parser.Lex();  // Eat "1" of 1to8
      if (!getLexer().is(AsmToken::Identifier) ||
          !getLexer().getTok().getIdentifier().startswith("to"))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      const char *BroadcastPrimitive =
        StringSwitch<const char*>(getLexer().getTok().getIdentifier())
          .Case("to2", "{1to2}")
          .Case("to4", "{1to4}")
          .Case("to8", "{1to8}")
          .Case("to16", "{1to16}")
          .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex();  // Eat "toN" of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}
      // last one has no meaning, but GCC accepts it
      // Currently, we have just passed a '{' mark
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the allegedly '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        unsigned RegNo;
        SMLoc RegLoc;
        if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 is hard-wired to "no masking" and cannot be a write mask.
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Have we found a parsing error, or found no (expected) {z} mark
          // - report an error
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}

/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
                                                          const MCExpr *&Disp,
                                                          const SMLoc &StartLoc,
                                                          SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2. seg : *(disp) (base-index-scale-expr)
  //   3. seg :       *(base-index-scale-expr)
  //   4.        disp  *(base-index-scale-expr)
  //   5.            *(disp)  (base-index-scale-expr)
  //   6.                 *(base-index-scale-expr)
  //   7.  disp *
  //   8. *(disp)

  // If we do not have a displacement yet, check if we're in cases 4 or 6 by
  // checking if the first object after the parenthesis is a register (or an
  // identifier referring to a register) and parse the displacement or default
  // to 0 as appropriate.
  auto isAtMemOperand = [this]() {
    if (this->getLexer().isNot(AsmToken::LParen))
      return false;
    AsmToken Buf[2];
    StringRef Id;
    auto TokCount = this->getLexer().peekTokens(Buf, true);
    if (TokCount == 0)
      return false;
    switch (Buf[0].getKind()) {
    case AsmToken::Percent:
    case AsmToken::Comma:
      return true;
    // These lower cases are doing a peekIdentifier.
    case AsmToken::At:
    case AsmToken::Dollar:
      if ((TokCount > 1) &&
          (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
          (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
        Id = StringRef(Buf[0].getLoc().getPointer(),
                       Buf[1].getIdentifier().size() + 1);
      break;
    case AsmToken::Identifier:
    case AsmToken::String:
      Id = Buf[0].getIdentifier();
      break;
    default:
      return false;
    }
    // We have an ID. Check if it is bound to a register.
    if (!Id.empty()) {
      MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
      if (Sym->isVariable()) {
        auto V = Sym->getVariableValue(/*SetUsed*/ false);
        return isa<X86MCExpr>(V);
      }
    }
    return false;
  };

  if (!Disp) {
    // Parse immediate if we're not at a mem operand yet.
    if (!isAtMemOperand()) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
        return nullptr;
      assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
    } else {
      // Disp is implicitly zero if we haven't parsed it yet.
      Disp = MCConstantExpr::create(0, Parser.getContext());
    }
  }

  // We are now either at the end of the operand or at the '(' at the start of a
  // base-index-scale-expr.

  if (!parseOptionalToken(AsmToken::LParen)) {
    if (SegReg == 0)
      return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
                                 StartLoc, EndLoc);
  }

  // If we reached here, then eat the '(' and Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc BaseLoc = getLexer().getLoc();
  const MCExpr *E;
  StringRef ErrMsg;

  // Parse BaseReg if one is provided.
  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
    if (Parser.parseExpression(E, EndLoc) ||
        check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
      return nullptr;

    // Sanity check register.
    BaseReg = cast<X86MCExpr>(E)->getRegNo();
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
      return ErrorOperand(BaseLoc,
                          "eiz and riz can only be used as index registers",
                          SMRange(BaseLoc, EndLoc));
  }

  if (parseOptionalToken(AsmToken::Comma)) {
    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Even though it would be completely consistent to support syntax like
    // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().isNot(AsmToken::RParen)) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
        return nullptr;

      if (!isa<X86MCExpr>(E)) {
        // We've parsed an unexpected Scale Value instead of an index
        // register. Interpret it as an absolute.
        int64_t ScaleVal;
        if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
          return ErrorOperand(Loc, "expected absolute expression");
        if (ScaleVal != 1)
          Warning(Loc, "scale factor without index register is ignored");
        Scale = 1;
      } else { // IndexReg Found.
        IndexReg = cast<X86MCExpr>(E)->getRegNo();

        if (BaseReg == X86::RIP)
          return ErrorOperand(
              Loc, "%rip as base register can not have an index register");
        if (IndexReg == X86::RIP)
          return ErrorOperand(Loc, "%rip is not allowed as an index register");

        if (parseOptionalToken(AsmToken::Comma)) {
          // Parse the scale amount:
          //  ::= ',' [scale-expression]

          // A scale amount without an index is ignored.
          if (getLexer().isNot(AsmToken::RParen)) {
            int64_t ScaleVal;
            if (Parser.parseTokenLoc(Loc) ||
                Parser.parseAbsoluteExpression(ScaleVal))
              return ErrorOperand(Loc, "expected scale expression");
            Scale = (unsigned)ScaleVal;
            // Validate the scale amount.
            if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
                Scale != 1)
              return ErrorOperand(Loc,
                                  "scale factor in 16-bit address must be 1");
            if (checkScale(Scale, ErrMsg))
              return ErrorOperand(Loc, ErrMsg);
          }
        }
      }
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
    return nullptr;

  // This is to support otherwise illegal operand (%dx) found in various
  // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
  // be supported. Mark such DX variants separately; fix only in special cases.
  if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
      isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
    return X86Operand::CreateDXReg(BaseLoc, BaseLoc);

  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return ErrorOperand(BaseLoc, ErrMsg);

  if (SegReg || BaseReg || IndexReg)
    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
                                 IndexReg, Scale, StartLoc, EndLoc);
  return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
}

// Parse either a standard primary expression or a register.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  // See if this is a register first.
  if (getTok().is(AsmToken::Percent) ||
      (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
       MatchRegisterName(Parser.getTok().getString()))) {
    SMLoc StartLoc = Parser.getTok().getLoc();
    unsigned RegNo;
    if (ParseRegister(RegNo, StartLoc, EndLoc))
      return true;
    Res = X86MCExpr::create(RegNo, Parser.getContext());
    return false;
  }
  return Parser.parsePrimaryExpr(Res, EndLoc);
}

bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;

  // Parse pseudo prefixes.
  while (1) {
    if (Name == "{") {
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
2350 if (getLexer().isNot(AsmToken::RCurly)) 2351 return Error(Parser.getTok().getLoc(), "Expected '}'"); 2352 Parser.Lex(); // Eat curly. 2353 2354 if (Prefix == "vex2") 2355 ForcedVEXEncoding = VEXEncoding_VEX2; 2356 else if (Prefix == "vex3") 2357 ForcedVEXEncoding = VEXEncoding_VEX3; 2358 else if (Prefix == "evex") 2359 ForcedVEXEncoding = VEXEncoding_EVEX; 2360 else 2361 return Error(NameLoc, "unknown prefix"); 2362 2363 NameLoc = Parser.getTok().getLoc(); 2364 if (getLexer().is(AsmToken::LCurly)) { 2365 Parser.Lex(); 2366 Name = "{"; 2367 } else { 2368 if (getLexer().isNot(AsmToken::Identifier)) 2369 return Error(Parser.getTok().getLoc(), "Expected identifier"); 2370 // FIXME: The mnemonic won't match correctly if its not in lower case. 2371 Name = Parser.getTok().getString(); 2372 Parser.Lex(); 2373 } 2374 continue; 2375 } 2376 2377 break; 2378 } 2379 2380 StringRef PatchedName = Name; 2381 2382 // Hack to skip "short" following Jcc. 2383 if (isParsingIntelSyntax() && 2384 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" || 2385 PatchedName == "jcxz" || PatchedName == "jexcz" || 2386 (PatchedName.startswith("j") && 2387 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) { 2388 StringRef NextTok = Parser.getTok().getString(); 2389 if (NextTok == "short") { 2390 SMLoc NameEndLoc = 2391 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size()); 2392 // Eat the short keyword. 2393 Parser.Lex(); 2394 // MS and GAS ignore the short keyword; they both determine the jmp type 2395 // based on the distance of the label. (NASM does emit different code with 2396 // and without "short," though.) 2397 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc, 2398 NextTok.size() + 1); 2399 } 2400 } 2401 2402 // FIXME: Hack to recognize setneb as setne. 
  // Strip the trailing width suffix from set<cc>b (e.g. setneb -> setne),
  // but keep setb/setnb which are real condition codes.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    // Map the textual comparison code to its immediate encoding.
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
      .Case("eq",       0x00)
      .Case("eq_oq",    0x00)
      .Case("lt",       0x01)
      .Case("lt_os",    0x01)
      .Case("le",       0x02)
      .Case("le_os",    0x02)
      .Case("unord",    0x03)
      .Case("unord_q",  0x03)
      .Case("neq",      0x04)
      .Case("neq_uq",   0x04)
      .Case("nlt",      0x05)
      .Case("nlt_us",   0x05)
      .Case("nle",      0x06)
      .Case("nle_us",   0x06)
      .Case("ord",      0x07)
      .Case("ord_q",    0x07)
      /* AVX only from here */
      .Case("eq_uq",    0x08)
      .Case("nge",      0x09)
      .Case("nge_us",   0x09)
      .Case("ngt",      0x0A)
      .Case("ngt_us",   0x0A)
      .Case("false",    0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq",   0x0C)
      .Case("ge",       0x0D)
      .Case("ge_os",    0x0D)
      .Case("gt",       0x0E)
      .Case("gt_os",    0x0E)
      .Case("true",     0x0F)
      .Case("true_uq",  0x0F)
      .Case("eq_os",    0x10)
      .Case("lt_oq",    0x11)
      .Case("le_oq",    0x12)
      .Case("unord_s",  0x13)
      .Case("neq_us",   0x14)
      .Case("nlt_uq",   0x15)
      .Case("nle_uq",   0x16)
      .Case("ord_s",    0x17)
      .Case("eq_us",    0x18)
      .Case("nge_uq",   0x19)
      .Case("ngt_uq",   0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os",   0x1C)
      .Case("ge_oq",    0x1D)
      .Case("gt_oq",    0x1E)
      .Case("true_us",  0x1F)
      .Default(~0U);
    // Predicates >= 8 are only valid for the VEX-encoded (vcmp*) forms.
    if (CC != ~0U && (IsVCMP || CC < 8)) {
      if (PatchedName.endswith("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.endswith("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.endswith("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.endswith("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }


  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. For example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool isPrefix = StringSwitch<bool>(Name)
                      .Cases("rex64", "data32", "data16", true)
                      .Cases("xacquire", "xrelease", true)
                      .Cases("acquire", "release", isParsingIntelSyntax())
                      .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if its not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.startswith(";") || Name.startswith("\n") ||
           Name.startswith("#") || Name.startswith("\t") ||
           Name.startswith("/")) {
      // FIXME: The mnemonic won't match correctly if its not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while(1) {
      if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
        Operands.push_back(std::move(Op));
        if (HandleAVX512Operand(Operands, *Operands.back()))
          return true;
      } else {
         return true;
      }
      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
     }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (isPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
                           .Case("fsub", "fsubp")
                           .Case("fdiv", "fdivp")
                           .Case("fsubr", "fsubrp")
                           .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      unsigned Reg =
          getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.startswith("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.startswith("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.startswith("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.startswith("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.startswith("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // Bare AT&T "movsd" is the string move, not the SSE2 scalar move.
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
  // "shift <op>".
  if ((Name.startswith("shr") || Name.startswith("sar") ||
       Name.startswith("shl") || Name.startswith("sal") ||
       Name.startswith("rcl") || Name.startswith("rcr") ||
       Name.startswith("rol") || Name.startswith("ror")) &&
      Operands.size() == 3) {
    if (isParsingIntelSyntax()) {
      // Intel syntax
      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
        Operands.pop_back();
    } else {
      X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
      if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
          cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
        Operands.erase(Operands.begin() + 1);
    }
  }

  // Transforms "int $3" into "int3" as a size optimization.  We can't write an
  // instalias with an immediate operand yet.
  if (Name == "int" && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isImm())
      if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
        if (CE->getValue() == 3) {
          Operands.erase(Operands.begin() + 1);
          static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
        }
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}

bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  switch (Inst.getOpcode()) {
  default: return false; // No rewrite applies.
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't. Switching to the reversed
    // (_REV) form flips which operand carries the extended encoding bit.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
    case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
    case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
    case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
    case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
    case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
    case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
    case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
    case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
    case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
    case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
    case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
    case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't. (Operand 2 is the source
    // here because these are three-operand merge forms.)
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  }
}

/// Emit diagnostics for register-usage constraints the matcher cannot
/// express, e.g. the AVX/AVX-512 gather requirement that destination, mask,
/// and index registers be distinct. Returns the result of Warning() when a
/// constraint is violated, false otherwise.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  switch (Inst.getOpcode()) {
  case X86::VGATHERDPDYrm:
  case X86::VGATHERDPDrm:
  case X86::VGATHERDPSYrm:
  case X86::VGATHERDPSrm:
  case X86::VGATHERQPDYrm:
  case X86::VGATHERQPDrm:
  case X86::VGATHERQPSYrm:
  case X86::VGATHERQPSrm:
  case X86::VPGATHERDDYrm:
  case X86::VPGATHERDDrm:
  case X86::VPGATHERDQYrm:
  case X86::VPGATHERDQrm:
  case X86::VPGATHERQDYrm:
  case X86::VPGATHERQDrm:
  case X86::VPGATHERQQYrm:
  case X86::VPGATHERQQrm: {
    // AVX2 gathers: dest, mask, and index must all be distinct.
    unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
    unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
    unsigned Index =
      MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
    if (Dest == Mask || Dest == Index || Mask == Index)
      return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                            "registers should be distinct");
    break;
  }
  case X86::VGATHERDPDZ128rm:
  case X86::VGATHERDPDZ256rm:
  case X86::VGATHERDPDZrm:
  case X86::VGATHERDPSZ128rm:
  case X86::VGATHERDPSZ256rm:
  case X86::VGATHERDPSZrm:
  case X86::VGATHERQPDZ128rm:
  case X86::VGATHERQPDZ256rm:
  case X86::VGATHERQPDZrm:
  case X86::VGATHERQPSZ128rm:
  case X86::VGATHERQPSZ256rm:
  case X86::VGATHERQPSZrm:
  case
X86::VPGATHERDDZ128rm:
  case X86::VPGATHERDDZ256rm:
  case X86::VPGATHERDDZrm:
  case X86::VPGATHERDQZ128rm:
  case X86::VPGATHERDQZ256rm:
  case X86::VPGATHERDQZrm:
  case X86::VPGATHERQDZ128rm:
  case X86::VPGATHERQDZ256rm:
  case X86::VPGATHERQDZrm:
  case X86::VPGATHERQQZ128rm:
  case X86::VPGATHERQQZ256rm:
  case X86::VPGATHERQQZrm: {
    // AVX-512 gathers use a k-register mask, so only dest vs. index can
    // collide; operand 4 is the start of the memory operand here.
    unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
    unsigned Index =
      MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
    if (Dest == Index)
      return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                            "should be distinct");
    break;
  }
  case X86::V4FMADDPSrm:
  case X86::V4FMADDPSrmk:
  case X86::V4FMADDPSrmkz:
  case X86::V4FMADDSSrm:
  case X86::V4FMADDSSrmk:
  case X86::V4FMADDSSrmkz:
  case X86::V4FNMADDPSrm:
  case X86::V4FNMADDPSrmk:
  case X86::V4FNMADDPSrmkz:
  case X86::V4FNMADDSSrm:
  case X86::V4FNMADDSSrmk:
  case X86::V4FNMADDSSrmkz:
  case X86::VP4DPWSSDSrm:
  case X86::VP4DPWSSDSrmk:
  case X86::VP4DPWSSDSrmkz:
  case X86::VP4DPWSSDrm:
  case X86::VP4DPWSSDrmk:
  case X86::VP4DPWSSDrmkz: {
    // These instructions implicitly read a consecutive group of four
    // registers starting at a multiple of 4; warn when the named register is
    // not the first of its group.
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
                                    X86::AddrNumOperands - 1).getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                     RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                     RegName.take_front(3) + Twine(GroupEnd) +
                     "' source group");
    }
    break;
  }
  }

  return false;
}

// Defined by the tablegen-generated matcher included at the bottom of this
// file (GET_SUBTARGET_FEATURE_NAME).
static const char *getSubtargetFeatureName(uint64_t Val);

/// Forward a fully-matched instruction to the streamer.
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
                                   MCStreamer &Out) {
  Out.EmitInstruction(Inst, getSTI());
}

/// Dispatch matching/emission to the syntax-specific implementation.
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           OperandVector &Operands,
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  if (isParsingIntelSyntax())
    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                        MatchingInlineAsm);
  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                    MatchingInlineAsm);
}

/// If \p Op is an "f<op>" mnemonic that is really "wait; fn<op>", emit the
/// WAIT instruction here and rewrite the mnemonic to its "fn" form.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
                                     OperandVector &Operands, MCStreamer &Out,
                                     bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  const char *Repl = StringSwitch<const char *>(Op.getToken())
                         .Case("finit", "fninit")
                         .Case("fsave", "fnsave")
                         .Case("fstcw", "fnstcw")
                         .Case("fstcww", "fnstcw")
                         .Case("fstenv", "fnstenv")
                         .Case("fstsw", "fnstsw")
                         .Case("fstsww", "fnstsw")
                         .Case("fclex", "fnclex")
                         .Default(nullptr);
  if (Repl) {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }
}

/// Report an "instruction requires: <features>" error listing every missing
/// subtarget feature. Always returns true (an error was emitted).
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                       const FeatureBitset &MissingFeatures,
                                       bool MatchingInlineAsm) {
  assert(MissingFeatures.any() && "Unknown missing feature!");
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
    if (MissingFeatures[i])
      OS << ' ' << getSubtargetFeatureName(i);
  }
  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}

// Pop a trailing prefix operand (if any) off \p Operands and return its
// prefix bits, or 0 when the last operand is not a prefix.
static unsigned
getPrefixes(OperandVector &Operands) {
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    Operands.pop_back();
  }
  return Result;
}

/// Reject table matches that contradict an explicitly requested {vex2}/
/// {vex3}/{evex} encoding, and break known VEX/EVEX table ambiguities.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);

  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return Match_Unsupported;

  if ((ForcedVEXEncoding == VEXEncoding_VEX2 ||
       ForcedVEXEncoding == VEXEncoding_VEX3) &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
    return Match_Unsupported;

  // These instructions match ambiguously with their VEX encoded counterparts
  // and appear first in the matching table. Reject them unless we're forcing
  // EVEX encoding.
  // FIXME: We really need a way to break the ambiguity.
  switch (Opc) {
  case X86::VCVTSD2SIZrm_Int:
  case X86::VCVTSD2SI64Zrm_Int:
  case X86::VCVTSS2SIZrm_Int:
  case X86::VCVTSS2SI64Zrm_Int:
  case X86::VCVTTSD2SIZrm:   case X86::VCVTTSD2SIZrm_Int:
  case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
  case X86::VCVTTSS2SIZrm:   case X86::VCVTTSS2SIZrm_Int:
  case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
    if (ForcedVEXEncoding != VEXEncoding_EVEX)
      return Match_Unsupported;
  }

  return Match_Success;
}

/// Match and emit an AT&T-syntax instruction. Tries a direct table match
/// first; on failure retries with each size suffix (b/w/l/q or s/l/t for FP
/// stack ops) appended to the mnemonic, then reports the best diagnostic.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = None;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' '; // Placeholder overwritten by each candidate suffix below.
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                MissingFeatures, MatchingInlineAsm,
                                isParsingIntelSyntax());
    // If this returned as a missing feature failure, remember that.
    if (Match[I] == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}

/// Match and emit an Intel-syntax instruction. Intel mnemonics carry no size
/// suffix, so an unsized memory operand is retried at every operand size
/// until an unambiguous match is found.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = None;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If unsized push has immediate operand we should default the default pointer
  // size for the size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Only record a new result when it selected a different opcode, so the
      // ambiguity count below reflects genuinely distinct matches.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movxz eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}

/// Segment registers are never included in inline-asm clobber lists.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}

/// Top-level dispatcher for X86-specific assembler directives. Returns true
/// when the directive is not handled here (so generic parsing continues).
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());

  return true;
}

/// parseDirectiveEven
///  ::= .even
/// Aligns the current location to 2 bytes (code padding when the section
/// supports it, zero/value fill otherwise).
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    getStreamer().InitSections(false);
    Section = getStreamer().getCurrentSectionOnly();
  }
  if (Section->UseCodeAlign())
    getStreamer().EmitCodeAlignment(2, 0);
  else
    getStreamer().EmitValueToAlignment(2, 0, 1, 0);
  return false;
}

/// ParseDirectiveCode
///  ::= .code16 | .code32 | .code64
/// Switches the parser/streamer between 16-, 32-, and 64-bit modes.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Mode16Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Mode16Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Mode32Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Mode64Bit);
      getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // Error already reported; returning false because the directive was
    // consumed here.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}

// .cv_fpo_proc foo
bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  StringRef ProcName;
  int64_t ParamsSize;
  if (Parser.parseIdentifier(ProcName))
    return Parser.TokError("expected symbol name");
  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
    return true;
  if (!isUIntN(32, ParamsSize))
    return Parser.TokError("parameters size out of range");
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_proc' directive");
  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
}

// .cv_fpo_setframe ebp
bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
  MCAsmParser &Parser = getParser();
  unsigned Reg;
  SMLoc DummyLoc;
  if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_setframe' directive");
  return getTargetStreamer().emitFPOSetFrame(Reg, L);
}

// .cv_fpo_pushreg ebx
bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
  MCAsmParser &Parser = getParser();
  unsigned Reg;
  SMLoc DummyLoc;
  if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
  return getTargetStreamer().emitFPOPushReg(Reg, L);
}

// .cv_fpo_stackalloc 20
bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}

// .cv_fpo_stackalign 8
bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") ||
      Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
  return getTargetStreamer().emitFPOStackAlign(Offset, L);
}

// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
  return getTargetStreamer().emitFPOEndPrologue(L);
}

// .cv_fpo_endproc
bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL("unexpected tokens"))
    return addErrorSuffix(" in '.cv_fpo_endproc' directive");
  return getTargetStreamer().emitFPOEndProc(L);
}

// Force static initialization.
3712 extern "C" void LLVMInitializeX86AsmParser() { 3713 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 3714 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 3715 } 3716 3717 #define GET_REGISTER_MATCHER 3718 #define GET_MATCHER_IMPLEMENTATION 3719 #define GET_SUBTARGET_FEATURE_NAME 3720 #include "X86GenAsmMatcher.inc" 3721