//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>

using namespace llvm;

static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);

static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
    return true;
  }
  return false;
}

namespace {

static const char OpPrecedence[] = {
    0, // IC_OR
    1, // IC_XOR
    2, // IC_AND
    3, // IC_LSHIFT
    3, // IC_RSHIFT
    4, // IC_PLUS
    4, // IC_MINUS
    5, // IC_MULTIPLY
    5, // IC_DIVIDE
    5, // IC_MOD
    6, // IC_NOT
    7, // IC_NEG
    8, // IC_RPAREN
    9, // IC_LPAREN
    0, // IC_IMM
    0  // IC_REGISTER
};

class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  bool Code16GCC;

  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

private:
  SMLoc consumeToken() {
    MCAsmParser &Parser = getParser();
    SMLoc Result = Parser.getTok().getLoc();
    Parser.Lex();
    return Result;
  }

  X86TargetStreamer &getTargetStreamer() {
    assert(getParser().getStreamer().getTargetStreamer() &&
           "do not have a target streamer");
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<X86TargetStreamer &>(TS);
  }

  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
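    // (Editor's note, as background: gas's ".code16gcc" directive assembles
    // 32-bit style code for execution in 16-bit mode, with operand/address
    // size prefixes emitted as needed, so the 32-bit match tables are the
    // right ones to consult before switching back below.)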
    if (Code16GCC)
      SwitchMode(X86::Mode32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Mode16Bit);
    return rv;
  }

  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER
  };

  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    bool isUnaryOperator(const InfixCalculatorTok Op) {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Popped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parenthesis.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (1) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If the parenthesis count is zero and we see a left parenthesis,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
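      // (Once the leftovers are flushed, PostfixStack holds the full RPN form
      // produced by the shunting-yard logic in pushOperator: e.g. the infix
      // input "2 + 3 * 4" ends up as [2, 3, 4, *, +], which the evaluation
      // loop below reduces to 3*4 = 12, then 2+12 = 14.)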
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };

  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };

  class IntelExprStateMachine {
    IntelExprState State, PrevState;
    unsigned BaseReg, IndexReg, TmpReg, Scale;
    int64_t Imm;
    const MCExpr *Sym;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    short BracCount;
    bool MemExpr;
    bool OffsetOperator;
    SMLoc OffsetOperatorLoc;
    StringRef CurType;

    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }

  public:
    IntelExprStateMachine()
        : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
          TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
          MemExpr(false), OffsetOperator(false) {}

    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() { return BracCount; }
    bool isMemExpr() { return MemExpr; }
    bool isOffsetOperator() { return OffsetOperator; }
    SMLoc getOffsetLoc() { return OffsetOperatorLoc; }
    unsigned getBaseReg() { return BaseReg; }
    unsigned getIndexReg() { return IndexReg; }
    unsigned getScale() { return Scale; }
    const MCExpr *getSym() { return Sym; }
    StringRef getSymName() { return SymName; }
    StringRef getType() { return CurType; }
    int64_t getImm() { return Imm + IC.execute(); }
    bool isValidEndState() {
      return State == IES_RBRAC || State == IES_INTEGER;
    }
    bool hadError() { return State == IES_ERROR; }
    InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }

    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
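    // (Convention followed by the handlers below: when a register is directly
    // followed by '+', '-', or ']', the first such register becomes BaseReg
    // and a second one becomes IndexReg with no explicit scale, while
    // "scale*reg" and "reg*scale" products are recognized in onRegister and
    // onInteger respectively.)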
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg) {
              ErrMsg = "BaseReg/IndexReg already set!";
              return true;
            }
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // Push the minus operator if it is not a negate operator.
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have a negate operator for Scale: it's illegal.
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg) {
              ErrMsg = "BaseReg/IndexReg already set!";
              return true;
            }
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg) {
            ErrMsg = "BaseReg/IndexReg already set!";
            return true;
          }
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
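          // (e.g. for "[4*ebx]": the 4 is popped back off the calculator and
          // replaced with a 0 operand, and the pending '*' is dropped, so the
          // product does not also count toward the immediate displacement.)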
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        break;
      }
      return false;
    }
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg) {
            ErrMsg = "BaseReg/IndexReg already set!";
            return true;
          }
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracCount++;
      return false;
    }
    bool onRBrac() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1)
          return true;
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            assert (!IndexReg && "BaseReg/IndexReg already set!");
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' onto the operand stack.
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
    void onCast(StringRef Type) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_LPAREN:
        setType(Type);
        State = IES_CAST;
        break;
      }
    }
    void setType(StringRef Type) { CurType = Type; }
  };

  bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
             bool MatchingInlineAsm = false) {
    MCAsmParser &Parser = getParser();
    if (MatchingInlineAsm) {
      if (!getLexer().isAtStartOfStatement())
        Parser.eatToEndOfStatement();
      return false;
    }
    return Parser.Error(L, Msg, Range);
  }

  std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
    Error(Loc, Msg, R);
    return nullptr;
  }

  bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);
  std::unique_ptr<X86Operand> ParseOperand();
  std::unique_ptr<X86Operand> ParseATTOperand();
  std::unique_ptr<X86Operand> ParseIntelOperand();
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);

  std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
                                              const MCExpr *&Disp,
                                              const SMLoc &StartLoc,
                                              SMLoc &EndLoc);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  std::unique_ptr<X86Operand>
  CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
                          unsigned IndexReg, unsigned Scale, SMLoc Start,
                          SMLoc End, unsigned Size, StringRef Identifier,
                          const InlineAsmIdentifierInfo &Info);

  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);
  bool parseDirectiveFPOData(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, unsigned &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
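  // (Roughly: when -x86-experimental-lvi-inline-asm-hardening is enabled,
  // these insert LFENCE serialization barriers around instructions that can
  // act as LVI gadgets, e.g. after loads, and warn about instructions that
  // cannot be hardened automatically.)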
  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                  OperandVector &Operands, MCStreamer &Out,
                                  uint64_t &ErrorInfo,
                                  bool MatchingInlineAsm);

  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                    OperandVector &Operands, MCStreamer &Out,
                                    uint64_t &ErrorInfo,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512-specific operand primitives: masked registers
  /// ({%k<NUM>}, {z}) and memory broadcasting ({1to<NUM>}), updating the
  /// Operands vector if required.
  /// Returns false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands,
                           const MCParsedAsmOperand &Op);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);

  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Mode64Bit];
  }
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Mode32Bit];
  }
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().getFeatureBits()[X86::Mode16Bit];
  }
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    FeatureBitset FB = ComputeAvailableFeatures(
        STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }

  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }

  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };

  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

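    // (".word" is a 2-byte directive on x86, so aliasing it to ".2byte" keeps
    // the generic directive parser emitting the right size; on some other
    // targets ".word" means 4 bytes.)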
    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }

  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;

  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace

/// @name Auto-generated Match Functions
/// {

static unsigned MatchRegisterName(StringRef Name);

/// }

static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register, make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can also be a 128-bit, 256-bit, or 512-bit
  // vector register.

  if (BaseReg != 0 &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  if (IndexReg != 0 &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP ||
      IndexReg == X86::ESP || IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

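  // (For reference, the only legal 16-bit base/index combinations are the
  // classic [bx], [bp], [si], [di], [bx+si], [bx+di], [bp+si], and [bp+di]
  // forms; the checks below enforce exactly that.)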
  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  if (BaseReg != 0 && IndexReg != 0) {
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}

bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix; unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat them as identifiers instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // REX prefix.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

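  // (The "db" spelling mirrors gas, which accepts %db0..%db7 as an alternate
  // naming for the debug registers; the alias is extended here through db15
  // to cover dr8-dr15 in 64-bit mode.)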
  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && RegName.startswith("db")) {
    if (RegName.size() == 3) {
      switch (RegName[2]) {
      case '0': RegNo = X86::DR0; break;
      case '1': RegNo = X86::DR1; break;
      case '2': RegNo = X86::DR2; break;
      case '3': RegNo = X86::DR3; break;
      case '4': RegNo = X86::DR4; break;
      case '5': RegNo = X86::DR5; break;
      case '6': RegNo = X86::DR6; break;
      case '7': RegNo = X86::DR7; break;
      case '8': RegNo = X86::DR8; break;
      case '9': RegNo = X86::DR9; break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      switch (RegName[3]) {
      case '0': RegNo = X86::DR10; break;
      case '1': RegNo = X86::DR11; break;
      case '2': RegNo = X86::DR12; break;
      case '3': RegNo = X86::DR13; break;
      case '4': RegNo = X86::DR14; break;
      case '5': RegNo = X86::DR15; break;
      }
    }
  }

  if (RegNo == 0) {
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}

bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix; unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
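    // (x87 stack registers: a bare "%st" means %st(0); "%st(N)" with N in
    // 0-7 names the register N positions from the top of the FP stack.)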
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy X86AsmParser::tryParseRegister(unsigned &RegNo,
                                                    SMLoc &StartLoc,
                                                    SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}

unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  switch (RegClassID) {
  default: llvm_unreachable("Unexpected register class");
  case X86::GR64RegClassID:
    return IsSIReg ? X86::RSI : X86::RDI;
  case X86::GR32RegClassID:
    return IsSIReg ? X86::ESI : X86::EDI;
  case X86::GR16RegClassID:
    return IsSIReg ? X86::SI : X86::DI;
  }
}
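// (The SI/DI helpers above support the string instructions - movs, cmps,
// lods, stos, scas, ins, outs - whose memory operands are implicitly based
// on (R|E)SI and ES:(R|E)DI; VerifyAndAdjustOperands below canonicalizes any
// explicit operands the user wrote, e.g. "movsb (%esi), (%edi)".)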

void X86AsmParser::AddDefaultSrcDestOperands(
    OperandVector &Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
    std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
  if (isParsingIntelSyntax()) {
    Operands.push_back(std::move(Dst));
    Operands.push_back(std::move(Src));
  } else {
    Operands.push_back(std::move(Src));
    Operands.push_back(std::move(Dst));
  }
}

bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check that the sizes match; OrigOperands also contains the instruction
    // name.
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands
          // happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class.
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands
          // happen
          return false;

        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
"ES:(R|E)SI" : "ES:(R|E)DI"; 1498 Warnings.push_back(std::make_pair( 1499 OrigOp.getStartLoc(), 1500 "memory operand is only for determining the size, " + RegName + 1501 " will be used for the location")); 1502 } 1503 1504 FinalOp.Mem.Size = OrigOp.Mem.Size; 1505 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg; 1506 FinalOp.Mem.BaseReg = FinalReg; 1507 } 1508 } 1509 1510 // Produce warnings only if all the operands passed the adjustment - prevent 1511 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings 1512 for (auto &WarningMsg : Warnings) { 1513 Warning(WarningMsg.first, WarningMsg.second); 1514 } 1515 1516 // Remove old operands 1517 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1518 OrigOperands.pop_back(); 1519 } 1520 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end()); 1521 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1522 OrigOperands.push_back(std::move(FinalOperands[i])); 1523 1524 return false; 1525 } 1526 1527 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { 1528 if (isParsingIntelSyntax()) 1529 return ParseIntelOperand(); 1530 return ParseATTOperand(); 1531 } 1532 1533 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForMSInlineAsm( 1534 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 1535 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 1536 const InlineAsmIdentifierInfo &Info) { 1537 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or 1538 // some other label reference. 1539 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) { 1540 // Insert an explicit size if the user didn't have one. 1541 if (!Size) { 1542 Size = getPointerWidth(); 1543 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, 1544 /*Len=*/0, Size); 1545 } 1546 // Create an absolute memory reference in order to match against 1547 // instructions taking a PC relative operand. 1548 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size, 1549 Identifier, Info.Label.Decl); 1550 } 1551 // We either have a direct symbol reference, or an offset from a symbol. The 1552 // parser always puts the symbol on the LHS, so look there for size 1553 // calculation purposes. 1554 unsigned FrontendSize = 0; 1555 void *Decl = nullptr; 1556 bool IsGlobalLV = false; 1557 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { 1558 // Size is in terms of bits in this context. 1559 FrontendSize = Info.Var.Type * 8; 1560 Decl = Info.Var.Decl; 1561 IsGlobalLV = Info.Var.IsGlobalLV; 1562 } 1563 // It is widely common for MS InlineAsm to use a global variable and one/two 1564 // registers in a mmory expression, and though unaccessible via rip/eip. 1565 if (IsGlobalLV && (BaseReg || IndexReg)) { 1566 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End); 1567 // Otherwise, we set the base register to a non-zero value 1568 // if we don't know the actual value at this time. This is necessary to 1569 // get the matching correct in some cases. 1570 } else { 1571 BaseReg = BaseReg ? 
    BaseReg = BaseReg ? BaseReg : 1;
    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
                                 IndexReg, Scale, Start, End, Size,
                                 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl,
                                 FrontendSize);
  }
}

// Some binary bitwise operators have a named synonym. Query a candidate
// string for being such a named operator and, if so, invoke the appropriate
// handler.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
    return false;
  if (Name.equals_lower("not")) {
    SM.onNot();
  } else if (Name.equals_lower("or")) {
    SM.onOr();
  } else if (Name.equals_lower("shl")) {
    SM.onLShift();
  } else if (Name.equals_lower("shr")) {
    SM.onRShift();
  } else if (Name.equals_lower("xor")) {
    SM.onXor();
  } else if (Name.equals_lower("and")) {
    SM.onAnd();
  } else if (Name.equals_lower("mod")) {
    SM.onMod();
  } else if (Name.equals_lower("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  if (!Name.equals_lower("offset"))
    End = consumeToken();
  return true;
}

bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  StringRef ErrMsg;

  AsmToken::TokenKind PrevTK = AsmToken::Error;
  bool Done = false;
  while (!Done) {
    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::EndOfStatement:
      Done = true;
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::At:
    case AsmToken::String:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_lower("ptr")) {
          SM.onCast(Identifier);
          // Eat type and PTR.
          consumeToken();
          End = consumeToken();
          break;
        }
      }
      // Register, or (MASM only) <register>.<field>
      unsigned Reg;
      if (Tok.is(AsmToken::Identifier)) {
        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
          if (SM.onRegister(Reg, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (Parser.isParsingMasm()) {
          const std::pair<StringRef, StringRef> IDField =
              Tok.getString().split('.');
          const StringRef ID = IDField.first, Field = IDField.second;
          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
          if (!Field.empty() &&
              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
            if (SM.onRegister(Reg, ErrMsg))
              return Error(IdentLoc, ErrMsg);

            StringRef Type;
            unsigned Offset = 0;
            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
            if (Parser.lookUpField(Field, Type, Offset))
              return Error(FieldStartLoc, "unknown offset");
            else if (SM.onPlus(ErrMsg))
              return Error(getTok().getLoc(), ErrMsg);
            else if (SM.onInteger(Offset, ErrMsg))
              return Error(IdentLoc, ErrMsg);
            SM.setType(Type);

            End = consumeToken();
            break;
          }
        }
      }
      // Operator synonyms ("not", "or", etc.)
      bool ParseError = false;
      if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      // Symbol reference, when parsing assembly content
      InlineAsmIdentifierInfo Info;
      const MCExpr *Val;
      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
        // MS Dot Operator expression
        if (Identifier.count('.') &&
            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
          if (ParseIntelDotOperator(SM, End))
            return true;
          break;
        }
      }
      if (isParsingMSInlineAsm()) {
        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
            if (SM.onInteger(Val, ErrMsg))
              return Error(IdentLoc, ErrMsg);
          } else
            return true;
          break;
        }
        // MS InlineAsm identifier
        // Call parseIdentifier() to combine @ with the identifier behind it.
        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
          return Error(IdentLoc, "expected identifier");
        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
          return true;
        else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
          return Error(IdentLoc, ErrMsg);
        break;
      }
      if (getParser().parsePrimaryExpr(Val, End)) {
        return Error(Tok.getLoc(), "Unexpected identifier!");
      } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
        return Error(IdentLoc, ErrMsg);
      }
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          InlineAsmIdentifierInfo Info;
          if (SM.onIdentifierExpr(Val, Identifier, Info,
                                  isParsingMSInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde: SM.onNot(); break;
    case AsmToken::Star: SM.onStar(); break;
    case AsmToken::Slash: SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe: SM.onOr(); break;
    case AsmToken::Caret: SM.onXor(); break;
    case AsmToken::Amp: SM.onAnd(); break;
    case AsmToken::LessLess:
      SM.onLShift(); break;
    case AsmToken::GreaterGreater:
      SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      break;
    case AsmToken::LParen: SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for a complex rewrite;
    // simply skip everything after it.
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(
    const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
    bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
  MCAsmParser &Parser = getParser();
  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
         "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
        SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                           Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name,
    // unless we are parsing the operand of an offset operator.
    if (!IsParsingOffsetOperator)
      InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                          InternalName);
    else
      Identifier = InternalName;
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}

// ParseRoundingModeOp - Parse an AVX-512 rounding-mode operand.
std::unique_ptr<X86Operand>
X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
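  // Accepted forms: "{rn-sae}", "{rd-sae}", "{ru-sae}", and "{rz-sae}" select
  // a static rounding mode, while a bare "{sae}" suppresses exceptions
  // without overriding rounding.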
1912   const SMLoc consumedToken = consumeToken();
1913   if (Tok.isNot(AsmToken::Identifier))
1914     return ErrorOperand(Tok.getLoc(), "Expected an identifier after {");
1915   if (Tok.getIdentifier().startswith("r")) {
1916     int rndMode = StringSwitch<int>(Tok.getIdentifier())
1917                       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1918                       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1919                       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1920                       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1921                       .Default(-1);
1922     if (-1 == rndMode)
1923       return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1924     Parser.Lex(); // Eat "r*" of r*-sae
1925     if (!getLexer().is(AsmToken::Minus))
1926       return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1927     Parser.Lex(); // Eat "-"
1928     Parser.Lex(); // Eat the sae
1929     if (!getLexer().is(AsmToken::RCurly))
1930       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1931     SMLoc End = Tok.getEndLoc();
1932     Parser.Lex(); // Eat "}"
1933     const MCExpr *RndModeOp =
1934         MCConstantExpr::create(rndMode, Parser.getContext());
1935     return X86Operand::CreateImm(RndModeOp, Start, End);
1936   }
1937   if (Tok.getIdentifier().equals("sae")) {
1938     Parser.Lex(); // Eat the sae
1939     if (!getLexer().is(AsmToken::RCurly))
1940       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1941     Parser.Lex(); // Eat "}"
1942     return X86Operand::CreateToken("{sae}", consumedToken);
1943   }
1944   return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1945 }
1946
1947 /// Parse the '.' operator.
1948 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
1949                                          SMLoc &End) {
1950   const AsmToken &Tok = getTok();
1951   StringRef Type;
1952   unsigned Offset = 0;
1953
1954   // Drop the optional '.'.
1955   StringRef DotDispStr = Tok.getString();
1956   if (DotDispStr.startswith("."))
1957     DotDispStr = DotDispStr.drop_front(1);
1958
1959   // .Imm gets lexed as a real.
1960   if (Tok.is(AsmToken::Real)) {
1961     APInt DotDisp;
1962     DotDispStr.getAsInteger(10, DotDisp);
1963     Offset = DotDisp.getZExtValue();
1964   } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
1965              Tok.is(AsmToken::Identifier)) {
1966     const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1967     const StringRef Base = BaseMember.first, Member = BaseMember.second;
1968     if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
1969         getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
1970         getParser().lookUpField(DotDispStr, Type, Offset) &&
1971         (!SemaCallback ||
1972          SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
1973       return Error(Tok.getLoc(), "Unable to lookup field reference!");
1974   } else
1975     return Error(Tok.getLoc(), "Unexpected token type!");
1976
1977   // Eat the DotExpression and update End.
1978   End = SMLoc::getFromPointer(DotDispStr.data());
1979   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1980   while (Tok.getLoc().getPointer() < DotExprEndLoc)
1981     Lex();
1982   SM.addImm(Offset);
1983   SM.setType(Type);
1984   return false;
1985 }
1986
1987 /// Parse the 'offset' operator.
1988 /// This operator is used to specify the location of a given operand.
1989 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1990                                             InlineAsmIdentifierInfo &Info,
1991                                             SMLoc &End) {
1992   // Eat offset, mark start of identifier.
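  // For illustration (MASM or MS inline asm), this parses the operand of e.g.
  //   mov eax, offset my_var
  // where "my_var" stands for some variable or label in scope.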
1993   SMLoc Start = Lex().getLoc();
1994   ID = getTok().getString();
1995   if (!isParsingMSInlineAsm()) {
1996     if ((getTok().isNot(AsmToken::Identifier) &&
1997          getTok().isNot(AsmToken::String)) ||
1998         getParser().parsePrimaryExpr(Val, End))
1999       return Error(Start, "unexpected token!");
2000   } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2001     return Error(Start, "unable to lookup expression");
2002   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2003     return Error(Start, "offset operator cannot yet handle constants");
2004   }
2005   return false;
2006 }
2007
2008 // Query a candidate string for being an Intel assembly operator.
2009 // Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
2010 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2011   return StringSwitch<unsigned>(Name)
2012       .Cases("TYPE", "type", IOK_TYPE)
2013       .Cases("SIZE", "size", IOK_SIZE)
2014       .Cases("LENGTH", "length", IOK_LENGTH)
2015       .Default(IOK_INVALID);
2016 }
2017
2018 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2019 /// returns the number of elements in an array. It returns the value 1 for
2020 /// non-array variables. The SIZE operator returns the size of a C or C++
2021 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2022 /// TYPE operator returns the size of a C or C++ type or variable. If the
2023 /// variable is an array, TYPE returns the size of a single element.
2024 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2025   MCAsmParser &Parser = getParser();
2026   const AsmToken &Tok = Parser.getTok();
2027   Parser.Lex(); // Eat operator.
2028
2029   const MCExpr *Val = nullptr;
2030   InlineAsmIdentifierInfo Info;
2031   SMLoc Start = Tok.getLoc(), End;
2032   StringRef Identifier = Tok.getString();
2033   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2034                                     /*Unevaluated=*/true, End))
2035     return 0;
2036
2037   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2038     Error(Start, "unable to lookup expression");
2039     return 0;
2040   }
2041
2042   unsigned CVal = 0;
2043   switch (OpKind) {
2044   default: llvm_unreachable("Unexpected operand kind!");
2045   case IOK_LENGTH: CVal = Info.Var.Length; break;
2046   case IOK_SIZE: CVal = Info.Var.Size; break;
2047   case IOK_TYPE: CVal = Info.Var.Type; break;
2048   }
2049
2050   return CVal;
2051 }
2052
2053 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2054   Size = StringSwitch<unsigned>(getTok().getString())
2055              .Cases("BYTE", "byte", 8)
2056              .Cases("WORD", "word", 16)
2057              .Cases("DWORD", "dword", 32)
2058              .Cases("FLOAT", "float", 32)
2059              .Cases("LONG", "long", 32)
2060              .Cases("FWORD", "fword", 48)
2061              .Cases("DOUBLE", "double", 64)
2062              .Cases("QWORD", "qword", 64)
2063              .Cases("MMWORD", "mmword", 64)
2064              .Cases("XWORD", "xword", 80)
2065              .Cases("TBYTE", "tbyte", 80)
2066              .Cases("XMMWORD", "xmmword", 128)
2067              .Cases("YMMWORD", "ymmword", 256)
2068              .Cases("ZMMWORD", "zmmword", 512)
2069              .Default(0);
2070   if (Size) {
2071     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2072     if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2073       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2074     Lex(); // Eat ptr.
2075   }
2076   return false;
2077 }
2078
2079 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
2080   MCAsmParser &Parser = getParser();
2081   const AsmToken &Tok = Parser.getTok();
2082   SMLoc Start, End;
2083
2084   // Parse optional Size directive.
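  // e.g. in "mov eax, dword ptr [rbx]" the "dword ptr" sets Size to 32 here;
  // a size keyword must be followed by "PTR"/"ptr", as checked above.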
2085   unsigned Size;
2086   if (ParseIntelMemoryOperandSize(Size))
2087     return nullptr;
2088   bool PtrInOperand = bool(Size);
2089
2090   Start = Tok.getLoc();
2091
2092   // Rounding mode operand.
2093   if (getLexer().is(AsmToken::LCurly))
2094     return ParseRoundingModeOp(Start);
2095
2096   // Register operand.
2097   unsigned RegNo = 0;
2098   if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
2099     if (RegNo == X86::RIP)
2100       return ErrorOperand(Start, "rip can only be used as a base register");
2101     // A register followed by ':' is considered a segment override.
2102     if (Tok.isNot(AsmToken::Colon))
2103       return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
2104           ErrorOperand(Start, "expected memory operand after 'ptr', "
2105                               "found register operand instead");
2106     // An alleged segment override; check that we have a valid segment register.
2107     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
2108       return ErrorOperand(Start, "invalid segment register");
2109     // Eat ':' and update Start location.
2110     Start = Lex().getLoc();
2111   }
2112
2113   // Immediates and Memory.
2114   IntelExprStateMachine SM;
2115   if (ParseIntelExpression(SM, End))
2116     return nullptr;
2117
2118   if (isParsingMSInlineAsm())
2119     RewriteIntelExpression(SM, Start, Tok.getLoc());
2120
2121   int64_t Imm = SM.getImm();
2122   const MCExpr *Disp = SM.getSym();
2123   const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
2124   if (Disp && Imm)
2125     Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
2126   if (!Disp)
2127     Disp = ImmDisp;
2128
2129   // RegNo != 0 specifies a valid segment register,
2130   // and we are parsing a segment override.
2131   if (!SM.isMemExpr() && !RegNo) {
2132     if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
2133       const InlineAsmIdentifierInfo Info = SM.getIdentifierInfo();
2134       if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2135         // Disp includes the address of a variable; make sure this is recorded
2136         // for later handling.
2137         return X86Operand::CreateImm(Disp, Start, End, SM.getSymName(),
2138                                      Info.Var.Decl, Info.Var.IsGlobalLV);
2139       }
2140     }
2141
2142     return X86Operand::CreateImm(Disp, Start, End);
2143   }
2144
2145   StringRef ErrMsg;
2146   unsigned BaseReg = SM.getBaseReg();
2147   unsigned IndexReg = SM.getIndexReg();
2148   unsigned Scale = SM.getScale();
2149
2150   if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
2151       (IndexReg == X86::ESP || IndexReg == X86::RSP))
2152     std::swap(BaseReg, IndexReg);
2153
2154   // If BaseReg is a vector register and IndexReg is not, swap them unless
2155   // Scale was specified, in which case it would be an error.
2156   if (Scale == 0 &&
2157       !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
2158         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
2159         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
2160       (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
2161        X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
2162        X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
2163     std::swap(BaseReg, IndexReg);
2164
2165   if (Scale != 0 &&
2166       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
2167     return ErrorOperand(Start, "16-bit addresses cannot have a scale");
2168
2169   // If there was no explicit scale specified, change it to 1.
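  // e.g. "[rax+rbx]" reaches this point with Scale still 0 and defaults to 1
  // below, while "[rax+rbx*4]" has already set Scale to 4.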
2170   if (Scale == 0)
2171     Scale = 1;
2172
2173   // If this is a 16-bit addressing mode with the base and index in the wrong
2174   // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
2175   // shared with AT&T syntax, where order matters.
2176   if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
2177       (IndexReg == X86::BX || IndexReg == X86::BP))
2178     std::swap(BaseReg, IndexReg);
2179
2180   if ((BaseReg || IndexReg) &&
2181       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2182                                       ErrMsg))
2183     return ErrorOperand(Start, ErrMsg);
2184   if (isParsingMSInlineAsm())
2185     return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
2186                                    End, Size, SM.getSymName(),
2187                                    SM.getIdentifierInfo());
2188
2189   // When parsing x64 MS-style assembly, all memory operands default to
2190   // RIP-relative when interpreted as non-absolute references.
2191   if (Parser.isParsingMasm() && is64BitMode())
2192     return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp, BaseReg,
2193                                  IndexReg, Scale, Start, End, Size,
2194                                  /*DefaultBaseReg=*/X86::RIP);
2195
2196   if (!(BaseReg || IndexReg || RegNo))
2197     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
2198   return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
2199                                BaseReg, IndexReg, Scale, Start, End, Size);
2200 }
2201
2202 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
2203   MCAsmParser &Parser = getParser();
2204   switch (getLexer().getKind()) {
2205   case AsmToken::Dollar: {
2206     // $42 or $ID -> immediate.
2207     SMLoc Start = Parser.getTok().getLoc(), End;
2208     Parser.Lex();
2209     const MCExpr *Val;
2210     // This is an immediate, so we should not parse a register. Do a precheck
2211     // for '%' to supersede intra-register parse errors.
2212     SMLoc L = Parser.getTok().getLoc();
2213     if (check(getLexer().is(AsmToken::Percent), L,
2214               "expected immediate expression") ||
2215         getParser().parseExpression(Val, End) ||
2216         check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
2217       return nullptr;
2218     return X86Operand::CreateImm(Val, Start, End);
2219   }
2220   case AsmToken::LCurly: {
2221     SMLoc Start = Parser.getTok().getLoc();
2222     return ParseRoundingModeOp(Start);
2223   }
2224   default: {
2225     // This is a memory operand or a register. We have some parsing complications
2226     // as a '(' may be part of an immediate expression or the addressing mode
2227     // block. This is complicated by the fact that an assembler-level variable
2228     // may refer either to a register or an immediate expression.
2229
2230     SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
2231     const MCExpr *Expr = nullptr;
2232     unsigned Reg = 0;
2233     if (getLexer().isNot(AsmToken::LParen)) {
2234       // No '(' so this is either a displacement expression or a register.
2235       if (Parser.parseExpression(Expr, EndLoc))
2236         return nullptr;
2237       if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
2238         // Segment Register. Reset Expr and copy value to register.
2239         Expr = nullptr;
2240         Reg = RE->getRegNo();
2241
2242         // Sanity check register.
2243         if (Reg == X86::EIZ || Reg == X86::RIZ)
2244           return ErrorOperand(
2245               Loc, "%eiz and %riz can only be used as index registers",
2246               SMRange(Loc, EndLoc));
2247         if (Reg == X86::RIP)
2248           return ErrorOperand(Loc, "%rip can only be used as a base register",
2249                               SMRange(Loc, EndLoc));
2250         // Return registers that are not segment prefixes immediately.
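        // e.g. for "%gs:4(%rbp)", "%gs" was parsed as a register above and
        // the ':' that follows marks it as a segment override rather than a
        // plain register operand.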
2251         if (!Parser.parseOptionalToken(AsmToken::Colon))
2252           return X86Operand::CreateReg(Reg, Loc, EndLoc);
2253         if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
2254           return ErrorOperand(Loc, "invalid segment register");
2255       }
2256     }
2257     // This is a Memory operand.
2258     return ParseMemOperand(Reg, Expr, Loc, EndLoc);
2259   }
2260   }
2261 }
2262
2263 // Returns X86::COND_INVALID if not a recognized condition code or alternate
2264 // mnemonic; otherwise the EFLAGS Condition Code enumerator.
2265 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2266   return StringSwitch<X86::CondCode>(CC)
2267       .Case("o", X86::COND_O)          // Overflow
2268       .Case("no", X86::COND_NO)        // No Overflow
2269       .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
2270       .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2271       .Cases("e", "z", X86::COND_E)    // Equal/Zero
2272       .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2273       .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2274       .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
2275       .Case("s", X86::COND_S)          // Sign
2276       .Case("ns", X86::COND_NS)        // No Sign
2277       .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
2278       .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2279       .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
2280       .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2281       .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2282       .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
2283       .Default(X86::COND_INVALID);
2284 }
2285
2286 // Returns true on failure, false otherwise.
2287 // If no {z} mark was found, the parser doesn't advance.
2288 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2289                           const SMLoc &StartLoc) {
2290   MCAsmParser &Parser = getParser();
2291   // Assuming we are just past the '{' mark, query the next token.
2292   // If no {z} mark is found, return false, as no parsing error was
2293   // encountered.
2294   if (!(getLexer().is(AsmToken::Identifier) &&
2295         (getLexer().getTok().getIdentifier() == "z")))
2296     return false;
2297   Parser.Lex(); // Eat z
2298   // Query and eat the '}' mark.
2299   if (!getLexer().is(AsmToken::RCurly))
2300     return Error(getLexer().getLoc(), "Expected } at this point");
2301   Parser.Lex(); // Eat '}'
2302   // Assign Z the {z} mark operand.
2303   Z = X86Operand::CreateToken("{z}", StartLoc);
2304   return false;
2305 }
2306
2307 // Returns true on failure, false otherwise.
2308 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2309                                        const MCParsedAsmOperand &Op) {
2310   MCAsmParser &Parser = getParser();
2311   if (getLexer().is(AsmToken::LCurly)) {
2312     // Eat "{" and mark the current place.
2313     const SMLoc consumedToken = consumeToken();
2314     // Distinguish {1to<NUM>} from {%k<NUM>}.
2315     if (getLexer().is(AsmToken::Integer)) {
2316       // Parse memory broadcasting ({1to<NUM>}).
2317       if (getLexer().getTok().getIntVal() != 1)
2318         return TokError("Expected 1to<NUM> at this point");
2319       Parser.Lex(); // Eat "1" of 1to8
2320       if (!getLexer().is(AsmToken::Identifier) ||
2321           !getLexer().getTok().getIdentifier().startswith("to"))
2322         return TokError("Expected 1to<NUM> at this point");
2323       // Recognize only reasonable suffixes.
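      // e.g. (AT&T) "vaddps (%rax){1to16}, %zmm1, %zmm2" broadcasts a single
      // dword to all sixteen elements of the source.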
2324       const char *BroadcastPrimitive =
2325           StringSwitch<const char *>(getLexer().getTok().getIdentifier())
2326               .Case("to2", "{1to2}")
2327               .Case("to4", "{1to4}")
2328               .Case("to8", "{1to8}")
2329               .Case("to16", "{1to16}")
2330               .Default(nullptr);
2331       if (!BroadcastPrimitive)
2332         return TokError("Invalid memory broadcast primitive.");
2333       Parser.Lex(); // Eat "toN" of 1toN
2334       if (!getLexer().is(AsmToken::RCurly))
2335         return TokError("Expected } at this point");
2336       Parser.Lex(); // Eat "}"
2337       Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2338                                                  consumedToken));
2339       // No AVX512 specific primitives can pass
2340       // after memory broadcasting, so return.
2341       return false;
2342     } else {
2343       // Parse either {k}{z}, {z}{k}, {k} or {z}. The last one has no
2344       // meaning on its own, but GCC accepts it.
2345       // At this point we are just past a '{' mark.
2346       std::unique_ptr<X86Operand> Z;
2347       if (ParseZ(Z, consumedToken))
2348         return true;
2349       // Reaching here means that parsing of the alleged '{z}' mark yielded
2350       // no errors.
2351       // Query for the need of further parsing for a {%k<NUM>} mark.
2352       if (!Z || getLexer().is(AsmToken::LCurly)) {
2353         SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2354         // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2355         // expected.
2356         unsigned RegNo;
2357         SMLoc RegLoc;
2358         if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2359             X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2360           if (RegNo == X86::K0)
2361             return Error(RegLoc, "Register k0 can't be used as write mask");
2362           if (!getLexer().is(AsmToken::RCurly))
2363             return Error(getLexer().getLoc(), "Expected } at this point");
2364           Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2365           Operands.push_back(
2366               X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2367           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2368         } else
2369           return Error(getLexer().getLoc(),
2370                        "Expected an op-mask register at this point");
2371         // {%k<NUM>} mark is found, inquire for {z}.
2372         if (getLexer().is(AsmToken::LCurly) && !Z) {
2373           // If we found a parsing error, or found no (expected) {z} mark,
2374           // report an error.
2375           if (ParseZ(Z, consumeToken()) || !Z)
2376             return Error(getLexer().getLoc(),
2377                          "Expected a {z} mark at this point");
2378
2379         }
2380         // '{z}' on its own is meaningless, hence should be ignored.
2381         // On the contrary, had it been accompanied by a K register,
2382         // allow it.
2383         if (Z)
2384           Operands.push_back(std::move(Z));
2385       }
2386     }
2387   }
2388   return false;
2389 }
2390
2391 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2392 /// has already been parsed if present. disp may be provided as well.
2393 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2394                                                           const MCExpr *&Disp,
2395                                                           const SMLoc &StartLoc,
2396                                                           SMLoc &EndLoc) {
2397   MCAsmParser &Parser = getParser();
2398   SMLoc Loc;
2399   // Based on the initially passed values, we are in one of these cases
2400   // (with the current position marked (*)):
2401
2402   // 1. seg : * disp  (base-index-scale-expr)
2403   // 2. seg : *(disp) (base-index-scale-expr)
2404   // 3. seg : *(base-index-scale-expr)
2405   // 4. disp  *(base-index-scale-expr)
2406   // 5. *(disp)  (base-index-scale-expr)
2407   // 6. *(base-index-scale-expr)
2408   // 7. disp *
2409   // 8. *(disp)
2410
2411   // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2412   // checking if the first object after the parenthesis is a register (or an
2413   // identifier referring to a register) and parse the displacement or default
2414   // to 0 as appropriate.
2415   auto isAtMemOperand = [this]() {
2416     if (this->getLexer().isNot(AsmToken::LParen))
2417       return false;
2418     AsmToken Buf[2];
2419     StringRef Id;
2420     auto TokCount = this->getLexer().peekTokens(Buf, true);
2421     if (TokCount == 0)
2422       return false;
2423     switch (Buf[0].getKind()) {
2424     case AsmToken::Percent:
2425     case AsmToken::Comma:
2426       return true;
2427     // The cases below effectively peek at an identifier.
2428     case AsmToken::At:
2429     case AsmToken::Dollar:
2430       if ((TokCount > 1) &&
2431           (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2432           (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2433         Id = StringRef(Buf[0].getLoc().getPointer(),
2434                        Buf[1].getIdentifier().size() + 1);
2435       break;
2436     case AsmToken::Identifier:
2437     case AsmToken::String:
2438       Id = Buf[0].getIdentifier();
2439       break;
2440     default:
2441       return false;
2442     }
2443     // We have an ID. Check if it is bound to a register.
2444     if (!Id.empty()) {
2445       MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2446       if (Sym->isVariable()) {
2447         auto V = Sym->getVariableValue(/*SetUsed*/ false);
2448         return isa<X86MCExpr>(V);
2449       }
2450     }
2451     return false;
2452   };
2453
2454   if (!Disp) {
2455     // Parse immediate if we're not at a mem operand yet.
2456     if (!isAtMemOperand()) {
2457       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2458         return nullptr;
2459       assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2460     } else {
2461       // Disp is implicitly zero if we haven't parsed it yet.
2462       Disp = MCConstantExpr::create(0, Parser.getContext());
2463     }
2464   }
2465
2466   // We are now either at the end of the operand or at the '(' at the start of
2467   // a base-index-scale-expr.
2468
2469   if (!parseOptionalToken(AsmToken::LParen)) {
2470     if (SegReg == 0)
2471       return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2472     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2473                                  StartLoc, EndLoc);
2474   }
2475
2476   // If we reached here, then eat the '(' and process
2477   // the rest of the memory operand.
2478   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2479   SMLoc BaseLoc = getLexer().getLoc();
2480   const MCExpr *E;
2481   StringRef ErrMsg;
2482
2483   // Parse BaseReg if one is provided.
2484   if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2485     if (Parser.parseExpression(E, EndLoc) ||
2486         check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2487       return nullptr;
2488
2489     // Sanity check register.
2490     BaseReg = cast<X86MCExpr>(E)->getRegNo();
2491     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2492       return ErrorOperand(BaseLoc,
2493                           "eiz and riz can only be used as index registers",
2494                           SMRange(BaseLoc, EndLoc));
2495   }
2496
2497   if (parseOptionalToken(AsmToken::Comma)) {
2498     // Following the comma we should have either an index register, or a scale
2499     // value. We don't support the latter form, but we want to parse it
2500     // correctly.
2501     //
2502     // Even though it would be completely consistent to support syntax like
2503     // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
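    // e.g. in "1(%eax,2)" the 2 below is parsed in the index position; it is
    // treated as a scale value without an index register and ignored, with a
    // warning when it is not 1.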
2504     if (getLexer().isNot(AsmToken::RParen)) {
2505       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2506         return nullptr;
2507
2508       if (!isa<X86MCExpr>(E)) {
2509         // We've parsed an unexpected Scale Value instead of an index
2510         // register. Interpret it as an absolute.
2511         int64_t ScaleVal;
2512         if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2513           return ErrorOperand(Loc, "expected absolute expression");
2514         if (ScaleVal != 1)
2515           Warning(Loc, "scale factor without index register is ignored");
2516         Scale = 1;
2517       } else { // IndexReg Found.
2518         IndexReg = cast<X86MCExpr>(E)->getRegNo();
2519
2520         if (BaseReg == X86::RIP)
2521           return ErrorOperand(
2522               Loc, "%rip as base register can not have an index register");
2523         if (IndexReg == X86::RIP)
2524           return ErrorOperand(Loc, "%rip is not allowed as an index register");
2525
2526         if (parseOptionalToken(AsmToken::Comma)) {
2527           // Parse the scale amount:
2528           //  ::= ',' [scale-expression]
2529
2530           // A scale amount without an index is ignored.
2531           if (getLexer().isNot(AsmToken::RParen)) {
2532             int64_t ScaleVal;
2533             if (Parser.parseTokenLoc(Loc) ||
2534                 Parser.parseAbsoluteExpression(ScaleVal))
2535               return ErrorOperand(Loc, "expected scale expression");
2536             Scale = (unsigned)ScaleVal;
2537             // Validate the scale amount.
2538             if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2539                 Scale != 1)
2540               return ErrorOperand(Loc,
2541                                   "scale factor in 16-bit address must be 1");
2542             if (checkScale(Scale, ErrMsg))
2543               return ErrorOperand(Loc, ErrMsg);
2544           }
2545         }
2546       }
2547     }
2548   }
2549
2550   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2551   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2552     return nullptr;
2553
2554   // This supports the otherwise-illegal operand "(%dx)" found in various
2555   // unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"). Mark such
2556   // DX variants separately so they can be fixed only in special cases.
2557   if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2558       isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
2559     return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2560
2561   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2562                                       ErrMsg))
2563     return ErrorOperand(BaseLoc, ErrMsg);
2564
2565   if (SegReg || BaseReg || IndexReg)
2566     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2567                                  IndexReg, Scale, StartLoc, EndLoc);
2568   return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2569 }
2570
2571 // Parse either a standard primary expression or a register.
2572 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2573   MCAsmParser &Parser = getParser();
2574   // See if this is a register first.
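  // e.g. "%eax" in AT&T syntax, or a bare "eax" in Intel syntax.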
2575   if (getTok().is(AsmToken::Percent) ||
2576       (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
2577        MatchRegisterName(Parser.getTok().getString()))) {
2578     SMLoc StartLoc = Parser.getTok().getLoc();
2579     unsigned RegNo;
2580     if (ParseRegister(RegNo, StartLoc, EndLoc))
2581       return true;
2582     Res = X86MCExpr::create(RegNo, Parser.getContext());
2583     return false;
2584   }
2585   return Parser.parsePrimaryExpr(Res, EndLoc);
2586 }
2587
2588 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2589                                     SMLoc NameLoc, OperandVector &Operands) {
2590   MCAsmParser &Parser = getParser();
2591   InstInfo = &Info;
2592
2593   // Reset the forced VEX encoding.
2594   ForcedVEXEncoding = VEXEncoding_Default;
2595
2596   // Parse pseudo prefixes.
2597   while (1) {
2598     if (Name == "{") {
2599       if (getLexer().isNot(AsmToken::Identifier))
2600         return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
2601       std::string Prefix = Parser.getTok().getString().lower();
2602       Parser.Lex(); // Eat identifier.
2603       if (getLexer().isNot(AsmToken::RCurly))
2604         return Error(Parser.getTok().getLoc(), "Expected '}'");
2605       Parser.Lex(); // Eat curly.
2606
2607       if (Prefix == "vex" || Prefix == "vex2")
2608         ForcedVEXEncoding = VEXEncoding_VEX;
2609       else if (Prefix == "vex3")
2610         ForcedVEXEncoding = VEXEncoding_VEX3;
2611       else if (Prefix == "evex")
2612         ForcedVEXEncoding = VEXEncoding_EVEX;
2613       else
2614         return Error(NameLoc, "unknown prefix");
2615
2616       NameLoc = Parser.getTok().getLoc();
2617       if (getLexer().is(AsmToken::LCurly)) {
2618         Parser.Lex();
2619         Name = "{";
2620       } else {
2621         if (getLexer().isNot(AsmToken::Identifier))
2622           return Error(Parser.getTok().getLoc(), "Expected identifier");
2623         // FIXME: The mnemonic won't match correctly if it's not in lower case.
2624         Name = Parser.getTok().getString();
2625         Parser.Lex();
2626       }
2627       continue;
2628     }
2629
2630     break;
2631   }
2632
2633   StringRef PatchedName = Name;
2634
2635   // Hack to skip "short" following Jcc.
2636   if (isParsingIntelSyntax() &&
2637       (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
2638        PatchedName == "jcxz" || PatchedName == "jecxz" ||
2639        (PatchedName.startswith("j") &&
2640         ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
2641     StringRef NextTok = Parser.getTok().getString();
2642     if (NextTok == "short") {
2643       SMLoc NameEndLoc =
2644           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2645       // Eat the short keyword.
2646       Parser.Lex();
2647       // MS and GAS ignore the short keyword; they both determine the jmp type
2648       // based on the distance of the label. (NASM does emit different code with
2649       // and without "short," though.)
2650       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2651                                           NextTok.size() + 1);
2652     }
2653   }
2654
2655   // FIXME: Hack to recognize setneb as setne.
2656   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2657       PatchedName != "setb" && PatchedName != "setnb")
2658     PatchedName = PatchedName.substr(0, Name.size()-1);
2659
2660   unsigned ComparisonPredicate = ~0U;
2661
2662   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2663   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2664       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2665        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2666     bool IsVCMP = PatchedName[0] == 'v';
2667     unsigned CCIdx = IsVCMP ?
4 : 3; 2668 unsigned CC = StringSwitch<unsigned>( 2669 PatchedName.slice(CCIdx, PatchedName.size() - 2)) 2670 .Case("eq", 0x00) 2671 .Case("eq_oq", 0x00) 2672 .Case("lt", 0x01) 2673 .Case("lt_os", 0x01) 2674 .Case("le", 0x02) 2675 .Case("le_os", 0x02) 2676 .Case("unord", 0x03) 2677 .Case("unord_q", 0x03) 2678 .Case("neq", 0x04) 2679 .Case("neq_uq", 0x04) 2680 .Case("nlt", 0x05) 2681 .Case("nlt_us", 0x05) 2682 .Case("nle", 0x06) 2683 .Case("nle_us", 0x06) 2684 .Case("ord", 0x07) 2685 .Case("ord_q", 0x07) 2686 /* AVX only from here */ 2687 .Case("eq_uq", 0x08) 2688 .Case("nge", 0x09) 2689 .Case("nge_us", 0x09) 2690 .Case("ngt", 0x0A) 2691 .Case("ngt_us", 0x0A) 2692 .Case("false", 0x0B) 2693 .Case("false_oq", 0x0B) 2694 .Case("neq_oq", 0x0C) 2695 .Case("ge", 0x0D) 2696 .Case("ge_os", 0x0D) 2697 .Case("gt", 0x0E) 2698 .Case("gt_os", 0x0E) 2699 .Case("true", 0x0F) 2700 .Case("true_uq", 0x0F) 2701 .Case("eq_os", 0x10) 2702 .Case("lt_oq", 0x11) 2703 .Case("le_oq", 0x12) 2704 .Case("unord_s", 0x13) 2705 .Case("neq_us", 0x14) 2706 .Case("nlt_uq", 0x15) 2707 .Case("nle_uq", 0x16) 2708 .Case("ord_s", 0x17) 2709 .Case("eq_us", 0x18) 2710 .Case("nge_uq", 0x19) 2711 .Case("ngt_uq", 0x1A) 2712 .Case("false_os", 0x1B) 2713 .Case("neq_os", 0x1C) 2714 .Case("ge_oq", 0x1D) 2715 .Case("gt_oq", 0x1E) 2716 .Case("true_us", 0x1F) 2717 .Default(~0U); 2718 if (CC != ~0U && (IsVCMP || CC < 8)) { 2719 if (PatchedName.endswith("ss")) 2720 PatchedName = IsVCMP ? "vcmpss" : "cmpss"; 2721 else if (PatchedName.endswith("sd")) 2722 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; 2723 else if (PatchedName.endswith("ps")) 2724 PatchedName = IsVCMP ? "vcmpps" : "cmpps"; 2725 else if (PatchedName.endswith("pd")) 2726 PatchedName = IsVCMP ? "vcmppd" : "cmppd"; 2727 else 2728 llvm_unreachable("Unexpected suffix!"); 2729 2730 ComparisonPredicate = CC; 2731 } 2732 } 2733 2734 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2735 if (PatchedName.startswith("vpcmp") && 2736 (PatchedName.back() == 'b' || PatchedName.back() == 'w' || 2737 PatchedName.back() == 'd' || PatchedName.back() == 'q')) { 2738 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1; 2739 unsigned CC = StringSwitch<unsigned>( 2740 PatchedName.slice(5, PatchedName.size() - SuffixSize)) 2741 .Case("eq", 0x0) // Only allowed on unsigned. Checked below. 2742 .Case("lt", 0x1) 2743 .Case("le", 0x2) 2744 //.Case("false", 0x3) // Not a documented alias. 2745 .Case("neq", 0x4) 2746 .Case("nlt", 0x5) 2747 .Case("nle", 0x6) 2748 //.Case("true", 0x7) // Not a documented alias. 2749 .Default(~0U); 2750 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) { 2751 switch (PatchedName.back()) { 2752 default: llvm_unreachable("Unexpected character!"); 2753 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break; 2754 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break; 2755 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break; 2756 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break; 2757 } 2758 // Set up the immediate to push into the operands later. 2759 ComparisonPredicate = CC; 2760 } 2761 } 2762 2763 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}. 2764 if (PatchedName.startswith("vpcom") && 2765 (PatchedName.back() == 'b' || PatchedName.back() == 'w' || 2766 PatchedName.back() == 'd' || PatchedName.back() == 'q')) { 2767 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 
2 : 1;
2768     unsigned CC = StringSwitch<unsigned>(
2769                       PatchedName.slice(5, PatchedName.size() - SuffixSize))
2770                       .Case("lt", 0x0)
2771                       .Case("le", 0x1)
2772                       .Case("gt", 0x2)
2773                       .Case("ge", 0x3)
2774                       .Case("eq", 0x4)
2775                       .Case("neq", 0x5)
2776                       .Case("false", 0x6)
2777                       .Case("true", 0x7)
2778                       .Default(~0U);
2779     if (CC != ~0U) {
2780       switch (PatchedName.back()) {
2781       default: llvm_unreachable("Unexpected character!");
2782       case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
2783       case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
2784       case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
2785       case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
2786       }
2787       // Set up the immediate to push into the operands later.
2788       ComparisonPredicate = CC;
2789     }
2790   }
2791
2792
2793   // Determine whether this is an instruction prefix.
2794   // FIXME:
2795   // Improve the robustness of prefix handling; for example, the following
2796   // forms are currently tolerated:
2797   // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
2798   // lock addq %rax, %rbx ; Destination operand must be of memory type
2799   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
2800   bool isPrefix = StringSwitch<bool>(Name)
2801                       .Cases("rex64", "data32", "data16", true)
2802                       .Cases("xacquire", "xrelease", true)
2803                       .Cases("acquire", "release", isParsingIntelSyntax())
2804                       .Default(false);
2805
2806   auto isLockRepeatNtPrefix = [](StringRef N) {
2807     return StringSwitch<bool>(N)
2808         .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2809         .Default(false);
2810   };
2811
2812   bool CurlyAsEndOfStatement = false;
2813
2814   unsigned Flags = X86::IP_NO_PREFIX;
2815   while (isLockRepeatNtPrefix(Name.lower())) {
2816     unsigned Prefix =
2817         StringSwitch<unsigned>(Name)
2818             .Case("lock", X86::IP_HAS_LOCK)
2819             .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2820             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2821             .Case("notrack", X86::IP_HAS_NOTRACK)
2822             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2823     Flags |= Prefix;
2824     if (getLexer().is(AsmToken::EndOfStatement)) {
2825       // We don't have a real instruction with the given prefix;
2826       // let's use the prefix as the instruction.
2827       // TODO: there could be several prefixes one after another.
2828       Flags = X86::IP_NO_PREFIX;
2829       break;
2830     }
2831     // FIXME: The mnemonic won't match correctly if it's not in lower case.
2832     Name = Parser.getTok().getString();
2833     Parser.Lex(); // eat the prefix
2834     // Hack: we could have something like "rep # some comment" or
2835     // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2836     while (Name.startswith(";") || Name.startswith("\n") ||
2837            Name.startswith("#") || Name.startswith("\t") ||
2838            Name.startswith("/")) {
2839       // FIXME: The mnemonic won't match correctly if it's not in lower case.
2840       Name = Parser.getTok().getString();
2841       Parser.Lex(); // go to next prefix or instr
2842     }
2843   }
2844
2845   if (Flags)
2846     PatchedName = Name;
2847
2848   // Hacks to handle 'data16' and 'data32'.
2849   if (PatchedName == "data16" && is16BitMode()) {
2850     return Error(NameLoc, "redundant data16 prefix");
2851   }
2852   if (PatchedName == "data32") {
2853     if (is32BitMode())
2854       return Error(NameLoc, "redundant data32 prefix");
2855     if (is64BitMode())
2856       return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2857     // Hack: use 'data16' for the table lookup.
2858     PatchedName = "data16";
2859   }
2860
2861   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2862
2863   // Push the immediate if we extracted one from the mnemonic.
2864   if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
2865     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2866                                                  getParser().getContext());
2867     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2868   }
2869
2870   // This does the actual operand parsing. Don't parse any more if we have a
2871   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2872   // just want to parse the "lock" as the first instruction and the "incl" as
2873   // the next one.
2874   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2875     // Parse '*' modifier.
2876     if (getLexer().is(AsmToken::Star))
2877       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2878
2879     // Read the operands.
2880     while (1) {
2881       if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2882         Operands.push_back(std::move(Op));
2883         if (HandleAVX512Operand(Operands, *Operands.back()))
2884           return true;
2885       } else {
2886         return true;
2887       }
2888       // Check for a comma and eat it.
2889       if (getLexer().is(AsmToken::Comma))
2890         Parser.Lex();
2891       else
2892         break;
2893     }
2894
2895     // In MS inline asm curly braces mark the beginning/end of a block,
2896     // therefore they should be interpreted as end of statement.
2897     CurlyAsEndOfStatement =
2898         isParsingIntelSyntax() && isParsingMSInlineAsm() &&
2899         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2900     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2901       return TokError("unexpected token in argument list");
2902   }
2903
2904   // Push the immediate if we extracted one from the mnemonic.
2905   if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
2906     const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
2907                                                  getParser().getContext());
2908     Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2909   }
2910
2911   // Consume the EndOfStatement or the prefix separator Slash.
2912   if (getLexer().is(AsmToken::EndOfStatement) ||
2913       (isPrefix && getLexer().is(AsmToken::Slash)))
2914     Parser.Lex();
2915   else if (CurlyAsEndOfStatement)
2916     // Add an actual EndOfStatement before the curly brace.
2917     Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2918                                    getLexer().getTok().getLoc(), 0);
2919
2920   // This is for gas compatibility and cannot be done in td.
2921   // Adding "p" for some floating point with no argument.
2922   // For example: fsub --> fsubp
2923   bool IsFp =
2924       Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2925   if (IsFp && Operands.size() == 1) {
2926     const char *Repl = StringSwitch<const char *>(Name)
2927                            .Case("fsub", "fsubp")
2928                            .Case("fdiv", "fdivp")
2929                            .Case("fsubr", "fsubrp")
2930                            .Case("fdivr", "fdivrp");
2931     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2932   }
2933
2934   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2935       (Operands.size() == 3)) {
2936     X86Operand &Op1 = (X86Operand &)*Operands[1];
2937     X86Operand &Op2 = (X86Operand &)*Operands[2];
2938     SMLoc Loc = Op1.getEndLoc();
2939     // Moving a 32 or 16 bit value into a segment register has the same
2940     // behavior. Modify such instructions to always take the shorter form.
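    // e.g. in 32-bit mode "movw %ax, %ds" is rewritten below to
    // "movl %eax, %ds", which avoids a redundant 0x66 operand-size prefix.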
2941     if (Op1.isReg() && Op2.isReg() &&
2942         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2943             Op2.getReg()) &&
2944         (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2945          X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2946       // Change instruction name to match new instruction.
2947       if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2948         Name = is16BitMode() ? "movw" : "movl";
2949         Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2950       }
2951       // Select the correct equivalent 16-/32-bit source register.
2952       unsigned Reg =
2953           getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2954       Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2955     }
2956   }
2957
2958   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2959   // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2960   // documented form in various unofficial manuals, so a lot of code uses it.
2961   if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2962        Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2963       Operands.size() == 3) {
2964     X86Operand &Op = (X86Operand &)*Operands.back();
2965     if (Op.isDXReg())
2966       Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2967                                               Op.getEndLoc());
2968   }
2969   // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2970   if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2971        Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2972       Operands.size() == 3) {
2973     X86Operand &Op = (X86Operand &)*Operands[1];
2974     if (Op.isDXReg())
2975       Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2976                                           Op.getEndLoc());
2977   }
2978
2979   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2980   bool HadVerifyError = false;
2981
2982   // Append default arguments to "ins[bwld]".
2983   if (Name.startswith("ins") &&
2984       (Operands.size() == 1 || Operands.size() == 3) &&
2985       (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2986        Name == "ins")) {
2987
2988     AddDefaultSrcDestOperands(TmpOperands,
2989                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2990                               DefaultMemDIOperand(NameLoc));
2991     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2992   }
2993
2994   // Append default arguments to "outs[bwld]".
2995   if (Name.startswith("outs") &&
2996       (Operands.size() == 1 || Operands.size() == 3) &&
2997       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2998        Name == "outsd" || Name == "outs")) {
2999     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3000                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3001     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3002   }
3003
3004   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3005   // values of $SIREG according to the mode. It would be nice if this
3006   // could be achieved with InstAlias in the tables.
3007   if (Name.startswith("lods") &&
3008       (Operands.size() == 1 || Operands.size() == 2) &&
3009       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3010        Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3011     TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3012     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3013   }
3014
3015   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3016   // values of $DIREG according to the mode. It would be nice if this
3017   // could be achieved with InstAlias in the tables.
3018   if (Name.startswith("stos") &&
3019       (Operands.size() == 1 || Operands.size() == 2) &&
3020       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3021        Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3022     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3023     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3024   }
3025
3026   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3027   // values of $DIREG according to the mode. It would be nice if this
3028   // could be achieved with InstAlias in the tables.
3029   if (Name.startswith("scas") &&
3030       (Operands.size() == 1 || Operands.size() == 2) &&
3031       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3032        Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3033     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3034     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3035   }
3036
3037   // Add default SI and DI operands to "cmps[bwlq]".
3038   if (Name.startswith("cmps") &&
3039       (Operands.size() == 1 || Operands.size() == 3) &&
3040       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3041        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3042     AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3043                               DefaultMemSIOperand(NameLoc));
3044     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3045   }
3046
3047   // Add default SI and DI operands to "movs[bwlq]".
3048   if (((Name.startswith("movs") &&
3049         (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3050          Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3051        (Name.startswith("smov") &&
3052         (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3053          Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3054       (Operands.size() == 1 || Operands.size() == 3)) {
3055     if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3056       Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3057     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3058                               DefaultMemDIOperand(NameLoc));
3059     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3060   }
3061
3062   // Check if we encountered an error for one of the string instructions.
3063   if (HadVerifyError) {
3064     return HadVerifyError;
3065   }
3066
3067   // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
3068   // "shift <op>".
3069   if ((Name.startswith("shr") || Name.startswith("sar") ||
3070        Name.startswith("shl") || Name.startswith("sal") ||
3071        Name.startswith("rcl") || Name.startswith("rcr") ||
3072        Name.startswith("rol") || Name.startswith("ror")) &&
3073       Operands.size() == 3) {
3074     if (isParsingIntelSyntax()) {
3075       // Intel syntax
3076       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
3077       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
3078           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
3079         Operands.pop_back();
3080     } else {
3081       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3082       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
3083           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
3084         Operands.erase(Operands.begin() + 1);
3085     }
3086   }
3087
3088   // Transforms "int $3" into "int3" as a size optimization. We can't write an
3089   // instalias with an immediate operand yet.
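  // e.g. "int $3" (CD 03, two bytes) becomes "int3" (CC, one byte).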
3090   if (Name == "int" && Operands.size() == 2) {
3091     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3092     if (Op1.isImm())
3093       if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
3094         if (CE->getValue() == 3) {
3095           Operands.erase(Operands.begin() + 1);
3096           static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
3097         }
3098   }
3099
3100   // Transforms "xlat mem8" into "xlatb".
3101   if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3102     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3103     if (Op1.isMem8()) {
3104       Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3105                                  "size, (R|E)BX will be used for the location");
3106       Operands.pop_back();
3107       static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3108     }
3109   }
3110
3111   if (Flags)
3112     Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3113   return false;
3114 }
3115
3116 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
3117   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
3118
3119   switch (Inst.getOpcode()) {
3120   default: return false;
3121   case X86::VMOVZPQILo2PQIrr:
3122   case X86::VMOVAPDrr:
3123   case X86::VMOVAPDYrr:
3124   case X86::VMOVAPSrr:
3125   case X86::VMOVAPSYrr:
3126   case X86::VMOVDQArr:
3127   case X86::VMOVDQAYrr:
3128   case X86::VMOVDQUrr:
3129   case X86::VMOVDQUYrr:
3130   case X86::VMOVUPDrr:
3131   case X86::VMOVUPDYrr:
3132   case X86::VMOVUPSrr:
3133   case X86::VMOVUPSYrr: {
3134     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3135     // the registers is extended, but the other isn't.
3136     if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
3137         MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
3138         MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
3139       return false;
3140
3141     unsigned NewOpc;
3142     switch (Inst.getOpcode()) {
3143     default: llvm_unreachable("Invalid opcode");
3144     case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
3145     case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
3146     case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
3147     case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
3148     case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
3149     case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
3150     case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
3151     case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
3152     case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
3153     case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
3154     case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
3155     case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
3156     case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
3157     }
3158     Inst.setOpcode(NewOpc);
3159     return true;
3160   }
3161   case X86::VMOVSDrr:
3162   case X86::VMOVSSrr: {
3163     // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
3164     // the registers is extended, but the other isn't.
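    // For illustration: in "vmovsd %xmm8, %xmm1, %xmm2" the extended %xmm8
    // would need VEX.B (and thus the 3-byte VEX prefix) in the normal form;
    // the _REV form encodes it via VEX.R instead, which fits the 2-byte
    // prefix.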
3165 if (ForcedVEXEncoding == VEXEncoding_VEX3 || 3166 MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 || 3167 MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8) 3168 return false; 3169 3170 unsigned NewOpc; 3171 switch (Inst.getOpcode()) { 3172 default: llvm_unreachable("Invalid opcode"); 3173 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; 3174 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; 3175 } 3176 Inst.setOpcode(NewOpc); 3177 return true; 3178 } 3179 } 3180 } 3181 3182 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { 3183 const MCRegisterInfo *MRI = getContext().getRegisterInfo(); 3184 3185 switch (Inst.getOpcode()) { 3186 case X86::VGATHERDPDYrm: 3187 case X86::VGATHERDPDrm: 3188 case X86::VGATHERDPSYrm: 3189 case X86::VGATHERDPSrm: 3190 case X86::VGATHERQPDYrm: 3191 case X86::VGATHERQPDrm: 3192 case X86::VGATHERQPSYrm: 3193 case X86::VGATHERQPSrm: 3194 case X86::VPGATHERDDYrm: 3195 case X86::VPGATHERDDrm: 3196 case X86::VPGATHERDQYrm: 3197 case X86::VPGATHERDQrm: 3198 case X86::VPGATHERQDYrm: 3199 case X86::VPGATHERQDrm: 3200 case X86::VPGATHERQQYrm: 3201 case X86::VPGATHERQQrm: { 3202 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3203 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); 3204 unsigned Index = 3205 MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg()); 3206 if (Dest == Mask || Dest == Index || Mask == Index) 3207 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " 3208 "registers should be distinct"); 3209 break; 3210 } 3211 case X86::VGATHERDPDZ128rm: 3212 case X86::VGATHERDPDZ256rm: 3213 case X86::VGATHERDPDZrm: 3214 case X86::VGATHERDPSZ128rm: 3215 case X86::VGATHERDPSZ256rm: 3216 case X86::VGATHERDPSZrm: 3217 case X86::VGATHERQPDZ128rm: 3218 case X86::VGATHERQPDZ256rm: 3219 case X86::VGATHERQPDZrm: 3220 case X86::VGATHERQPSZ128rm: 3221 case X86::VGATHERQPSZ256rm: 3222 case X86::VGATHERQPSZrm: 3223 case X86::VPGATHERDDZ128rm: 3224 case X86::VPGATHERDDZ256rm: 3225 case X86::VPGATHERDDZrm: 3226 case X86::VPGATHERDQZ128rm: 3227 case X86::VPGATHERDQZ256rm: 3228 case X86::VPGATHERDQZrm: 3229 case X86::VPGATHERQDZ128rm: 3230 case X86::VPGATHERQDZ256rm: 3231 case X86::VPGATHERQDZrm: 3232 case X86::VPGATHERQQZ128rm: 3233 case X86::VPGATHERQQZ256rm: 3234 case X86::VPGATHERQQZrm: { 3235 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3236 unsigned Index = 3237 MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg()); 3238 if (Dest == Index) 3239 return Warning(Ops[0]->getStartLoc(), "index and destination registers " 3240 "should be distinct"); 3241 break; 3242 } 3243 case X86::V4FMADDPSrm: 3244 case X86::V4FMADDPSrmk: 3245 case X86::V4FMADDPSrmkz: 3246 case X86::V4FMADDSSrm: 3247 case X86::V4FMADDSSrmk: 3248 case X86::V4FMADDSSrmkz: 3249 case X86::V4FNMADDPSrm: 3250 case X86::V4FNMADDPSrmk: 3251 case X86::V4FNMADDPSrmkz: 3252 case X86::V4FNMADDSSrm: 3253 case X86::V4FNMADDSSrmk: 3254 case X86::V4FNMADDSSrmkz: 3255 case X86::VP4DPWSSDSrm: 3256 case X86::VP4DPWSSDSrmk: 3257 case X86::VP4DPWSSDSrmkz: 3258 case X86::VP4DPWSSDrm: 3259 case X86::VP4DPWSSDrmk: 3260 case X86::VP4DPWSSDrmkz: { 3261 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() - 3262 X86::AddrNumOperands - 1).getReg(); 3263 unsigned Src2Enc = MRI->getEncodingValue(Src2); 3264 if (Src2Enc % 4 != 0) { 3265 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2); 3266 unsigned GroupStart = (Src2Enc / 4) * 4; 3267 unsigned GroupEnd = 
GroupStart + 3;
3268       return Warning(Ops[0]->getStartLoc(),
3269                      "source register '" + RegName + "' implicitly denotes '" +
3270                      RegName.take_front(3) + Twine(GroupStart) + "' to '" +
3271                      RegName.take_front(3) + Twine(GroupEnd) +
3272                      "' source group");
3273     }
3274     break;
3275   }
3276   }
3277
3278   return false;
3279 }
3280
3281 static const char *getSubtargetFeatureName(uint64_t Val);
3282
3283 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3284   Warning(Loc, "Instruction may be vulnerable to LVI and "
3285                "requires manual mitigation");
3286   Note(SMLoc(), "See https://software.intel.com/"
3287                 "security-software-guidance/insights/"
3288                 "deep-dive-load-value-injection#specialinstructions"
3289                 " for more information");
3290 }
3291
3292 /// RET instructions, and instructions that perform indirect calls/jumps from
3293 /// memory, combine a load and a branch within a single instruction. To
3294 /// mitigate these instructions against LVI, they must be decomposed into
3295 /// separate load and branch instructions, with an LFENCE in between. For more
3296 /// details, see:
3297 /// - X86LoadValueInjectionRetHardening.cpp
3298 /// - X86LoadValueInjectionIndirectThunks.cpp
3299 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3300 ///
3301 /// Applies the mitigation, or emits a warning when it must be applied manually.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
3302   // Information on control-flow instructions that require manual mitigation
3303   // can be found here:
3304   // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
3305   switch (Inst.getOpcode()) {
3306   case X86::RETW:
3307   case X86::RETL:
3308   case X86::RETQ:
3309   case X86::RETIL:
3310   case X86::RETIQ:
3311   case X86::RETIW: {
3312     MCInst ShlInst, FenceInst;
3313     bool Parse32 = is32BitMode() || Code16GCC;
3314     unsigned Basereg =
3315         is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
3316     const MCExpr *Disp = MCConstantExpr::create(0, getContext());
3317     auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
3318                                           /*BaseReg=*/Basereg, /*IndexReg=*/0,
3319                                           /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
3320     ShlInst.setOpcode(X86::SHL64mi);
3321     ShlMemOp->addMemOperands(ShlInst, 5);
3322     ShlInst.addOperand(MCOperand::createImm(0));
3323     FenceInst.setOpcode(X86::LFENCE);
3324     Out.emitInstruction(ShlInst, getSTI());
3325     Out.emitInstruction(FenceInst, getSTI());
3326     return;
3327   }
3328   case X86::JMP16m:
3329   case X86::JMP32m:
3330   case X86::JMP64m:
3331   case X86::CALL16m:
3332   case X86::CALL32m:
3333   case X86::CALL64m:
3334     emitWarningForSpecialLVIInstruction(Inst.getLoc());
3335     return;
3336   }
3337 }
3338
3339 /// To mitigate LVI, every instruction that performs a load can be followed by
3340 /// an LFENCE instruction to squash any potential mis-speculation. There are
3341 /// some instructions that require additional considerations, and may require
3342 /// manual mitigation. For more details, see:
3343 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
3344 ///
3345 /// Applies the mitigation, or emits a warning when it must be applied manually.
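/// For example, with this mitigation enabled, an input of
///   movq (%rdi), %rax
/// is emitted as
///   movq (%rdi), %rax
///   lfence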
3346 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst, 3347 MCStreamer &Out) { 3348 auto Opcode = Inst.getOpcode(); 3349 auto Flags = Inst.getFlags(); 3350 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) { 3351 // Information on REP string instructions that require manual mitigation can 3352 // be found here: 3353 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions 3354 switch (Opcode) { 3355 case X86::CMPSB: 3356 case X86::CMPSW: 3357 case X86::CMPSL: 3358 case X86::CMPSQ: 3359 case X86::SCASB: 3360 case X86::SCASW: 3361 case X86::SCASL: 3362 case X86::SCASQ: 3363 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 3364 return; 3365 } 3366 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) { 3367 // If a REP instruction is found on its own line, it may or may not be 3368 // followed by a vulnerable instruction. Emit a warning just in case. 3369 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 3370 return; 3371 } 3372 3373 const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); 3374 3375 // Can't mitigate after terminators or calls. A control flow change may have 3376 // already occurred. 3377 if (MCID.isTerminator() || MCID.isCall()) 3378 return; 3379 3380 // LFENCE has the mayLoad property, don't double fence. 3381 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) { 3382 MCInst FenceInst; 3383 FenceInst.setOpcode(X86::LFENCE); 3384 Out.emitInstruction(FenceInst, getSTI()); 3385 } 3386 } 3387 3388 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands, 3389 MCStreamer &Out) { 3390 if (LVIInlineAsmHardening && 3391 getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity]) 3392 applyLVICFIMitigation(Inst, Out); 3393 3394 Out.emitInstruction(Inst, getSTI()); 3395 3396 if (LVIInlineAsmHardening && 3397 getSTI().getFeatureBits()[X86::FeatureLVILoadHardening]) 3398 applyLVILoadHardeningMitigation(Inst, Out); 3399 } 3400 3401 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3402 OperandVector &Operands, 3403 MCStreamer &Out, uint64_t &ErrorInfo, 3404 bool MatchingInlineAsm) { 3405 if (isParsingIntelSyntax()) 3406 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 3407 MatchingInlineAsm); 3408 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 3409 MatchingInlineAsm); 3410 } 3411 3412 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, 3413 OperandVector &Operands, MCStreamer &Out, 3414 bool MatchingInlineAsm) { 3415 // FIXME: This should be replaced with a real .td file alias mechanism. 3416 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 3417 // call. 
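  // For example, "finit" is treated as "wait" followed by "fninit": the WAIT
  // instruction is emitted here, and the mnemonic token is rewritten to the
  // no-wait form before matching continues.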
  const char *Repl = StringSwitch<const char *>(Op.getToken())
                         .Case("finit", "fninit")
                         .Case("fsave", "fnsave")
                         .Case("fstcw", "fnstcw")
                         .Case("fstcww", "fnstcw")
                         .Case("fstenv", "fnstenv")
                         .Case("fstsw", "fnstsw")
                         .Case("fstsww", "fnstsw")
                         .Case("fclex", "fnclex")
                         .Default(nullptr);
  if (Repl) {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }
}

bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                       const FeatureBitset &MissingFeatures,
                                       bool MatchingInlineAsm) {
  assert(MissingFeatures.any() && "Unknown missing feature!");
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
    if (MissingFeatures[i])
      OS << ' ' << getSubtargetFeatureName(i);
  }
  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}

static unsigned getPrefixes(OperandVector &Operands) {
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    Operands.pop_back();
  }
  return Result;
}

unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);

  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return Match_Unsupported;

  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
       ForcedVEXEncoding == VEXEncoding_VEX3) &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
    return Match_Unsupported;

  // These instructions match ambiguously with their VEX encoded counterparts
  // and appear first in the matching table. Reject them unless we're forcing
  // EVEX encoding.
  // FIXME: We really need a way to break the ambiguity.
  switch (Opc) {
  case X86::VCVTSD2SIZrm_Int:
  case X86::VCVTSD2SI64Zrm_Int:
  case X86::VCVTSS2SIZrm_Int:
  case X86::VCVTSS2SI64Zrm_Int:
  case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
  case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
  case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
  case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
    if (ForcedVEXEncoding != VEXEncoding_EVEX)
      return Match_Unsupported;
    break;
  }

  return Match_Success;
}

bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpected empty operand list!");
  assert((*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = None;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
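  // For example, "{vex3} vandps %xmm0, %xmm1, %xmm2" is then emitted with the
  // three-byte VEX prefix (0xC4) even though it would fit in the two-byte
  // (0xC5) form.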
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  // Some instructions, such as VPMULDQ, are not suffixed variants of another
  // mnemonic (there is no "VPMULD") but entirely distinct instructions. To
  // avoid matching them by accident, the suffix matcher only considers memory
  // variants whose operand size agrees with the appended suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare a non-suffixed variant of their assembly syntax.
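  // For example, "add $1, (%rax)" carries no register that implies a size, so
  // it is retried as "addb", "addw", "addl", and "addq"; since several of
  // those match, the ambiguity error below lists the candidate suffixes.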
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Only one memory operand is allowed, so stop at the first one we find.
      break;
    }
  }

  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);
  if (NumSuccessfulMatches == 1) {
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise the match failed; try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
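    // (ErrorInfo is the index of the offending operand, or ~0ULL if the
    // matcher could not attribute the failure to a particular operand.)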
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}

bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpected empty operand list!");
  assert((*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = None;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX3 encoding is forced, we need to pass the USE_VEX3 flag to the
  // encoder.
  if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Only one memory operand is allowed, so stop at the first one we find.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
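  // For example, in 64-bit mode "call [rax]" is accepted as if it had been
  // written "call qword ptr [rax]".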
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If an unsized push has an immediate operand, default the operand size to
  // the pointer size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant, fall through and let the code below handle it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do the match in AT&T mode so that the explicit suffix is honored.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
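  // (The frontend size is generally only available when parsing MS-style
  // inline assembly, where the frontend knows the size of the underlying C or
  // C++ object.)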
3859 if (UnsizedMemOp && NumSuccessfulMatches > 1 && 3860 UnsizedMemOp->getMemFrontendSize()) { 3861 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize(); 3862 unsigned M = MatchInstruction( 3863 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm, 3864 isParsingIntelSyntax()); 3865 if (M == Match_Success) 3866 NumSuccessfulMatches = 1; 3867 3868 // Add a rewrite that encodes the size information we used from the 3869 // frontend. 3870 InstInfo->AsmRewrites->emplace_back( 3871 AOK_SizeDirective, UnsizedMemOp->getStartLoc(), 3872 /*Len=*/0, UnsizedMemOp->getMemFrontendSize()); 3873 } 3874 3875 // If exactly one matched, then we treat that as a successful match (and the 3876 // instruction will already have been filled in correctly, since the failing 3877 // matches won't have modified it). 3878 if (NumSuccessfulMatches == 1) { 3879 if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) 3880 return true; 3881 // Some instructions need post-processing to, for example, tweak which 3882 // encoding is selected. Loop on it while changes happen so the individual 3883 // transformations can chain off each other. 3884 if (!MatchingInlineAsm) 3885 while (processInstruction(Inst, Operands)) 3886 ; 3887 Inst.setLoc(IDLoc); 3888 if (!MatchingInlineAsm) 3889 emitInstruction(Inst, Operands, Out); 3890 Opcode = Inst.getOpcode(); 3891 return false; 3892 } else if (NumSuccessfulMatches > 1) { 3893 assert(UnsizedMemOp && 3894 "multiple matches only possible with unsized memory operands"); 3895 return Error(UnsizedMemOp->getStartLoc(), 3896 "ambiguous operand size for instruction '" + Mnemonic + "\'", 3897 UnsizedMemOp->getLocRange()); 3898 } 3899 3900 // If one instruction matched as unsupported, report this as unsupported. 3901 if (std::count(std::begin(Match), std::end(Match), 3902 Match_Unsupported) == 1) { 3903 return Error(IDLoc, "unsupported instruction", EmptyRange, 3904 MatchingInlineAsm); 3905 } 3906 3907 // If one instruction matched with a missing feature, report this as a 3908 // missing feature. 3909 if (std::count(std::begin(Match), std::end(Match), 3910 Match_MissingFeature) == 1) { 3911 ErrorInfo = Match_MissingFeature; 3912 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures, 3913 MatchingInlineAsm); 3914 } 3915 3916 // If one instruction matched with an invalid operand, report this as an 3917 // operand failure. 3918 if (std::count(std::begin(Match), std::end(Match), 3919 Match_InvalidOperand) == 1) { 3920 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 3921 MatchingInlineAsm); 3922 } 3923 3924 if (std::count(std::begin(Match), std::end(Match), 3925 Match_InvalidImmUnsignedi4) == 1) { 3926 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); 3927 if (ErrorLoc == SMLoc()) 3928 ErrorLoc = IDLoc; 3929 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", 3930 EmptyRange, MatchingInlineAsm); 3931 } 3932 3933 // If all of these were an outright failure, report it in a useless way. 
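  // (Unlike the AT&T path above, no suffix information is available here to
  // sharpen the diagnostic.)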
3934 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange, 3935 MatchingInlineAsm); 3936 } 3937 3938 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { 3939 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); 3940 } 3941 3942 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 3943 MCAsmParser &Parser = getParser(); 3944 StringRef IDVal = DirectiveID.getIdentifier(); 3945 if (IDVal.startswith(".code")) 3946 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 3947 else if (IDVal.startswith(".att_syntax")) { 3948 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3949 if (Parser.getTok().getString() == "prefix") 3950 Parser.Lex(); 3951 else if (Parser.getTok().getString() == "noprefix") 3952 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not " 3953 "supported: registers must have a " 3954 "'%' prefix in .att_syntax"); 3955 } 3956 getParser().setAssemblerDialect(0); 3957 return false; 3958 } else if (IDVal.startswith(".intel_syntax")) { 3959 getParser().setAssemblerDialect(1); 3960 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3961 if (Parser.getTok().getString() == "noprefix") 3962 Parser.Lex(); 3963 else if (Parser.getTok().getString() == "prefix") 3964 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not " 3965 "supported: registers must not have " 3966 "a '%' prefix in .intel_syntax"); 3967 } 3968 return false; 3969 } else if (IDVal == ".even") 3970 return parseDirectiveEven(DirectiveID.getLoc()); 3971 else if (IDVal == ".cv_fpo_proc") 3972 return parseDirectiveFPOProc(DirectiveID.getLoc()); 3973 else if (IDVal == ".cv_fpo_setframe") 3974 return parseDirectiveFPOSetFrame(DirectiveID.getLoc()); 3975 else if (IDVal == ".cv_fpo_pushreg") 3976 return parseDirectiveFPOPushReg(DirectiveID.getLoc()); 3977 else if (IDVal == ".cv_fpo_stackalloc") 3978 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc()); 3979 else if (IDVal == ".cv_fpo_stackalign") 3980 return parseDirectiveFPOStackAlign(DirectiveID.getLoc()); 3981 else if (IDVal == ".cv_fpo_endprologue") 3982 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); 3983 else if (IDVal == ".cv_fpo_endproc") 3984 return parseDirectiveFPOEndProc(DirectiveID.getLoc()); 3985 else if (IDVal == ".seh_pushreg") 3986 return parseDirectiveSEHPushReg(DirectiveID.getLoc()); 3987 else if (IDVal == ".seh_setframe") 3988 return parseDirectiveSEHSetFrame(DirectiveID.getLoc()); 3989 else if (IDVal == ".seh_savereg") 3990 return parseDirectiveSEHSaveReg(DirectiveID.getLoc()); 3991 else if (IDVal == ".seh_savexmm") 3992 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc()); 3993 else if (IDVal == ".seh_pushframe") 3994 return parseDirectiveSEHPushFrame(DirectiveID.getLoc()); 3995 3996 return true; 3997 } 3998 3999 /// parseDirectiveEven 4000 /// ::= .even 4001 bool X86AsmParser::parseDirectiveEven(SMLoc L) { 4002 if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) 4003 return false; 4004 4005 const MCSection *Section = getStreamer().getCurrentSectionOnly(); 4006 if (!Section) { 4007 getStreamer().InitSections(false); 4008 Section = getStreamer().getCurrentSectionOnly(); 4009 } 4010 if (Section->UseCodeAlign()) 4011 getStreamer().emitCodeAlignment(2, 0); 4012 else 4013 getStreamer().emitValueToAlignment(2, 0, 1, 0); 4014 return false; 4015 } 4016 4017 /// ParseDirectiveCode 4018 /// ::= .code16 | .code32 | .code64 4019 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 4020 MCAsmParser &Parser = getParser(); 4021 Code16GCC = false; 4022 
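  // For example, after a ".code64" directive, subsequent instructions are
  // parsed and encoded in 64-bit mode until the next .code* directive.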
if (IDVal == ".code16") { 4023 Parser.Lex(); 4024 if (!is16BitMode()) { 4025 SwitchMode(X86::Mode16Bit); 4026 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); 4027 } 4028 } else if (IDVal == ".code16gcc") { 4029 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode. 4030 Parser.Lex(); 4031 Code16GCC = true; 4032 if (!is16BitMode()) { 4033 SwitchMode(X86::Mode16Bit); 4034 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); 4035 } 4036 } else if (IDVal == ".code32") { 4037 Parser.Lex(); 4038 if (!is32BitMode()) { 4039 SwitchMode(X86::Mode32Bit); 4040 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32); 4041 } 4042 } else if (IDVal == ".code64") { 4043 Parser.Lex(); 4044 if (!is64BitMode()) { 4045 SwitchMode(X86::Mode64Bit); 4046 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64); 4047 } 4048 } else { 4049 Error(L, "unknown directive " + IDVal); 4050 return false; 4051 } 4052 4053 return false; 4054 } 4055 4056 // .cv_fpo_proc foo 4057 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) { 4058 MCAsmParser &Parser = getParser(); 4059 StringRef ProcName; 4060 int64_t ParamsSize; 4061 if (Parser.parseIdentifier(ProcName)) 4062 return Parser.TokError("expected symbol name"); 4063 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count")) 4064 return true; 4065 if (!isUIntN(32, ParamsSize)) 4066 return Parser.TokError("parameters size out of range"); 4067 if (Parser.parseEOL("unexpected tokens")) 4068 return addErrorSuffix(" in '.cv_fpo_proc' directive"); 4069 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); 4070 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L); 4071 } 4072 4073 // .cv_fpo_setframe ebp 4074 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) { 4075 MCAsmParser &Parser = getParser(); 4076 unsigned Reg; 4077 SMLoc DummyLoc; 4078 if (ParseRegister(Reg, DummyLoc, DummyLoc) || 4079 Parser.parseEOL("unexpected tokens")) 4080 return addErrorSuffix(" in '.cv_fpo_setframe' directive"); 4081 return getTargetStreamer().emitFPOSetFrame(Reg, L); 4082 } 4083 4084 // .cv_fpo_pushreg ebx 4085 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) { 4086 MCAsmParser &Parser = getParser(); 4087 unsigned Reg; 4088 SMLoc DummyLoc; 4089 if (ParseRegister(Reg, DummyLoc, DummyLoc) || 4090 Parser.parseEOL("unexpected tokens")) 4091 return addErrorSuffix(" in '.cv_fpo_pushreg' directive"); 4092 return getTargetStreamer().emitFPOPushReg(Reg, L); 4093 } 4094 4095 // .cv_fpo_stackalloc 20 4096 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) { 4097 MCAsmParser &Parser = getParser(); 4098 int64_t Offset; 4099 if (Parser.parseIntToken(Offset, "expected offset") || 4100 Parser.parseEOL("unexpected tokens")) 4101 return addErrorSuffix(" in '.cv_fpo_stackalloc' directive"); 4102 return getTargetStreamer().emitFPOStackAlloc(Offset, L); 4103 } 4104 4105 // .cv_fpo_stackalign 8 4106 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) { 4107 MCAsmParser &Parser = getParser(); 4108 int64_t Offset; 4109 if (Parser.parseIntToken(Offset, "expected offset") || 4110 Parser.parseEOL("unexpected tokens")) 4111 return addErrorSuffix(" in '.cv_fpo_stackalign' directive"); 4112 return getTargetStreamer().emitFPOStackAlign(Offset, L); 4113 } 4114 4115 // .cv_fpo_endprologue 4116 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) { 4117 MCAsmParser &Parser = getParser(); 4118 if (Parser.parseEOL("unexpected tokens")) 4119 return addErrorSuffix(" in '.cv_fpo_endprologue' directive"); 4120 return 
getTargetStreamer().emitFPOEndPrologue(L); 4121 } 4122 4123 // .cv_fpo_endproc 4124 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { 4125 MCAsmParser &Parser = getParser(); 4126 if (Parser.parseEOL("unexpected tokens")) 4127 return addErrorSuffix(" in '.cv_fpo_endproc' directive"); 4128 return getTargetStreamer().emitFPOEndProc(L); 4129 } 4130 4131 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID, 4132 unsigned &RegNo) { 4133 SMLoc startLoc = getLexer().getLoc(); 4134 const MCRegisterInfo *MRI = getContext().getRegisterInfo(); 4135 4136 // Try parsing the argument as a register first. 4137 if (getLexer().getTok().isNot(AsmToken::Integer)) { 4138 SMLoc endLoc; 4139 if (ParseRegister(RegNo, startLoc, endLoc)) 4140 return true; 4141 4142 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) { 4143 return Error(startLoc, 4144 "register is not supported for use with this directive"); 4145 } 4146 } else { 4147 // Otherwise, an integer number matching the encoding of the desired 4148 // register may appear. 4149 int64_t EncodedReg; 4150 if (getParser().parseAbsoluteExpression(EncodedReg)) 4151 return true; 4152 4153 // The SEH register number is the same as the encoding register number. Map 4154 // from the encoding back to the LLVM register number. 4155 RegNo = 0; 4156 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { 4157 if (MRI->getEncodingValue(Reg) == EncodedReg) { 4158 RegNo = Reg; 4159 break; 4160 } 4161 } 4162 if (RegNo == 0) { 4163 return Error(startLoc, 4164 "incorrect register number for use with this directive"); 4165 } 4166 } 4167 4168 return false; 4169 } 4170 4171 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { 4172 unsigned Reg = 0; 4173 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4174 return true; 4175 4176 if (getLexer().isNot(AsmToken::EndOfStatement)) 4177 return TokError("unexpected token in directive"); 4178 4179 getParser().Lex(); 4180 getStreamer().EmitWinCFIPushReg(Reg, Loc); 4181 return false; 4182 } 4183 4184 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { 4185 unsigned Reg = 0; 4186 int64_t Off; 4187 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4188 return true; 4189 if (getLexer().isNot(AsmToken::Comma)) 4190 return TokError("you must specify a stack pointer offset"); 4191 4192 getParser().Lex(); 4193 if (getParser().parseAbsoluteExpression(Off)) 4194 return true; 4195 4196 if (getLexer().isNot(AsmToken::EndOfStatement)) 4197 return TokError("unexpected token in directive"); 4198 4199 getParser().Lex(); 4200 getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); 4201 return false; 4202 } 4203 4204 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { 4205 unsigned Reg = 0; 4206 int64_t Off; 4207 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4208 return true; 4209 if (getLexer().isNot(AsmToken::Comma)) 4210 return TokError("you must specify an offset on the stack"); 4211 4212 getParser().Lex(); 4213 if (getParser().parseAbsoluteExpression(Off)) 4214 return true; 4215 4216 if (getLexer().isNot(AsmToken::EndOfStatement)) 4217 return TokError("unexpected token in directive"); 4218 4219 getParser().Lex(); 4220 getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); 4221 return false; 4222 } 4223 4224 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { 4225 unsigned Reg = 0; 4226 int64_t Off; 4227 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) 4228 return true; 4229 if (getLexer().isNot(AsmToken::Comma)) 4230 return TokError("you must specify an offset on the stack"); 4231 4232 getParser().Lex(); 
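  // Parse the stack offset at which the XMM register is saved.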
4233 if (getParser().parseAbsoluteExpression(Off)) 4234 return true; 4235 4236 if (getLexer().isNot(AsmToken::EndOfStatement)) 4237 return TokError("unexpected token in directive"); 4238 4239 getParser().Lex(); 4240 getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); 4241 return false; 4242 } 4243 4244 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { 4245 bool Code = false; 4246 StringRef CodeID; 4247 if (getLexer().is(AsmToken::At)) { 4248 SMLoc startLoc = getLexer().getLoc(); 4249 getParser().Lex(); 4250 if (!getParser().parseIdentifier(CodeID)) { 4251 if (CodeID != "code") 4252 return Error(startLoc, "expected @code"); 4253 Code = true; 4254 } 4255 } 4256 4257 if (getLexer().isNot(AsmToken::EndOfStatement)) 4258 return TokError("unexpected token in directive"); 4259 4260 getParser().Lex(); 4261 getStreamer().EmitWinCFIPushFrame(Code, Loc); 4262 return false; 4263 } 4264 4265 // Force static initialization. 4266 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() { 4267 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 4268 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 4269 } 4270 4271 #define GET_REGISTER_MATCHER 4272 #define GET_MATCHER_IMPLEMENTATION 4273 #define GET_SUBTARGET_FEATURE_NAME 4274 #include "X86GenAsmMatcher.inc" 4275