//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>

using namespace llvm;

// Hidden command-line switch; see the description string for its purpose.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);

/// Validate an addressing-mode scale factor.
///
/// \param Scale  the candidate scale.
/// \param ErrMsg set to a diagnostic string when the scale is rejected;
///               left untouched otherwise.
/// \returns true (error) unless \p Scale is 1, 2, 4 or 8.
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
    return true;
  }
  return false;
}

namespace {

// Including the generated SSE2AVX compression tables.
#define GET_X86_SSE2AVX_TABLE
#include "X86GenInstrMapping.inc"

// Operator precedence table, indexed by InfixCalculatorTok (the per-entry
// comments name the enumerator each slot belongs to, so the two must be kept
// in the same order). InfixCalculator::pushOperator compares these values; a
// larger number means the operator has higher precedence.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};

class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  // When set, MatchInstruction() switches to 32-bit mode for the duration of
  // the match and back to 16-bit mode afterwards.
  bool Code16GCC;
  // NOTE(review): presumably records an explicitly requested data-size
  // prefix; its uses are outside this part of the file.
  unsigned ForcedDataPrefix = 0;

  // Opcode prefix encodings that can be forced for an instruction.
  enum OpcodePrefix {
    OpcodePrefix_Default,
    OpcodePrefix_REX,
    OpcodePrefix_REX2,
    OpcodePrefix_VEX,
    OpcodePrefix_VEX2,
    OpcodePrefix_VEX3,
    OpcodePrefix_EVEX,
  };

  OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;

  // Displacement encodings that can be forced for an instruction.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use apx extended register?
  bool UseApxExtendedReg = false;
  // Is this instruction explicitly required not to update flags?
118 bool ForcedNoFlag = false; 119 120 private: 121 SMLoc consumeToken() { 122 MCAsmParser &Parser = getParser(); 123 SMLoc Result = Parser.getTok().getLoc(); 124 Parser.Lex(); 125 return Result; 126 } 127 128 X86TargetStreamer &getTargetStreamer() { 129 assert(getParser().getStreamer().getTargetStreamer() && 130 "do not have a target streamer"); 131 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 132 return static_cast<X86TargetStreamer &>(TS); 133 } 134 135 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, 136 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures, 137 bool matchingInlineAsm, unsigned VariantID = 0) { 138 // In Code16GCC mode, match as 32-bit. 139 if (Code16GCC) 140 SwitchMode(X86::Is32Bit); 141 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo, 142 MissingFeatures, matchingInlineAsm, 143 VariantID); 144 if (Code16GCC) 145 SwitchMode(X86::Is16Bit); 146 return rv; 147 } 148 149 enum InfixCalculatorTok { 150 IC_OR = 0, 151 IC_XOR, 152 IC_AND, 153 IC_LSHIFT, 154 IC_RSHIFT, 155 IC_PLUS, 156 IC_MINUS, 157 IC_MULTIPLY, 158 IC_DIVIDE, 159 IC_MOD, 160 IC_NOT, 161 IC_NEG, 162 IC_RPAREN, 163 IC_LPAREN, 164 IC_IMM, 165 IC_REGISTER, 166 IC_EQ, 167 IC_NE, 168 IC_LT, 169 IC_LE, 170 IC_GT, 171 IC_GE 172 }; 173 174 enum IntelOperatorKind { 175 IOK_INVALID = 0, 176 IOK_LENGTH, 177 IOK_SIZE, 178 IOK_TYPE, 179 }; 180 181 enum MasmOperatorKind { 182 MOK_INVALID = 0, 183 MOK_LENGTHOF, 184 MOK_SIZEOF, 185 MOK_TYPE, 186 }; 187 188 class InfixCalculator { 189 typedef std::pair< InfixCalculatorTok, int64_t > ICToken; 190 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack; 191 SmallVector<ICToken, 4> PostfixStack; 192 193 bool isUnaryOperator(InfixCalculatorTok Op) const { 194 return Op == IC_NEG || Op == IC_NOT; 195 } 196 197 public: 198 int64_t popOperand() { 199 assert (!PostfixStack.empty() && "Poped an empty stack!"); 200 ICToken Op = PostfixStack.pop_back_val(); 201 if (!(Op.first == IC_IMM || Op.first == 
IC_REGISTER)) 202 return -1; // The invalid Scale value will be caught later by checkScale 203 return Op.second; 204 } 205 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) { 206 assert ((Op == IC_IMM || Op == IC_REGISTER) && 207 "Unexpected operand!"); 208 PostfixStack.push_back(std::make_pair(Op, Val)); 209 } 210 211 void popOperator() { InfixOperatorStack.pop_back(); } 212 void pushOperator(InfixCalculatorTok Op) { 213 // Push the new operator if the stack is empty. 214 if (InfixOperatorStack.empty()) { 215 InfixOperatorStack.push_back(Op); 216 return; 217 } 218 219 // Push the new operator if it has a higher precedence than the operator 220 // on the top of the stack or the operator on the top of the stack is a 221 // left parentheses. 222 unsigned Idx = InfixOperatorStack.size() - 1; 223 InfixCalculatorTok StackOp = InfixOperatorStack[Idx]; 224 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) { 225 InfixOperatorStack.push_back(Op); 226 return; 227 } 228 229 // The operator on the top of the stack has higher precedence than the 230 // new operator. 231 unsigned ParenCount = 0; 232 while (true) { 233 // Nothing to process. 234 if (InfixOperatorStack.empty()) 235 break; 236 237 Idx = InfixOperatorStack.size() - 1; 238 StackOp = InfixOperatorStack[Idx]; 239 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount)) 240 break; 241 242 // If we have an even parentheses count and we see a left parentheses, 243 // then stop processing. 244 if (!ParenCount && StackOp == IC_LPAREN) 245 break; 246 247 if (StackOp == IC_RPAREN) { 248 ++ParenCount; 249 InfixOperatorStack.pop_back(); 250 } else if (StackOp == IC_LPAREN) { 251 --ParenCount; 252 InfixOperatorStack.pop_back(); 253 } else { 254 InfixOperatorStack.pop_back(); 255 PostfixStack.push_back(std::make_pair(StackOp, 0)); 256 } 257 } 258 // Push the new operator. 
259 InfixOperatorStack.push_back(Op); 260 } 261 262 int64_t execute() { 263 // Push any remaining operators onto the postfix stack. 264 while (!InfixOperatorStack.empty()) { 265 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val(); 266 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN) 267 PostfixStack.push_back(std::make_pair(StackOp, 0)); 268 } 269 270 if (PostfixStack.empty()) 271 return 0; 272 273 SmallVector<ICToken, 16> OperandStack; 274 for (const ICToken &Op : PostfixStack) { 275 if (Op.first == IC_IMM || Op.first == IC_REGISTER) { 276 OperandStack.push_back(Op); 277 } else if (isUnaryOperator(Op.first)) { 278 assert (OperandStack.size() > 0 && "Too few operands."); 279 ICToken Operand = OperandStack.pop_back_val(); 280 assert (Operand.first == IC_IMM && 281 "Unary operation with a register!"); 282 switch (Op.first) { 283 default: 284 report_fatal_error("Unexpected operator!"); 285 break; 286 case IC_NEG: 287 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second)); 288 break; 289 case IC_NOT: 290 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second)); 291 break; 292 } 293 } else { 294 assert (OperandStack.size() > 1 && "Too few operands."); 295 int64_t Val; 296 ICToken Op2 = OperandStack.pop_back_val(); 297 ICToken Op1 = OperandStack.pop_back_val(); 298 switch (Op.first) { 299 default: 300 report_fatal_error("Unexpected operator!"); 301 break; 302 case IC_PLUS: 303 Val = Op1.second + Op2.second; 304 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 305 break; 306 case IC_MINUS: 307 Val = Op1.second - Op2.second; 308 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 309 break; 310 case IC_MULTIPLY: 311 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 312 "Multiply operation with an immediate and a register!"); 313 Val = Op1.second * Op2.second; 314 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 315 break; 316 case IC_DIVIDE: 317 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 318 "Divide operation with an 
immediate and a register!"); 319 assert (Op2.second != 0 && "Division by zero!"); 320 Val = Op1.second / Op2.second; 321 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 322 break; 323 case IC_MOD: 324 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 325 "Modulo operation with an immediate and a register!"); 326 Val = Op1.second % Op2.second; 327 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 328 break; 329 case IC_OR: 330 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 331 "Or operation with an immediate and a register!"); 332 Val = Op1.second | Op2.second; 333 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 334 break; 335 case IC_XOR: 336 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 337 "Xor operation with an immediate and a register!"); 338 Val = Op1.second ^ Op2.second; 339 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 340 break; 341 case IC_AND: 342 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 343 "And operation with an immediate and a register!"); 344 Val = Op1.second & Op2.second; 345 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 346 break; 347 case IC_LSHIFT: 348 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 349 "Left shift operation with an immediate and a register!"); 350 Val = Op1.second << Op2.second; 351 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 352 break; 353 case IC_RSHIFT: 354 assert (Op1.first == IC_IMM && Op2.first == IC_IMM && 355 "Right shift operation with an immediate and a register!"); 356 Val = Op1.second >> Op2.second; 357 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 358 break; 359 case IC_EQ: 360 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 361 "Equals operation with an immediate and a register!"); 362 Val = (Op1.second == Op2.second) ? 
-1 : 0; 363 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 364 break; 365 case IC_NE: 366 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 367 "Not-equals operation with an immediate and a register!"); 368 Val = (Op1.second != Op2.second) ? -1 : 0; 369 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 370 break; 371 case IC_LT: 372 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 373 "Less-than operation with an immediate and a register!"); 374 Val = (Op1.second < Op2.second) ? -1 : 0; 375 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 376 break; 377 case IC_LE: 378 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 379 "Less-than-or-equal operation with an immediate and a " 380 "register!"); 381 Val = (Op1.second <= Op2.second) ? -1 : 0; 382 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 383 break; 384 case IC_GT: 385 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 386 "Greater-than operation with an immediate and a register!"); 387 Val = (Op1.second > Op2.second) ? -1 : 0; 388 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 389 break; 390 case IC_GE: 391 assert(Op1.first == IC_IMM && Op2.first == IC_IMM && 392 "Greater-than-or-equal operation with an immediate and a " 393 "register!"); 394 Val = (Op1.second >= Op2.second) ? 
-1 : 0; 395 OperandStack.push_back(std::make_pair(IC_IMM, Val)); 396 break; 397 } 398 } 399 } 400 assert (OperandStack.size() == 1 && "Expected a single result."); 401 return OperandStack.pop_back_val().second; 402 } 403 }; 404 405 enum IntelExprState { 406 IES_INIT, 407 IES_OR, 408 IES_XOR, 409 IES_AND, 410 IES_EQ, 411 IES_NE, 412 IES_LT, 413 IES_LE, 414 IES_GT, 415 IES_GE, 416 IES_LSHIFT, 417 IES_RSHIFT, 418 IES_PLUS, 419 IES_MINUS, 420 IES_OFFSET, 421 IES_CAST, 422 IES_NOT, 423 IES_MULTIPLY, 424 IES_DIVIDE, 425 IES_MOD, 426 IES_LBRAC, 427 IES_RBRAC, 428 IES_LPAREN, 429 IES_RPAREN, 430 IES_REGISTER, 431 IES_INTEGER, 432 IES_ERROR 433 }; 434 435 class IntelExprStateMachine { 436 IntelExprState State = IES_INIT, PrevState = IES_ERROR; 437 unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0; 438 int64_t Imm = 0; 439 const MCExpr *Sym = nullptr; 440 StringRef SymName; 441 InfixCalculator IC; 442 InlineAsmIdentifierInfo Info; 443 short BracCount = 0; 444 bool MemExpr = false; 445 bool BracketUsed = false; 446 bool OffsetOperator = false; 447 bool AttachToOperandIdx = false; 448 bool IsPIC = false; 449 SMLoc OffsetOperatorLoc; 450 AsmTypeInfo CurType; 451 452 bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { 453 if (Sym) { 454 ErrMsg = "cannot use more than one symbol in memory operand"; 455 return true; 456 } 457 Sym = Val; 458 SymName = ID; 459 return false; 460 } 461 462 public: 463 IntelExprStateMachine() = default; 464 465 void addImm(int64_t imm) { Imm += imm; } 466 short getBracCount() const { return BracCount; } 467 bool isMemExpr() const { return MemExpr; } 468 bool isBracketUsed() const { return BracketUsed; } 469 bool isOffsetOperator() const { return OffsetOperator; } 470 SMLoc getOffsetLoc() const { return OffsetOperatorLoc; } 471 unsigned getBaseReg() const { return BaseReg; } 472 unsigned getIndexReg() const { return IndexReg; } 473 unsigned getScale() const { return Scale; } 474 const MCExpr *getSym() const { return Sym; } 475 
StringRef getSymName() const { return SymName; } 476 StringRef getType() const { return CurType.Name; } 477 unsigned getSize() const { return CurType.Size; } 478 unsigned getElementSize() const { return CurType.ElementSize; } 479 unsigned getLength() const { return CurType.Length; } 480 int64_t getImm() { return Imm + IC.execute(); } 481 bool isValidEndState() const { 482 return State == IES_RBRAC || State == IES_RPAREN || 483 State == IES_INTEGER || State == IES_REGISTER || 484 State == IES_OFFSET; 485 } 486 487 // Is the intel expression appended after an operand index. 488 // [OperandIdx][Intel Expression] 489 // This is neccessary for checking if it is an independent 490 // intel expression at back end when parse inline asm. 491 void setAppendAfterOperand() { AttachToOperandIdx = true; } 492 493 bool isPIC() const { return IsPIC; } 494 void setPIC() { IsPIC = true; } 495 496 bool hadError() const { return State == IES_ERROR; } 497 const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; } 498 499 bool regsUseUpError(StringRef &ErrMsg) { 500 // This case mostly happen in inline asm, e.g. Arr[BaseReg + IndexReg] 501 // can not intruduce additional register in inline asm in PIC model. 
502 if (IsPIC && AttachToOperandIdx) 503 ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!"; 504 else 505 ErrMsg = "BaseReg/IndexReg already set!"; 506 return true; 507 } 508 509 void onOr() { 510 IntelExprState CurrState = State; 511 switch (State) { 512 default: 513 State = IES_ERROR; 514 break; 515 case IES_INTEGER: 516 case IES_RPAREN: 517 case IES_REGISTER: 518 State = IES_OR; 519 IC.pushOperator(IC_OR); 520 break; 521 } 522 PrevState = CurrState; 523 } 524 void onXor() { 525 IntelExprState CurrState = State; 526 switch (State) { 527 default: 528 State = IES_ERROR; 529 break; 530 case IES_INTEGER: 531 case IES_RPAREN: 532 case IES_REGISTER: 533 State = IES_XOR; 534 IC.pushOperator(IC_XOR); 535 break; 536 } 537 PrevState = CurrState; 538 } 539 void onAnd() { 540 IntelExprState CurrState = State; 541 switch (State) { 542 default: 543 State = IES_ERROR; 544 break; 545 case IES_INTEGER: 546 case IES_RPAREN: 547 case IES_REGISTER: 548 State = IES_AND; 549 IC.pushOperator(IC_AND); 550 break; 551 } 552 PrevState = CurrState; 553 } 554 void onEq() { 555 IntelExprState CurrState = State; 556 switch (State) { 557 default: 558 State = IES_ERROR; 559 break; 560 case IES_INTEGER: 561 case IES_RPAREN: 562 case IES_REGISTER: 563 State = IES_EQ; 564 IC.pushOperator(IC_EQ); 565 break; 566 } 567 PrevState = CurrState; 568 } 569 void onNE() { 570 IntelExprState CurrState = State; 571 switch (State) { 572 default: 573 State = IES_ERROR; 574 break; 575 case IES_INTEGER: 576 case IES_RPAREN: 577 case IES_REGISTER: 578 State = IES_NE; 579 IC.pushOperator(IC_NE); 580 break; 581 } 582 PrevState = CurrState; 583 } 584 void onLT() { 585 IntelExprState CurrState = State; 586 switch (State) { 587 default: 588 State = IES_ERROR; 589 break; 590 case IES_INTEGER: 591 case IES_RPAREN: 592 case IES_REGISTER: 593 State = IES_LT; 594 IC.pushOperator(IC_LT); 595 break; 596 } 597 PrevState = CurrState; 598 } 599 void onLE() { 600 IntelExprState CurrState = State; 601 switch (State) { 
602 default: 603 State = IES_ERROR; 604 break; 605 case IES_INTEGER: 606 case IES_RPAREN: 607 case IES_REGISTER: 608 State = IES_LE; 609 IC.pushOperator(IC_LE); 610 break; 611 } 612 PrevState = CurrState; 613 } 614 void onGT() { 615 IntelExprState CurrState = State; 616 switch (State) { 617 default: 618 State = IES_ERROR; 619 break; 620 case IES_INTEGER: 621 case IES_RPAREN: 622 case IES_REGISTER: 623 State = IES_GT; 624 IC.pushOperator(IC_GT); 625 break; 626 } 627 PrevState = CurrState; 628 } 629 void onGE() { 630 IntelExprState CurrState = State; 631 switch (State) { 632 default: 633 State = IES_ERROR; 634 break; 635 case IES_INTEGER: 636 case IES_RPAREN: 637 case IES_REGISTER: 638 State = IES_GE; 639 IC.pushOperator(IC_GE); 640 break; 641 } 642 PrevState = CurrState; 643 } 644 void onLShift() { 645 IntelExprState CurrState = State; 646 switch (State) { 647 default: 648 State = IES_ERROR; 649 break; 650 case IES_INTEGER: 651 case IES_RPAREN: 652 case IES_REGISTER: 653 State = IES_LSHIFT; 654 IC.pushOperator(IC_LSHIFT); 655 break; 656 } 657 PrevState = CurrState; 658 } 659 void onRShift() { 660 IntelExprState CurrState = State; 661 switch (State) { 662 default: 663 State = IES_ERROR; 664 break; 665 case IES_INTEGER: 666 case IES_RPAREN: 667 case IES_REGISTER: 668 State = IES_RSHIFT; 669 IC.pushOperator(IC_RSHIFT); 670 break; 671 } 672 PrevState = CurrState; 673 } 674 bool onPlus(StringRef &ErrMsg) { 675 IntelExprState CurrState = State; 676 switch (State) { 677 default: 678 State = IES_ERROR; 679 break; 680 case IES_INTEGER: 681 case IES_RPAREN: 682 case IES_REGISTER: 683 case IES_OFFSET: 684 State = IES_PLUS; 685 IC.pushOperator(IC_PLUS); 686 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 687 // If we already have a BaseReg, then assume this is the IndexReg with 688 // no explicit scale. 
689 if (!BaseReg) { 690 BaseReg = TmpReg; 691 } else { 692 if (IndexReg) 693 return regsUseUpError(ErrMsg); 694 IndexReg = TmpReg; 695 Scale = 0; 696 } 697 } 698 break; 699 } 700 PrevState = CurrState; 701 return false; 702 } 703 bool onMinus(StringRef &ErrMsg) { 704 IntelExprState CurrState = State; 705 switch (State) { 706 default: 707 State = IES_ERROR; 708 break; 709 case IES_OR: 710 case IES_XOR: 711 case IES_AND: 712 case IES_EQ: 713 case IES_NE: 714 case IES_LT: 715 case IES_LE: 716 case IES_GT: 717 case IES_GE: 718 case IES_LSHIFT: 719 case IES_RSHIFT: 720 case IES_PLUS: 721 case IES_NOT: 722 case IES_MULTIPLY: 723 case IES_DIVIDE: 724 case IES_MOD: 725 case IES_LPAREN: 726 case IES_RPAREN: 727 case IES_LBRAC: 728 case IES_RBRAC: 729 case IES_INTEGER: 730 case IES_REGISTER: 731 case IES_INIT: 732 case IES_OFFSET: 733 State = IES_MINUS; 734 // push minus operator if it is not a negate operator 735 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN || 736 CurrState == IES_INTEGER || CurrState == IES_RBRAC || 737 CurrState == IES_OFFSET) 738 IC.pushOperator(IC_MINUS); 739 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 740 // We have negate operator for Scale: it's illegal 741 ErrMsg = "Scale can't be negative"; 742 return true; 743 } else 744 IC.pushOperator(IC_NEG); 745 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 746 // If we already have a BaseReg, then assume this is the IndexReg with 747 // no explicit scale. 
748 if (!BaseReg) { 749 BaseReg = TmpReg; 750 } else { 751 if (IndexReg) 752 return regsUseUpError(ErrMsg); 753 IndexReg = TmpReg; 754 Scale = 0; 755 } 756 } 757 break; 758 } 759 PrevState = CurrState; 760 return false; 761 } 762 void onNot() { 763 IntelExprState CurrState = State; 764 switch (State) { 765 default: 766 State = IES_ERROR; 767 break; 768 case IES_OR: 769 case IES_XOR: 770 case IES_AND: 771 case IES_EQ: 772 case IES_NE: 773 case IES_LT: 774 case IES_LE: 775 case IES_GT: 776 case IES_GE: 777 case IES_LSHIFT: 778 case IES_RSHIFT: 779 case IES_PLUS: 780 case IES_MINUS: 781 case IES_NOT: 782 case IES_MULTIPLY: 783 case IES_DIVIDE: 784 case IES_MOD: 785 case IES_LPAREN: 786 case IES_LBRAC: 787 case IES_INIT: 788 State = IES_NOT; 789 IC.pushOperator(IC_NOT); 790 break; 791 } 792 PrevState = CurrState; 793 } 794 bool onRegister(unsigned Reg, StringRef &ErrMsg) { 795 IntelExprState CurrState = State; 796 switch (State) { 797 default: 798 State = IES_ERROR; 799 break; 800 case IES_PLUS: 801 case IES_LPAREN: 802 case IES_LBRAC: 803 State = IES_REGISTER; 804 TmpReg = Reg; 805 IC.pushOperand(IC_REGISTER); 806 break; 807 case IES_MULTIPLY: 808 // Index Register - Scale * Register 809 if (PrevState == IES_INTEGER) { 810 if (IndexReg) 811 return regsUseUpError(ErrMsg); 812 State = IES_REGISTER; 813 IndexReg = Reg; 814 // Get the scale and replace the 'Scale * Register' with '0'. 
815 Scale = IC.popOperand(); 816 if (checkScale(Scale, ErrMsg)) 817 return true; 818 IC.pushOperand(IC_IMM); 819 IC.popOperator(); 820 } else { 821 State = IES_ERROR; 822 } 823 break; 824 } 825 PrevState = CurrState; 826 return false; 827 } 828 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName, 829 const InlineAsmIdentifierInfo &IDInfo, 830 const AsmTypeInfo &Type, bool ParsingMSInlineAsm, 831 StringRef &ErrMsg) { 832 // InlineAsm: Treat an enum value as an integer 833 if (ParsingMSInlineAsm) 834 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 835 return onInteger(IDInfo.Enum.EnumVal, ErrMsg); 836 // Treat a symbolic constant like an integer 837 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef)) 838 return onInteger(CE->getValue(), ErrMsg); 839 PrevState = State; 840 switch (State) { 841 default: 842 State = IES_ERROR; 843 break; 844 case IES_CAST: 845 case IES_PLUS: 846 case IES_MINUS: 847 case IES_NOT: 848 case IES_INIT: 849 case IES_LBRAC: 850 case IES_LPAREN: 851 if (setSymRef(SymRef, SymRefName, ErrMsg)) 852 return true; 853 MemExpr = true; 854 State = IES_INTEGER; 855 IC.pushOperand(IC_IMM); 856 if (ParsingMSInlineAsm) 857 Info = IDInfo; 858 setTypeInfo(Type); 859 break; 860 } 861 return false; 862 } 863 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) { 864 IntelExprState CurrState = State; 865 switch (State) { 866 default: 867 State = IES_ERROR; 868 break; 869 case IES_PLUS: 870 case IES_MINUS: 871 case IES_NOT: 872 case IES_OR: 873 case IES_XOR: 874 case IES_AND: 875 case IES_EQ: 876 case IES_NE: 877 case IES_LT: 878 case IES_LE: 879 case IES_GT: 880 case IES_GE: 881 case IES_LSHIFT: 882 case IES_RSHIFT: 883 case IES_DIVIDE: 884 case IES_MOD: 885 case IES_MULTIPLY: 886 case IES_LPAREN: 887 case IES_INIT: 888 case IES_LBRAC: 889 State = IES_INTEGER; 890 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) { 891 // Index Register - Register * Scale 892 if (IndexReg) 893 return regsUseUpError(ErrMsg); 894 IndexReg = TmpReg; 895 
Scale = TmpInt; 896 if (checkScale(Scale, ErrMsg)) 897 return true; 898 // Get the scale and replace the 'Register * Scale' with '0'. 899 IC.popOperator(); 900 } else { 901 IC.pushOperand(IC_IMM, TmpInt); 902 } 903 break; 904 } 905 PrevState = CurrState; 906 return false; 907 } 908 void onStar() { 909 PrevState = State; 910 switch (State) { 911 default: 912 State = IES_ERROR; 913 break; 914 case IES_INTEGER: 915 case IES_REGISTER: 916 case IES_RPAREN: 917 State = IES_MULTIPLY; 918 IC.pushOperator(IC_MULTIPLY); 919 break; 920 } 921 } 922 void onDivide() { 923 PrevState = State; 924 switch (State) { 925 default: 926 State = IES_ERROR; 927 break; 928 case IES_INTEGER: 929 case IES_RPAREN: 930 State = IES_DIVIDE; 931 IC.pushOperator(IC_DIVIDE); 932 break; 933 } 934 } 935 void onMod() { 936 PrevState = State; 937 switch (State) { 938 default: 939 State = IES_ERROR; 940 break; 941 case IES_INTEGER: 942 case IES_RPAREN: 943 State = IES_MOD; 944 IC.pushOperator(IC_MOD); 945 break; 946 } 947 } 948 bool onLBrac() { 949 if (BracCount) 950 return true; 951 PrevState = State; 952 switch (State) { 953 default: 954 State = IES_ERROR; 955 break; 956 case IES_RBRAC: 957 case IES_INTEGER: 958 case IES_RPAREN: 959 State = IES_PLUS; 960 IC.pushOperator(IC_PLUS); 961 CurType.Length = 1; 962 CurType.Size = CurType.ElementSize; 963 break; 964 case IES_INIT: 965 case IES_CAST: 966 assert(!BracCount && "BracCount should be zero on parsing's start"); 967 State = IES_LBRAC; 968 break; 969 } 970 MemExpr = true; 971 BracketUsed = true; 972 BracCount++; 973 return false; 974 } 975 bool onRBrac(StringRef &ErrMsg) { 976 IntelExprState CurrState = State; 977 switch (State) { 978 default: 979 State = IES_ERROR; 980 break; 981 case IES_INTEGER: 982 case IES_OFFSET: 983 case IES_REGISTER: 984 case IES_RPAREN: 985 if (BracCount-- != 1) { 986 ErrMsg = "unexpected bracket encountered"; 987 return true; 988 } 989 State = IES_RBRAC; 990 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) { 991 // 
If we already have a BaseReg, then assume this is the IndexReg with 992 // no explicit scale. 993 if (!BaseReg) { 994 BaseReg = TmpReg; 995 } else { 996 if (IndexReg) 997 return regsUseUpError(ErrMsg); 998 IndexReg = TmpReg; 999 Scale = 0; 1000 } 1001 } 1002 break; 1003 } 1004 PrevState = CurrState; 1005 return false; 1006 } 1007 void onLParen() { 1008 IntelExprState CurrState = State; 1009 switch (State) { 1010 default: 1011 State = IES_ERROR; 1012 break; 1013 case IES_PLUS: 1014 case IES_MINUS: 1015 case IES_NOT: 1016 case IES_OR: 1017 case IES_XOR: 1018 case IES_AND: 1019 case IES_EQ: 1020 case IES_NE: 1021 case IES_LT: 1022 case IES_LE: 1023 case IES_GT: 1024 case IES_GE: 1025 case IES_LSHIFT: 1026 case IES_RSHIFT: 1027 case IES_MULTIPLY: 1028 case IES_DIVIDE: 1029 case IES_MOD: 1030 case IES_LPAREN: 1031 case IES_INIT: 1032 case IES_LBRAC: 1033 State = IES_LPAREN; 1034 IC.pushOperator(IC_LPAREN); 1035 break; 1036 } 1037 PrevState = CurrState; 1038 } 1039 void onRParen() { 1040 PrevState = State; 1041 switch (State) { 1042 default: 1043 State = IES_ERROR; 1044 break; 1045 case IES_INTEGER: 1046 case IES_OFFSET: 1047 case IES_REGISTER: 1048 case IES_RBRAC: 1049 case IES_RPAREN: 1050 State = IES_RPAREN; 1051 IC.pushOperator(IC_RPAREN); 1052 break; 1053 } 1054 } 1055 bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID, 1056 const InlineAsmIdentifierInfo &IDInfo, 1057 bool ParsingMSInlineAsm, StringRef &ErrMsg) { 1058 PrevState = State; 1059 switch (State) { 1060 default: 1061 ErrMsg = "unexpected offset operator expression"; 1062 return true; 1063 case IES_PLUS: 1064 case IES_INIT: 1065 case IES_LBRAC: 1066 if (setSymRef(Val, ID, ErrMsg)) 1067 return true; 1068 OffsetOperator = true; 1069 OffsetOperatorLoc = OffsetLoc; 1070 State = IES_OFFSET; 1071 // As we cannot yet resolve the actual value (offset), we retain 1072 // the requested semantics by pushing a '0' to the operands stack 1073 IC.pushOperand(IC_IMM); 1074 if (ParsingMSInlineAsm) { 1075 Info = 
IDInfo; 1076 } 1077 break; 1078 } 1079 return false; 1080 } 1081 void onCast(AsmTypeInfo Info) { 1082 PrevState = State; 1083 switch (State) { 1084 default: 1085 State = IES_ERROR; 1086 break; 1087 case IES_LPAREN: 1088 setTypeInfo(Info); 1089 State = IES_CAST; 1090 break; 1091 } 1092 } 1093 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; } 1094 }; 1095 1096 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt, 1097 bool MatchingInlineAsm = false) { 1098 MCAsmParser &Parser = getParser(); 1099 if (MatchingInlineAsm) { 1100 if (!getLexer().isAtStartOfStatement()) 1101 Parser.eatToEndOfStatement(); 1102 return false; 1103 } 1104 return Parser.Error(L, Msg, Range); 1105 } 1106 1107 bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc, 1108 SMLoc EndLoc); 1109 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1110 bool RestoreOnFailure); 1111 1112 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); 1113 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); 1114 bool IsSIReg(unsigned Reg); 1115 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg); 1116 void 1117 AddDefaultSrcDestOperands(OperandVector &Operands, 1118 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 1119 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst); 1120 bool VerifyAndAdjustOperands(OperandVector &OrigOperands, 1121 OperandVector &FinalOperands); 1122 bool parseOperand(OperandVector &Operands, StringRef Name); 1123 bool parseATTOperand(OperandVector &Operands); 1124 bool parseIntelOperand(OperandVector &Operands, StringRef Name); 1125 bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, 1126 InlineAsmIdentifierInfo &Info, SMLoc &End); 1127 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); 1128 unsigned IdentifyIntelInlineAsmOperator(StringRef Name); 1129 unsigned ParseIntelInlineAsmOperator(unsigned OpKind); 1130 unsigned IdentifyMasmOperator(StringRef Name); 1131 bool 
ParseMasmOperator(unsigned OpKind, int64_t &Val); 1132 bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands); 1133 bool parseCFlagsOp(OperandVector &Operands); 1134 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM, 1135 bool &ParseError, SMLoc &End); 1136 bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM, 1137 bool &ParseError, SMLoc &End); 1138 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start, 1139 SMLoc End); 1140 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); 1141 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier, 1142 InlineAsmIdentifierInfo &Info, 1143 bool IsUnevaluatedOperand, SMLoc &End, 1144 bool IsParsingOffsetOperator = false); 1145 void tryParseOperandIdx(AsmToken::TokenKind PrevTK, 1146 IntelExprStateMachine &SM); 1147 1148 bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc, 1149 SMLoc EndLoc, OperandVector &Operands); 1150 1151 X86::CondCode ParseConditionCode(StringRef CCode); 1152 1153 bool ParseIntelMemoryOperandSize(unsigned &Size); 1154 bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp, 1155 unsigned BaseReg, unsigned IndexReg, 1156 unsigned Scale, bool NonAbsMem, SMLoc Start, 1157 SMLoc End, unsigned Size, StringRef Identifier, 1158 const InlineAsmIdentifierInfo &Info, 1159 OperandVector &Operands); 1160 1161 bool parseDirectiveArch(); 1162 bool parseDirectiveNops(SMLoc L); 1163 bool parseDirectiveEven(SMLoc L); 1164 bool ParseDirectiveCode(StringRef IDVal, SMLoc L); 1165 1166 /// CodeView FPO data directives. 1167 bool parseDirectiveFPOProc(SMLoc L); 1168 bool parseDirectiveFPOSetFrame(SMLoc L); 1169 bool parseDirectiveFPOPushReg(SMLoc L); 1170 bool parseDirectiveFPOStackAlloc(SMLoc L); 1171 bool parseDirectiveFPOStackAlign(SMLoc L); 1172 bool parseDirectiveFPOEndPrologue(SMLoc L); 1173 bool parseDirectiveFPOEndProc(SMLoc L); 1174 1175 /// SEH directives. 
bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);

  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
                                  OperandVector &Operands, MCStreamer &Out,
                                  uint64_t &ErrorInfo, bool MatchingInlineAsm);

  bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
                                    OperandVector &Operands, MCStreamer &Out,
                                    uint64_t &ErrorInfo,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512 specific operand primitives: masked registers
  /// ({%k<NUM>}, {z}) and memory broadcasting ({1to<NUM>}) primitives,
  /// updating the Operands vector if required.
  /// Returns false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);

  /// Returns true if the current subtarget has the X86::Is64Bit feature.
  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  /// Returns true if the current subtarget has the X86::Is32Bit feature.
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  /// Returns true if the current subtarget has the X86::Is16Bit feature.
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  /// Switches the subtarget to \p mode (one of the X86::Is64Bit, X86::Is32Bit
  /// or X86::Is16Bit feature bits) and recomputes the available features.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) is the set of bits to toggle: the currently active
    // mode bit(s) plus the requested one.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // Exactly the requested mode bit must be set afterwards.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }

  /// Returns 16, 32 or 64 according to the current mode.
  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }

  /// Returns true when the parser's assembler dialect is non-zero.
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };

  /// Constructs the parser, registers the ".word"/".2byte" directive alias
  /// with the generic parser and computes the initial available features.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }

  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;

  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace

#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"

/// Validates the base register, index register and scale of a memory operand.
///
/// \returns true and sets \p ErrMsg if the combination is invalid; otherwise
/// returns the result of checkScale(), which rejects scales other than
/// 1, 2, 4 or 8.
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can also be a 128-bit, 256-bit or 512-bit
  // vector register.

  // The base must be RIP/EIP or a 16/32/64-bit general purpose register.
  if (BaseReg != 0 &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // The index must be EIZ/RIZ, a general purpose register or a vector
  // register.
  if (IndexReg != 0 &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // An IP-relative base cannot be combined with an index, and EIP/RIP/ESP/RSP
  // are never accepted as an index.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP ||
      IndexReg == X86::ESP || IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  // With both registers present, their widths must agree.
  if (BaseReg != 0 && IndexReg != 0) {
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      // In 16-bit addressing the base must be BX or BP and the index SI or DI.
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}

/// Resolves \p RegName to a register and stores it in \p RegNo.
///
/// A leading '%' is stripped, the name is tried as written and then in lower
/// case, and "db0".."db15" are accepted as aliases for DR0..DR15. EFLAGS and
/// MXCSR are treated as non-registers while parsing Intel-syntax MS inline
/// asm. Outside 64-bit mode, registers that only exist in 64-bit mode are
/// rejected with an error. Sets UseApxExtendedReg when the match is an APX
/// extended register.
///
/// \returns false on success and true on failure. An unknown name fails
/// without a diagnostic in Intel syntax and with "invalid register name"
/// otherwise.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  if (X86II::isApxExtendedReg(RegNo))
    UseApxExtendedReg = true;

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && RegName.starts_with("db")) {
    if (RegName.size() == 3) {
      // Single digit: db0..db9.
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      // Two digits: db10..db15.
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (RegNo == 0) {
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}

/// Parses a register starting at the current token.
///
/// In AT&T syntax an optional leading '%' is consumed first. The multi-token
/// form "st(N)" with N in 0..7 is recognised after "st". On success the
/// register's tokens are consumed and \p StartLoc / \p EndLoc are set from
/// the tokens that were read.
///
/// If \p RestoreOnFailure is set, the tokens recorded so far are pushed back
/// onto the lexer when parsing fails.
///
/// \returns false on success, true on failure.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Tokens consumed so far; un-lexed in reverse order by OnFailure.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

/// Parses a register without restoring the lexer on failure.
bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

/// Tries to parse a register, restoring the lexer on failure.
///
/// Pending parser errors are always cleared. \returns Failure if there was a
/// pending error, NoMatch if ParseRegister failed without one, and Success
/// otherwise.
ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                           SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return ParseStatus::Failure;
  if (Result)
    return ParseStatus::NoMatch;
  return ParseStatus::Success;
}

/// Creates a memory operand with a zero displacement whose base register is
/// RSI in 64-bit mode, ESI in 32-bit mode or when Code16GCC is set, and SI
/// otherwise.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

/// Creates a memory operand with a zero displacement whose base register is
/// RDI in 64-bit mode, EDI in 32-bit mode or when Code16GCC is set, and DI
/// otherwise.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

/// Returns true for RSI/ESI/SI and false for RDI/EDI/DI. Any other register
/// is a programming error (llvm_unreachable).
bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}

/// Returns the SI-family (if \p IsSIReg) or DI-family register belonging to
/// the GR64, GR32 or GR16 register class \p RegClassID. \p Reg is not
/// consulted.
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  switch (RegClassID) {
  default: llvm_unreachable("Unexpected register class");
  case X86::GR64RegClassID:
    return IsSIReg ? X86::RSI : X86::RDI;
  case X86::GR32RegClassID:
    return IsSIReg ? X86::ESI : X86::EDI;
  case X86::GR16RegClassID:
    return IsSIReg ? X86::SI : X86::DI;
  }
}

/// Appends \p Src and \p Dst to \p Operands in the order of the active
/// syntax: destination first for Intel syntax, source first otherwise.
void X86AsmParser::AddDefaultSrcDestOperands(
    OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
    std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
  if (isParsingIntelSyntax()) {
    Operands.push_back(std::move(Dst));
    Operands.push_back(std::move(Src));
  }
  else {
    Operands.push_back(std::move(Src));
    Operands.push_back(std::move(Dst));
  }
}

/// Reconciles the operands written by the user (\p OrigOperands, whose first
/// element is the instruction name) with the default operands in
/// \p FinalOperands, then replaces the user's operands with the final ones.
///
/// For memory operands the size and segment register are taken from the
/// user's operand, and the base register becomes the SI/DI register of the
/// user's register class. A warning is queued when that differs from the
/// register the user wrote.
///
/// \returns false on success, and also when the operand kinds do not line up;
/// in that case \p OrigOperands is left untouched so that normal matching
/// reports the problem. Returns the result of Error() when the memory
/// operands use base registers of different register classes.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Pick the SI/DI register that matches the width the user wrote.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment, to
    // prevent legal cases like "movsd (%rax), %xmm0" from mistakenly
    // producing warnings.
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}

/// Parses one operand using the Intel or AT&T operand parser, depending on
/// the active syntax.
bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
  if (isParsingIntelSyntax())
    return parseIntelOperand(Operands, Name);

  return parseATTOperand(Operands);
}

/// Creates the memory operand for an MS inline asm expression and appends it
/// to \p Operands. Always returns false.
///
/// Label identifiers become a displacement-only memory reference. For
/// variables, the frontend size is computed from the variable's type and the
/// declaration is attached to the operand. Global variables combined with a
/// base and/or index register are emitted through the displacement-only
/// CreateMem overload.
bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                           unsigned BaseReg, unsigned IndexReg,
                                           unsigned Scale, bool NonAbsMem,
                                           SMLoc Start, SMLoc End,
                                           unsigned Size, StringRef Identifier,
                                           const InlineAsmIdentifierInfo &Info,
                                           OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol.  The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is very common for MS InlineAsm to use a global variable together with
  // one or two registers in a memory expression, which is then not
  // addressable via rip/eip.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}

// Some operators have named synonyms ("not", "or", "shl", "shr", "xor",
// "and", "mod", "offset"). Check whether a candidate string is such a named
// operator and, if so, invoke the appropriate state machine handler.
//
// Returns true if Name was handled as an operator (or if handling "offset"
// failed, in which case ParseError is also set); returns false if Name is not
// a named operator. For every operator except "offset" the current token is
// consumed and End is updated.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name != Name.lower() && Name != Name.upper() &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // "offset" does not consume a token here; all other operators do.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
// Handles the MASM comparison operators "eq", "ne", "lt", "le", "gt" and
// "ge" (matched case-insensitively). Returns true and consumes the token if
// Name is one of them, false otherwise. ParseError is never modified here.
bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
                                          IntelExprStateMachine &SM,
                                          bool &ParseError, SMLoc &End) {
  if (Name.equals_insensitive("eq")) {
    SM.onEq();
  } else if (Name.equals_insensitive("ne")) {
    SM.onNE();
  } else if (Name.equals_insensitive("lt")) {
    SM.onLT();
  } else if (Name.equals_insensitive("le")) {
    SM.onLE();
  } else if (Name.equals_insensitive("gt")) {
    SM.onGT();
  } else if (Name.equals_insensitive("ge")) {
    SM.onGE();
  } else {
    return false;
  }
  End = consumeToken();
  return true;
}

// Check whether the current Intel expression is appended after an operand.
// Like: [Operand][Intel Expression]
// If the previous token was ']', tell the state machine that this expression
// follows an operand; otherwise do nothing.
void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                                      IntelExprStateMachine &SM) {
  if (PrevTK != AsmToken::RBrac)
    return;

  SM.setAppendAfterOperand();
}

/// Parses an Intel-syntax expression by feeding tokens to \p SM until a token
/// is reached that the expression cannot contain and the state machine is in
/// a valid end state.
///
/// \p End is updated as tokens are consumed.
/// \returns false on success, true on error. Most error paths emit a
/// diagnostic through Error().
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  StringRef ErrMsg;

  AsmToken::TokenKind PrevTK = AsmToken::Error;

  if (getContext().getObjectFileInfo()->isPositionIndependent())
    SM.setPIC();

  bool Done = false;
  while (!Done) {
    // Get a fresh reference on each loop iteration in case the previous
    // iteration moved the token storage during UnLex().
    const AsmToken &Tok = Parser.getTok();

    // Cases that consume their own tokens clear this so the common
    // consumeToken() after the switch is skipped.
    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      // Any other token ends the expression if the state machine allows it.
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::Error:
      return Error(getLexer().getErrLoc(), getLexer().getErr());
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dot:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      // MASM allows spaces around the dot operator (e.g., "var . x")
      Lex();
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dollar:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      [[fallthrough]];
    case AsmToken::String: {
      if (Parser.isParsingMasm()) {
        // MASM parsers handle strings in expressions as constants.
        SMLoc ValueLoc = Tok.getLoc();
        int64_t Res;
        const MCExpr *Val;
        if (Parser.parsePrimaryExpr(Val, End, nullptr))
          return true;
        UpdateLocLex = false;
        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
          return Error(ValueLoc, "expected absolute value");
        if (SM.onInteger(Res, ErrMsg))
          return Error(ValueLoc, ErrMsg);
        break;
      }
      [[fallthrough]];
    }
    case AsmToken::At:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
      if (Parser.isParsingMasm()) {
        // Split "<lhs>.<rhs>" into separate tokens and push them back onto
        // the lexer (in reverse order) so they are re-lexed individually on
        // the next iteration.
        size_t DotOffset = Identifier.find_first_of('.');
        if (DotOffset != StringRef::npos) {
          consumeToken();
          StringRef LHS = Identifier.slice(0, DotOffset);
          StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
          StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
          if (!RHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
          }
          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
          if (!LHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
          }
          break;
        }
      }
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_insensitive("ptr")) {
          AsmTypeInfo Info;
          if (Parser.lookUpType(Identifier, Info))
            return Error(Tok.getLoc(), "unknown type");
          SM.onCast(Info);
          // Eat type and PTR.
          consumeToken();
          End = consumeToken();
          break;
        }
      }
      // Register, or (MASM only) <register>.<field>
      MCRegister Reg;
      if (Tok.is(AsmToken::Identifier)) {
        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
          if (SM.onRegister(Reg, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (Parser.isParsingMasm()) {
          const std::pair<StringRef, StringRef> IDField =
              Tok.getString().split('.');
          const StringRef ID = IDField.first, Field = IDField.second;
          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
          if (!Field.empty() &&
              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
            if (SM.onRegister(Reg, ErrMsg))
              return Error(IdentLoc, ErrMsg);

            // <register>.<field> is treated as <register> + offset of field.
            AsmFieldInfo Info;
            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
            if (Parser.lookUpField(Field, Info))
              return Error(FieldStartLoc, "unknown offset");
            else if (SM.onPlus(ErrMsg))
              return Error(getTok().getLoc(), ErrMsg);
            else if (SM.onInteger(Info.Offset, ErrMsg))
              return Error(IdentLoc, ErrMsg);
            SM.setTypeInfo(Info.Type);

            End = consumeToken();
            break;
          }
        }
      }
      // Operator synonyms ("not", "or" etc.)
      bool ParseError = false;
      if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      if (Parser.isParsingMasm() &&
          ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      // Symbol reference, when parsing assembly content
      InlineAsmIdentifierInfo Info;
      AsmFieldInfo FieldInfo;
      const MCExpr *Val;
      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
        // MS Dot Operator expression
        if (Identifier.count('.') &&
            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
          if (ParseIntelDotOperator(SM, End))
            return true;
          break;
        }
      }
      if (isParsingMSInlineAsm()) {
        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
          // A zero result from ParseIntelInlineAsmOperator is treated as
          // failure.
          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
            if (SM.onInteger(Val, ErrMsg))
              return Error(IdentLoc, ErrMsg);
          } else {
            return true;
          }
          break;
        }
        // MS InlineAsm identifier
        // Call parseIdentifier() to combine @ with the identifier behind it.
        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
          return Error(IdentLoc, "expected identifier");
        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
          return true;
        else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     true, ErrMsg))
          return Error(IdentLoc, ErrMsg);
        break;
      }
      if (Parser.isParsingMasm()) {
        if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
          int64_t Val;
          if (ParseMasmOperator(OpKind, Val))
            return true;
          if (SM.onInteger(Val, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
          // Field offset immediate; <TYPE>.<field specification>
          Lex(); // eat type
          // EndDot tracks whether a '.' has been seen that still needs a
          // field name after it.
          bool EndDot = parseOptionalToken(AsmToken::Dot);
          while (EndDot || (getTok().is(AsmToken::Identifier) &&
                            getTok().getString().starts_with("."))) {
            getParser().parseIdentifier(Identifier);
            if (!EndDot)
              Identifier.consume_front(".");
            EndDot = Identifier.consume_back(".");
            if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
                                        FieldInfo)) {
              SMLoc IDEnd =
                  SMLoc::getFromPointer(Identifier.data() + Identifier.size());
              return Error(IdentLoc, "Unable to lookup field reference!",
                           SMRange(IdentLoc, IDEnd));
            }
            if (!EndDot)
              EndDot = parseOptionalToken(AsmToken::Dot);
          }
          if (SM.onInteger(FieldInfo.Offset, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
      }
      // Fall back to the generic primary-expression parser.
      if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
        return Error(Tok.getLoc(), "Unexpected identifier!");
      } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     false, ErrMsg)) {
        return Error(IdentLoc, ErrMsg);
      }
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          // A backward reference must name a symbol that is already defined.
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          InlineAsmIdentifierInfo Info;
          AsmTypeInfo Type;
          if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
                                  isParsingMSInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde:   SM.onNot(); break;
    case AsmToken::Star:    SM.onStar(); break;
    case AsmToken::Slash:   SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe:    SM.onOr(); break;
    case AsmToken::Caret:   SM.onXor(); break;
    case AsmToken::Amp:     SM.onAnd(); break;
    case AsmToken::LessLess:
                            SM.onLShift(); break;
    case AsmToken::GreaterGreater:
                            SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      tryParseOperandIdx(PrevTK, SM);
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac(ErrMsg)) {
        return Error(Tok.getLoc(), ErrMsg);
      }
      break;
    case AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen:  SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

/// Records the AsmRewrites needed to rewrite the Intel expression spanning
/// [\p Start, \p End) from the state collected in \p SM.
///
/// When the expression has a symbol displacement that is not an OFFSET
/// operator, the text before the symbol is skipped. If the expression is just
/// that symbol, the text after it is skipped as well and no further rewrite
/// is emitted. Otherwise an IntelExpr rewrite built from the base register,
/// index register, scale, offset name and immediate is added.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for a complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
2221 bool X86AsmParser::ParseIntelInlineAsmIdentifier( 2222 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, 2223 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) { 2224 MCAsmParser &Parser = getParser(); 2225 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly."); 2226 Val = nullptr; 2227 2228 StringRef LineBuf(Identifier.data()); 2229 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 2230 2231 const AsmToken &Tok = Parser.getTok(); 2232 SMLoc Loc = Tok.getLoc(); 2233 2234 // Advance the token stream until the end of the current token is 2235 // after the end of what the frontend claimed. 2236 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 2237 do { 2238 End = Tok.getEndLoc(); 2239 getLexer().Lex(); 2240 } while (End.getPointer() < EndPtr); 2241 Identifier = LineBuf; 2242 2243 // The frontend should end parsing on an assembler token boundary, unless it 2244 // failed parsing. 2245 assert((End.getPointer() == EndPtr || 2246 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) && 2247 "frontend claimed part of a token?"); 2248 2249 // If the identifier lookup was unsuccessful, assume that we are dealing with 2250 // a label. 2251 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) { 2252 StringRef InternalName = 2253 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 2254 Loc, false); 2255 assert(InternalName.size() && "We should have an internal name here."); 2256 // Push a rewrite for replacing the identifier name with the internal name, 2257 // unless we are parsing the operand of an offset operator 2258 if (!IsParsingOffsetOperator) 2259 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 2260 InternalName); 2261 else 2262 Identifier = InternalName; 2263 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 2264 return false; 2265 // Create the symbol reference. 
2266 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 2267 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2268 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 2269 return false; 2270 } 2271 2272 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand 2273 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) { 2274 MCAsmParser &Parser = getParser(); 2275 const AsmToken &Tok = Parser.getTok(); 2276 // Eat "{" and mark the current place. 2277 const SMLoc consumedToken = consumeToken(); 2278 if (Tok.isNot(AsmToken::Identifier)) 2279 return Error(Tok.getLoc(), "Expected an identifier after {"); 2280 if (Tok.getIdentifier().starts_with("r")) { 2281 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 2282 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 2283 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 2284 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 2285 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 2286 .Default(-1); 2287 if (-1 == rndMode) 2288 return Error(Tok.getLoc(), "Invalid rounding mode."); 2289 Parser.Lex(); // Eat "r*" of r*-sae 2290 if (!getLexer().is(AsmToken::Minus)) 2291 return Error(Tok.getLoc(), "Expected - at this point"); 2292 Parser.Lex(); // Eat "-" 2293 Parser.Lex(); // Eat the sae 2294 if (!getLexer().is(AsmToken::RCurly)) 2295 return Error(Tok.getLoc(), "Expected } at this point"); 2296 SMLoc End = Tok.getEndLoc(); 2297 Parser.Lex(); // Eat "}" 2298 const MCExpr *RndModeOp = 2299 MCConstantExpr::create(rndMode, Parser.getContext()); 2300 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End)); 2301 return false; 2302 } 2303 if (Tok.getIdentifier() == "sae") { 2304 Parser.Lex(); // Eat the sae 2305 if (!getLexer().is(AsmToken::RCurly)) 2306 return Error(Tok.getLoc(), "Expected } at this point"); 2307 Parser.Lex(); // Eat "}" 2308 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken)); 2309 return false; 2310 } 2311 return Error(Tok.getLoc(), 
"unknown token in expression"); 2312 } 2313 2314 /// Parse condtional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after 2315 /// mnemonic. 2316 bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) { 2317 MCAsmParser &Parser = getParser(); 2318 AsmToken Tok = Parser.getTok(); 2319 const SMLoc Start = Tok.getLoc(); 2320 if (!Tok.is(AsmToken::LCurly)) 2321 return Error(Tok.getLoc(), "Expected { at this point"); 2322 Parser.Lex(); // Eat "{" 2323 Tok = Parser.getTok(); 2324 if (Tok.getIdentifier().lower() != "dfv") 2325 return Error(Tok.getLoc(), "Expected dfv at this point"); 2326 Parser.Lex(); // Eat "dfv" 2327 Tok = Parser.getTok(); 2328 if (!Tok.is(AsmToken::Equal)) 2329 return Error(Tok.getLoc(), "Expected = at this point"); 2330 Parser.Lex(); // Eat "=" 2331 2332 Tok = Parser.getTok(); 2333 SMLoc End; 2334 if (Tok.is(AsmToken::RCurly)) { 2335 End = Tok.getEndLoc(); 2336 Operands.push_back(X86Operand::CreateImm( 2337 MCConstantExpr::create(0, Parser.getContext()), Start, End)); 2338 Parser.Lex(); // Eat "}" 2339 return false; 2340 } 2341 unsigned CFlags = 0; 2342 for (unsigned I = 0; I < 4; ++I) { 2343 Tok = Parser.getTok(); 2344 unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower()) 2345 .Case("of", 0x8) 2346 .Case("sf", 0x4) 2347 .Case("zf", 0x2) 2348 .Case("cf", 0x1) 2349 .Default(~0U); 2350 if (CFlag == ~0U) 2351 return Error(Tok.getLoc(), "Invalid conditional flags"); 2352 2353 if (CFlags & CFlag) 2354 return Error(Tok.getLoc(), "Duplicated conditional flag"); 2355 CFlags |= CFlag; 2356 2357 Parser.Lex(); // Eat one conditional flag 2358 Tok = Parser.getTok(); 2359 if (Tok.is(AsmToken::RCurly)) { 2360 End = Tok.getEndLoc(); 2361 Operands.push_back(X86Operand::CreateImm( 2362 MCConstantExpr::create(CFlags, Parser.getContext()), Start, End)); 2363 Parser.Lex(); // Eat "}" 2364 return false; 2365 } else if (I == 3) { 2366 return Error(Tok.getLoc(), "Expected } at this point"); 2367 } else if (Tok.isNot(AsmToken::Comma)) { 2368 return 
Error(Tok.getLoc(), "Expected } or , at this point"); 2369 } 2370 Parser.Lex(); // Eat "," 2371 } 2372 llvm_unreachable("Unexpected control flow"); 2373 } 2374 2375 /// Parse the '.' operator. 2376 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, 2377 SMLoc &End) { 2378 const AsmToken &Tok = getTok(); 2379 AsmFieldInfo Info; 2380 2381 // Drop the optional '.'. 2382 StringRef DotDispStr = Tok.getString(); 2383 DotDispStr.consume_front("."); 2384 StringRef TrailingDot; 2385 2386 // .Imm gets lexed as a real. 2387 if (Tok.is(AsmToken::Real)) { 2388 APInt DotDisp; 2389 if (DotDispStr.getAsInteger(10, DotDisp)) 2390 return Error(Tok.getLoc(), "Unexpected offset"); 2391 Info.Offset = DotDisp.getZExtValue(); 2392 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && 2393 Tok.is(AsmToken::Identifier)) { 2394 if (DotDispStr.ends_with(".")) { 2395 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1); 2396 DotDispStr = DotDispStr.drop_back(1); 2397 } 2398 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 2399 const StringRef Base = BaseMember.first, Member = BaseMember.second; 2400 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) && 2401 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) && 2402 getParser().lookUpField(DotDispStr, Info) && 2403 (!SemaCallback || 2404 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset))) 2405 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 2406 } else { 2407 return Error(Tok.getLoc(), "Unexpected token type!"); 2408 } 2409 2410 // Eat the DotExpression and update End 2411 End = SMLoc::getFromPointer(DotDispStr.data()); 2412 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); 2413 while (Tok.getLoc().getPointer() < DotExprEndLoc) 2414 Lex(); 2415 if (!TrailingDot.empty()) 2416 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot)); 2417 SM.addImm(Info.Offset); 2418 SM.setTypeInfo(Info.Type); 2419 return false; 2420 } 2421 
/// Parse the 'offset' operator.
/// This operator is used to specify the location of a given operand
///
/// The current token is the operator itself. Outside MS inline asm the operand
/// must be an Identifier or String and is parsed as a primary expression; in
/// MS inline asm it is resolved through ParseIntelInlineAsmIdentifier, and
/// enum values are rejected.
/// \returns true (after reporting an error) on failure, false on success.
bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                            InlineAsmIdentifierInfo &Info,
                                            SMLoc &End) {
  // Eat offset, mark start of identifier.
  SMLoc Start = Lex().getLoc();
  ID = getTok().getString();
  if (!isParsingMSInlineAsm()) {
    if ((getTok().isNot(AsmToken::Identifier) &&
         getTok().isNot(AsmToken::String)) ||
        getParser().parsePrimaryExpr(Val, End, nullptr))
      return Error(Start, "unexpected token!");
  } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
    return Error(Start, "unable to lookup expression");
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
    return Error(Start, "offset operator cannot yet handle constants");
  }
  return false;
}

// Query a candidate string for being an Intel assembly operator
// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
// Only the all-upper-case and all-lower-case spellings are recognized.
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name)
    .Cases("TYPE","type",IOK_TYPE)
    .Cases("SIZE","size",IOK_SIZE)
    .Cases("LENGTH","length",IOK_LENGTH)
    .Default(IOK_INVALID);
}

/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
/// returns the number of elements in an array.  It returns the value 1 for
/// non-array variables.  The SIZE operator returns the size of a C or C++
/// variable.  A variable's size is the product of its LENGTH and TYPE.  The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
///
/// \returns the operator's value, or 0 on failure. The visible caller treats a
/// zero result as an error.
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  Parser.Lex(); // Eat operator.

  const MCExpr *Val = nullptr;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*IsUnevaluatedOperand=*/true, End))
    return 0;

  // The operators are only meaningful for identifiers that resolved to a
  // variable.
  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    Error(Start, "unable to lookup expression");
    return 0;
  }

  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Var.Length; break;
  case IOK_SIZE: CVal = Info.Var.Size; break;
  case IOK_TYPE: CVal = Info.Var.Type; break;
  }

  return CVal;
}

// Query a candidate string for being a MASM operator
// Report back its kind, or MOK_INVALID if it does not evaluate to a known one.
// The match is case-insensitive (the name is lower-cased first).
unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name.lower())
      .Case("type", MOK_TYPE)
      .Cases("size", "sizeof", MOK_SIZEOF)
      .Cases("length", "lengthof", MOK_LENGTHOF)
      .Default(MOK_INVALID);
}

/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators.  The LENGTHOF operator
/// returns the number of elements in an array.  It returns the value 1 for
/// non-array variables.  The SIZEOF operator returns the size of a type or
/// variable in bytes.  A variable's size is the product of its LENGTH and TYPE.
/// The TYPE operator returns the size of a variable. If the variable is an
/// array, TYPE returns the size of a single element.
///
/// \param OpKind One of MOK_SIZEOF, MOK_LENGTHOF or MOK_TYPE.
/// \param Val    Receives the operator's value.
/// \returns true on failure, false on success.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      // NOTE(review): the results of these parseToken calls are not checked.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  // Not a (non-zero-sized) type name: evaluate the operand as an expression
  // and take the requested property from the state machine.
  if (!Val) {
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}

// Parse an optional "<size> PTR" prefix. Size is set to the operand size in
// bits, or to 0 when the current token is not a size keyword (in which case
// nothing is consumed). Returns true if a size keyword is not followed by
// 'PTR'/'ptr'.
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
  Size = StringSwitch<unsigned>(getTok().getString())
    .Cases("BYTE", "byte", 8)
    .Cases("WORD", "word", 16)
    .Cases("DWORD", "dword", 32)
    .Cases("FLOAT", "float", 32)
    .Cases("LONG", "long", 32)
    .Cases("FWORD", "fword", 48)
    .Cases("DOUBLE", "double", 64)
    .Cases("QWORD", "qword", 64)
    .Cases("MMWORD","mmword", 64)
    .Cases("XWORD", "xword", 80)
    .Cases("TBYTE", "tbyte", 80)
    .Cases("XMMWORD", "xmmword", 128)
    .Cases("YMMWORD", "ymmword", 256)
    .Cases("ZMMWORD", "zmmword", 512)
    .Default(0);
  if (Size) {
    const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
    if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
      return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
    Lex(); // Eat ptr.
  }
  return false;
}

// Parse one Intel-syntax operand and append it to Operands: a rounding-mode
// operand, a register, an immediate, or a memory reference (optionally with a
// "<size> PTR" prefix and/or a segment override). Name is the instruction
// mnemonic; it is only used to detect "jmp"/"call". Returns true on failure.
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Fold the symbol (if any) and the immediate into a single displacement
  // expression.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  // From here on the operand is a memory reference.
  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  // Without an explicit "<size> PTR", derive the size (in bits) from the
  // expression's element size.
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // With no explicit scale, move ESP/RSP out of the index position.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
                                   IsUnconditionalBranch && is64BitMode(),
                                   Start, End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    if (is64BitMode() &&
        ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        // A bare symbol whose size matches a pointer size for the current
        // mode is not treated as a possible direct branch destination.
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  // Use the full memory-operand form whenever any register (segment, base,
  // index or default base) is involved; otherwise the displacement-only form.
  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}

// Parse one AT&T-syntax operand and append it to Operands: a '$' immediate, a
// '{'-introduced rounding-mode operand, a register, or a memory reference
// (optionally with a segment prefix). Returns true on failure.
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supersede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This a memory operand or a register. We have some parsing complications
    // as a '(' may be part of an immediate expression or the addressing mode
    // block. This is complicated by the fact that an assembler-level variable
    // may refer either to a register or an immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return register that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}

// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
  // The match is case-sensitive: only lower-case spellings are listed.
  return StringSwitch<X86::CondCode>(CC)
      .Case("o", X86::COND_O)          // Overflow
      .Case("no", X86::COND_NO)        // No Overflow
      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
      .Cases("e", "z", X86::COND_E)    // Equal/Zero
      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
      .Case("s", X86::COND_S)          // Sign
      .Case("ns", X86::COND_NS)        // No Sign
      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
      .Default(X86::COND_INVALID);
}

// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
// On success with a {z} mark present, Z receives a "{z}" token operand located
// at StartLoc; otherwise Z is left untouched.
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
                          const SMLoc &StartLoc) {
  MCAsmParser &Parser = getParser();
  // Assuming we are just past the '{' mark, querying the next token
  // Searched for {z}, but none was found. Return false, as no parsing error was
  // encountered
  if (!(getLexer().is(AsmToken::Identifier) &&
        (getLexer().getTok().getIdentifier() == "z")))
    return false;
  Parser.Lex(); // Eat z
  // Query and eat the '}' mark
  if (!getLexer().is(AsmToken::RCurly))
    return Error(getLexer().getLoc(), "Expected } at this point");
  Parser.Lex(); // Eat '}'
  // Assign Z with the {z} mark operand
  Z = X86Operand::CreateToken("{z}", StartLoc);
  return false;
}

// true on failure, false otherwise
// Parses the optional AVX-512 decorations that may follow an operand: a memory
// broadcast ({1to<NUM>}), or an op-mask register ({%k<NUM>}) optionally
// combined with a zeroing mark ({z}) in either order. Does nothing when the
// current token is not '{'.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      // The broadcast spelling arrives as two tokens (the integer and the
      // following identifier); glue them back together before matching.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}
      // last one have no meaning, but GCC accepts it
      // Currently, we're just past a '{' mark
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the allegedly '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        // If {z} was already consumed, a new '{' has to be eaten; otherwise
        // the '{' eaten above is the start of the mask.
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Either a parsing error was found, or no (expected) {z} mark was
          // found - report an error
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // on the contrary - have it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}

/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2.
seg : *(disp) (base-index-scale-expr) 2967 // 3. seg : *(base-index-scale-expr) 2968 // 4. disp *(base-index-scale-expr) 2969 // 5. *(disp) (base-index-scale-expr) 2970 // 6. *(base-index-scale-expr) 2971 // 7. disp * 2972 // 8. *(disp) 2973 2974 // If we do not have an displacement yet, check if we're in cases 4 or 6 by 2975 // checking if the first object after the parenthesis is a register (or an 2976 // identifier referring to a register) and parse the displacement or default 2977 // to 0 as appropriate. 2978 auto isAtMemOperand = [this]() { 2979 if (this->getLexer().isNot(AsmToken::LParen)) 2980 return false; 2981 AsmToken Buf[2]; 2982 StringRef Id; 2983 auto TokCount = this->getLexer().peekTokens(Buf, true); 2984 if (TokCount == 0) 2985 return false; 2986 switch (Buf[0].getKind()) { 2987 case AsmToken::Percent: 2988 case AsmToken::Comma: 2989 return true; 2990 // These lower cases are doing a peekIdentifier. 2991 case AsmToken::At: 2992 case AsmToken::Dollar: 2993 if ((TokCount > 1) && 2994 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) && 2995 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer())) 2996 Id = StringRef(Buf[0].getLoc().getPointer(), 2997 Buf[1].getIdentifier().size() + 1); 2998 break; 2999 case AsmToken::Identifier: 3000 case AsmToken::String: 3001 Id = Buf[0].getIdentifier(); 3002 break; 3003 default: 3004 return false; 3005 } 3006 // We have an ID. Check if it is bound to a register. 3007 if (!Id.empty()) { 3008 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id); 3009 if (Sym->isVariable()) { 3010 auto V = Sym->getVariableValue(/*SetUsed*/ false); 3011 return isa<X86MCExpr>(V); 3012 } 3013 } 3014 return false; 3015 }; 3016 3017 if (!Disp) { 3018 // Parse immediate if we're not at a mem operand yet. 
3019 if (!isAtMemOperand()) { 3020 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc)) 3021 return true; 3022 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here."); 3023 } else { 3024 // Disp is implicitly zero if we haven't parsed it yet. 3025 Disp = MCConstantExpr::create(0, Parser.getContext()); 3026 } 3027 } 3028 3029 // We are now either at the end of the operand or at the '(' at the start of a 3030 // base-index-scale-expr. 3031 3032 if (!parseOptionalToken(AsmToken::LParen)) { 3033 if (SegReg == 0) 3034 Operands.push_back( 3035 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc)); 3036 else 3037 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 3038 0, 0, 1, StartLoc, EndLoc)); 3039 return false; 3040 } 3041 3042 // If we reached here, then eat the '(' and Process 3043 // the rest of the memory operand. 3044 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 3045 SMLoc BaseLoc = getLexer().getLoc(); 3046 const MCExpr *E; 3047 StringRef ErrMsg; 3048 3049 // Parse BaseReg if one is provided. 3050 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) { 3051 if (Parser.parseExpression(E, EndLoc) || 3052 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here")) 3053 return true; 3054 3055 // Check the register. 3056 BaseReg = cast<X86MCExpr>(E)->getRegNo(); 3057 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) 3058 return Error(BaseLoc, "eiz and riz can only be used as index registers", 3059 SMRange(BaseLoc, EndLoc)); 3060 } 3061 3062 if (parseOptionalToken(AsmToken::Comma)) { 3063 // Following the comma we should have either an index register, or a scale 3064 // value. We don't support the later form, but we want to parse it 3065 // correctly. 3066 // 3067 // Even though it would be completely consistent to support syntax like 3068 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 
3069 if (getLexer().isNot(AsmToken::RParen)) { 3070 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc)) 3071 return true; 3072 3073 if (!isa<X86MCExpr>(E)) { 3074 // We've parsed an unexpected Scale Value instead of an index 3075 // register. Interpret it as an absolute. 3076 int64_t ScaleVal; 3077 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr())) 3078 return Error(Loc, "expected absolute expression"); 3079 if (ScaleVal != 1) 3080 Warning(Loc, "scale factor without index register is ignored"); 3081 Scale = 1; 3082 } else { // IndexReg Found. 3083 IndexReg = cast<X86MCExpr>(E)->getRegNo(); 3084 3085 if (BaseReg == X86::RIP) 3086 return Error(Loc, 3087 "%rip as base register can not have an index register"); 3088 if (IndexReg == X86::RIP) 3089 return Error(Loc, "%rip is not allowed as an index register"); 3090 3091 if (parseOptionalToken(AsmToken::Comma)) { 3092 // Parse the scale amount: 3093 // ::= ',' [scale-expression] 3094 3095 // A scale amount without an index is ignored. 3096 if (getLexer().isNot(AsmToken::RParen)) { 3097 int64_t ScaleVal; 3098 if (Parser.parseTokenLoc(Loc) || 3099 Parser.parseAbsoluteExpression(ScaleVal)) 3100 return Error(Loc, "expected scale expression"); 3101 Scale = (unsigned)ScaleVal; 3102 // Validate the scale amount. 3103 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 3104 Scale != 1) 3105 return Error(Loc, "scale factor in 16-bit address must be 1"); 3106 if (checkScale(Scale, ErrMsg)) 3107 return Error(Loc, ErrMsg); 3108 } 3109 } 3110 } 3111 } 3112 } 3113 3114 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 3115 if (parseToken(AsmToken::RParen, "unexpected token in memory operand")) 3116 return true; 3117 3118 // This is to support otherwise illegal operand (%dx) found in various 3119 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now 3120 // be supported. Mark such DX variants separately fix only in special cases. 
3121 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 && 3122 isa<MCConstantExpr>(Disp) && 3123 cast<MCConstantExpr>(Disp)->getValue() == 0) { 3124 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc)); 3125 return false; 3126 } 3127 3128 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 3129 ErrMsg)) 3130 return Error(BaseLoc, ErrMsg); 3131 3132 // If the displacement is a constant, check overflows. For 64-bit addressing, 3133 // gas requires isInt<32> and otherwise reports an error. For others, gas 3134 // reports a warning and allows a wider range. E.g. gas allows 3135 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses 3136 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000). 3137 if (BaseReg || IndexReg) { 3138 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) { 3139 auto Imm = CE->getValue(); 3140 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || 3141 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg); 3142 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg); 3143 if (Is64) { 3144 if (!isInt<32>(Imm)) 3145 return Error(BaseLoc, "displacement " + Twine(Imm) + 3146 " is not within [-2147483648, 2147483647]"); 3147 } else if (!Is16) { 3148 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) { 3149 Warning(BaseLoc, "displacement " + Twine(Imm) + 3150 " shortened to 32-bit signed " + 3151 Twine(static_cast<int32_t>(Imm))); 3152 } 3153 } else if (!isUInt<16>(Imm < 0 ? 
-uint64_t(Imm) : uint64_t(Imm))) { 3154 Warning(BaseLoc, "displacement " + Twine(Imm) + 3155 " shortened to 16-bit signed " + 3156 Twine(static_cast<int16_t>(Imm))); 3157 } 3158 } 3159 } 3160 3161 if (SegReg || BaseReg || IndexReg) 3162 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 3163 BaseReg, IndexReg, Scale, StartLoc, 3164 EndLoc)); 3165 else 3166 Operands.push_back( 3167 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc)); 3168 return false; 3169 } 3170 3171 // Parse either a standard primary expression or a register. 3172 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 3173 MCAsmParser &Parser = getParser(); 3174 // See if this is a register first. 3175 if (getTok().is(AsmToken::Percent) || 3176 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) && 3177 MatchRegisterName(Parser.getTok().getString()))) { 3178 SMLoc StartLoc = Parser.getTok().getLoc(); 3179 MCRegister RegNo; 3180 if (parseRegister(RegNo, StartLoc, EndLoc)) 3181 return true; 3182 Res = X86MCExpr::create(RegNo, Parser.getContext()); 3183 return false; 3184 } 3185 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr); 3186 } 3187 3188 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 3189 SMLoc NameLoc, OperandVector &Operands) { 3190 MCAsmParser &Parser = getParser(); 3191 InstInfo = &Info; 3192 3193 // Reset the forced VEX encoding. 3194 ForcedOpcodePrefix = OpcodePrefix_Default; 3195 ForcedDispEncoding = DispEncoding_Default; 3196 UseApxExtendedReg = false; 3197 ForcedNoFlag = false; 3198 3199 // Parse pseudo prefixes. 3200 while (true) { 3201 if (Name == "{") { 3202 if (getLexer().isNot(AsmToken::Identifier)) 3203 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'"); 3204 std::string Prefix = Parser.getTok().getString().lower(); 3205 Parser.Lex(); // Eat identifier. 
/// Parse one instruction statement: the mnemonic \p Name (already lexed by the
/// caller, located at \p NameLoc) followed by its operands.
///
/// The function works in the following stages:
///   1. reset the per-instruction forced-encoding state;
///   2. consume pseudo prefixes ("{vex}", "{disp8}", "{nf}", ... and the MASM
///      style "vex"/"evex" words in MS inline asm);
///   3. rewrite mnemonics that embed a comparison code (cmp*/vcmp*, vpcmp*,
///      vpcom*) to their base form and remember the predicate immediate;
///   4. fold lock/rep/repne/notrack prefixes into a flags word;
///   5. parse the operand list;
///   6. apply gas-compatibility fix-ups (fsub -> fsubp, "(%dx)" for in/out,
///      default operands for string instructions, xlat).
///
/// \param Info     Parse state for this instruction; a pointer to it is stored
///                 in InstInfo and its AsmRewrites list may be appended to.
/// \param Name     Mnemonic (or "{" when the statement starts with a pseudo
///                 prefix).
/// \param NameLoc  Source location of \p Name.
/// \param Operands Receives the mnemonic token followed by parsed operands.
/// \returns true on failure, false on success.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedOpcodePrefix = OpcodePrefix_Default;
  ForcedDispEncoding = DispEncoding_Default;
  UseApxExtendedReg = false;
  ForcedNoFlag = false;

  // Parse pseudo prefixes.
  while (true) {
    if (Name == "{") {
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "rex")
        ForcedOpcodePrefix = OpcodePrefix_REX;
      else if (Prefix == "rex2")
        ForcedOpcodePrefix = OpcodePrefix_REX2;
      else if (Prefix == "vex")
        ForcedOpcodePrefix = OpcodePrefix_VEX;
      else if (Prefix == "vex2")
        ForcedOpcodePrefix = OpcodePrefix_VEX2;
      else if (Prefix == "vex3")
        ForcedOpcodePrefix = OpcodePrefix_VEX3;
      else if (Prefix == "evex")
        ForcedOpcodePrefix = OpcodePrefix_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else if (Prefix == "nf")
        ForcedNoFlag = true;
      else
        return Error(NameLoc, "unknown prefix");

      // Whatever follows is either another "{prefix}" or the real mnemonic.
      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedOpcodePrefix = OpcodePrefix_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedOpcodePrefix = OpcodePrefix_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedOpcodePrefix = OpcodePrefix_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedOpcodePrefix = OpcodePrefix_EVEX;

      // A prefix word was recognised: the next identifier is the mnemonic.
      if (ForcedOpcodePrefix != OpcodePrefix_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  // PatchedName is the mnemonic that is actually pushed as the first operand;
  // Name keeps the spelling used for the later name-based fix-ups.
  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.starts_with("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
      PatchedName != "setzub" && PatchedName != "setzunb" &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // ~0U means "no comparison predicate was embedded in the mnemonic".
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
      (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
       PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
       PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    // Index of the first character after the "cmp"/"vcmp" stem.
    unsigned CCIdx = IsVCMP ? 4 : 3;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
      .Case("eq",       0x00)
      .Case("eq_oq",    0x00)
      .Case("lt",       0x01)
      .Case("lt_os",    0x01)
      .Case("le",       0x02)
      .Case("le_os",    0x02)
      .Case("unord",    0x03)
      .Case("unord_q",  0x03)
      .Case("neq",      0x04)
      .Case("neq_uq",   0x04)
      .Case("nlt",      0x05)
      .Case("nlt_us",   0x05)
      .Case("nle",      0x06)
      .Case("nle_us",   0x06)
      .Case("ord",      0x07)
      .Case("ord_q",    0x07)
      /* AVX only from here */
      .Case("eq_uq",    0x08)
      .Case("nge",      0x09)
      .Case("nge_us",   0x09)
      .Case("ngt",      0x0A)
      .Case("ngt_us",   0x0A)
      .Case("false",    0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq",   0x0C)
      .Case("ge",       0x0D)
      .Case("ge_os",    0x0D)
      .Case("gt",       0x0E)
      .Case("gt_os",    0x0E)
      .Case("true",     0x0F)
      .Case("true_uq",  0x0F)
      .Case("eq_os",    0x10)
      .Case("lt_oq",    0x11)
      .Case("le_oq",    0x12)
      .Case("unord_s",  0x13)
      .Case("neq_us",   0x14)
      .Case("nlt_uq",   0x15)
      .Case("nle_uq",   0x16)
      .Case("ord_s",    0x17)
      .Case("eq_us",    0x18)
      .Case("nge_uq",   0x19)
      .Case("ngt_uq",   0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os",   0x1C)
      .Case("ge_oq",    0x1D)
      .Case("gt_oq",    0x1E)
      .Case("true_us",  0x1F)
      .Default(~0U);
    // Non-'v' forms accept only predicates below 8 and have no "h" (sh/ph)
    // variants.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.ends_with("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.ends_with("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.ends_with("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.ends_with("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.ends_with("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.ends_with("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    // A 'u' before the element-size letter makes the suffix two characters.
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    // A 'u' before the element-size letter makes the suffix two characters.
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance the robustness of prefix integrity checks. For example, the
  // following forms are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  // Accumulates X86::IP_* bits for the lock/rep/repne/notrack prefixes seen.
  // NOTE(review): the loop condition lower-cases Name but the StringSwitch
  // below matches it case-sensitively, so a mixed-case prefix falls to the
  // Default case.
  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have a real instruction with the given prefix, so
      // let's use the prefix as the instruction.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if it's not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.starts_with(";") || Name.starts_with("\n") ||
           Name.starts_with("#") || Name.starts_with("\t") ||
           Name.starts_with("/")) {
      // FIXME: The mnemonic won't match correctly if it's not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  // If any prefix was consumed, Name now holds the real mnemonic.
  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  // The (possibly rewritten) mnemonic is always the first operand.
  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic. In AT&T syntax
  // it goes before the parsed operands; the Intel-syntax case is handled after
  // operand parsing below.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Parse conditional flags after mnemonic.
  if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
      parseCFlagsOp(Operands))
    return true;

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
     }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic. In Intel syntax
  // it goes after the parsed operands.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  // Default operands for the string instructions are collected here and
  // handed to VerifyAndAdjustOperands together with the parsed operands.
  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.starts_with("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.starts_with("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.starts_with("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.starts_with("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.starts_with("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // An operand-less AT&T "movsd" is retokenized as "movsl".
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  // Record the accumulated lock/rep/notrack prefix bits as a trailing operand.
  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}
determining the " 3741 "size, (R|E)BX will be used for the location"); 3742 Operands.pop_back(); 3743 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb"); 3744 } 3745 } 3746 3747 if (Flags) 3748 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc)); 3749 return false; 3750 } 3751 3752 static bool convertSSEToAVX(MCInst &Inst) { 3753 ArrayRef<X86TableEntry> Table{X86SSE2AVXTable}; 3754 unsigned Opcode = Inst.getOpcode(); 3755 const auto I = llvm::lower_bound(Table, Opcode); 3756 if (I == Table.end() || I->OldOpc != Opcode) 3757 return false; 3758 3759 Inst.setOpcode(I->NewOpc); 3760 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more 3761 // operand compare to SSE variant, which is added below 3762 if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) || 3763 X86::isPBLENDVB(Opcode)) 3764 Inst.addOperand(Inst.getOperand(2)); 3765 3766 return true; 3767 } 3768 3769 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { 3770 if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst)) 3771 return true; 3772 3773 if (ForcedOpcodePrefix != OpcodePrefix_VEX3 && 3774 X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode()))) 3775 return true; 3776 3777 if (X86::optimizeShiftRotateWithImmediateOne(Inst)) 3778 return true; 3779 3780 switch (Inst.getOpcode()) { 3781 default: return false; 3782 case X86::JMP_1: 3783 // {disp32} forces a larger displacement as if the instruction was relaxed. 3784 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}. 3785 // This matches GNU assembler. 3786 if (ForcedDispEncoding == DispEncoding_Disp32) { 3787 Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4); 3788 return true; 3789 } 3790 3791 return false; 3792 case X86::JCC_1: 3793 // {disp32} forces a larger displacement as if the instruction was relaxed. 3794 // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}. 3795 // This matches GNU assembler. 
3796 if (ForcedDispEncoding == DispEncoding_Disp32) { 3797 Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4); 3798 return true; 3799 } 3800 3801 return false; 3802 case X86::INT: { 3803 // Transforms "int $3" into "int3" as a size optimization. 3804 // We can't write this as an InstAlias. 3805 if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3) 3806 return false; 3807 Inst.clear(); 3808 Inst.setOpcode(X86::INT3); 3809 return true; 3810 } 3811 } 3812 } 3813 3814 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { 3815 using namespace X86; 3816 const MCRegisterInfo *MRI = getContext().getRegisterInfo(); 3817 unsigned Opcode = Inst.getOpcode(); 3818 uint64_t TSFlags = MII.get(Opcode).TSFlags; 3819 if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) || 3820 isVFMADDCSH(Opcode)) { 3821 unsigned Dest = Inst.getOperand(0).getReg(); 3822 for (unsigned i = 2; i < Inst.getNumOperands(); i++) 3823 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) 3824 return Warning(Ops[0]->getStartLoc(), "Destination register should be " 3825 "distinct from source registers"); 3826 } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) || 3827 isVFMULCSH(Opcode)) { 3828 unsigned Dest = Inst.getOperand(0).getReg(); 3829 // The mask variants have different operand list. Scan from the third 3830 // operand to avoid emitting incorrect warning. 3831 // VFMULCPHZrr Dest, Src1, Src2 3832 // VFMULCPHZrrk Dest, Dest, Mask, Src1, Src2 3833 // VFMULCPHZrrkz Dest, Mask, Src1, Src2 3834 for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 
2 : 1); 3835 i < Inst.getNumOperands(); i++) 3836 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) 3837 return Warning(Ops[0]->getStartLoc(), "Destination register should be " 3838 "distinct from source registers"); 3839 } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) || 3840 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) || 3841 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) { 3842 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() - 3843 X86::AddrNumOperands - 1).getReg(); 3844 unsigned Src2Enc = MRI->getEncodingValue(Src2); 3845 if (Src2Enc % 4 != 0) { 3846 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2); 3847 unsigned GroupStart = (Src2Enc / 4) * 4; 3848 unsigned GroupEnd = GroupStart + 3; 3849 return Warning(Ops[0]->getStartLoc(), 3850 "source register '" + RegName + "' implicitly denotes '" + 3851 RegName.take_front(3) + Twine(GroupStart) + "' to '" + 3852 RegName.take_front(3) + Twine(GroupEnd) + 3853 "' source group"); 3854 } 3855 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) || 3856 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) || 3857 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) || 3858 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) { 3859 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX; 3860 if (HasEVEX) { 3861 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3862 unsigned Index = MRI->getEncodingValue( 3863 Inst.getOperand(4 + X86::AddrIndexReg).getReg()); 3864 if (Dest == Index) 3865 return Warning(Ops[0]->getStartLoc(), "index and destination registers " 3866 "should be distinct"); 3867 } else { 3868 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3869 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); 3870 unsigned Index = MRI->getEncodingValue( 3871 Inst.getOperand(3 + X86::AddrIndexReg).getReg()); 3872 if (Dest == Mask || Dest == Index || Mask == Index) 3873 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " 
3874 "registers should be distinct"); 3875 } 3876 } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) || 3877 isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) || 3878 isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) { 3879 unsigned SrcDest = Inst.getOperand(0).getReg(); 3880 unsigned Src1 = Inst.getOperand(2).getReg(); 3881 unsigned Src2 = Inst.getOperand(3).getReg(); 3882 if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2) 3883 return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct"); 3884 } 3885 3886 // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to 3887 // check this with the legacy encoding, VEX/EVEX/XOP don't use REX. 3888 if ((TSFlags & X86II::EncodingMask) == 0) { 3889 MCPhysReg HReg = X86::NoRegister; 3890 bool UsesRex = TSFlags & X86II::REX_W; 3891 unsigned NumOps = Inst.getNumOperands(); 3892 for (unsigned i = 0; i != NumOps; ++i) { 3893 const MCOperand &MO = Inst.getOperand(i); 3894 if (!MO.isReg()) 3895 continue; 3896 unsigned Reg = MO.getReg(); 3897 if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) 3898 HReg = Reg; 3899 if (X86II::isX86_64NonExtLowByteReg(Reg) || 3900 X86II::isX86_64ExtendedReg(Reg)) 3901 UsesRex = true; 3902 } 3903 3904 if (UsesRex && HReg != X86::NoRegister) { 3905 StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg); 3906 return Error(Ops[0]->getStartLoc(), 3907 "can't encode '" + RegName + "' in an instruction requiring " 3908 "REX prefix"); 3909 } 3910 } 3911 3912 if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) { 3913 const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg); 3914 if (!MO.isReg() || MO.getReg() != X86::RIP) 3915 return Warning( 3916 Ops[0]->getStartLoc(), 3917 Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? 
"'prefetchit0'" 3918 : "'prefetchit1'")) + 3919 " only supports RIP-relative address"); 3920 } 3921 return false; 3922 } 3923 3924 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) { 3925 Warning(Loc, "Instruction may be vulnerable to LVI and " 3926 "requires manual mitigation"); 3927 Note(SMLoc(), "See https://software.intel.com/" 3928 "security-software-guidance/insights/" 3929 "deep-dive-load-value-injection#specialinstructions" 3930 " for more information"); 3931 } 3932 3933 /// RET instructions and also instructions that indirect calls/jumps from memory 3934 /// combine a load and a branch within a single instruction. To mitigate these 3935 /// instructions against LVI, they must be decomposed into separate load and 3936 /// branch instructions, with an LFENCE in between. For more details, see: 3937 /// - X86LoadValueInjectionRetHardening.cpp 3938 /// - X86LoadValueInjectionIndirectThunks.cpp 3939 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection 3940 /// 3941 /// Returns `true` if a mitigation was applied or warning was emitted. 3942 void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) { 3943 // Information on control-flow instructions that require manual mitigation can 3944 // be found here: 3945 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions 3946 switch (Inst.getOpcode()) { 3947 case X86::RET16: 3948 case X86::RET32: 3949 case X86::RET64: 3950 case X86::RETI16: 3951 case X86::RETI32: 3952 case X86::RETI64: { 3953 MCInst ShlInst, FenceInst; 3954 bool Parse32 = is32BitMode() || Code16GCC; 3955 unsigned Basereg = 3956 is64BitMode() ? X86::RSP : (Parse32 ? 
X86::ESP : X86::SP); 3957 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 3958 auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 3959 /*BaseReg=*/Basereg, /*IndexReg=*/0, 3960 /*Scale=*/1, SMLoc{}, SMLoc{}, 0); 3961 ShlInst.setOpcode(X86::SHL64mi); 3962 ShlMemOp->addMemOperands(ShlInst, 5); 3963 ShlInst.addOperand(MCOperand::createImm(0)); 3964 FenceInst.setOpcode(X86::LFENCE); 3965 Out.emitInstruction(ShlInst, getSTI()); 3966 Out.emitInstruction(FenceInst, getSTI()); 3967 return; 3968 } 3969 case X86::JMP16m: 3970 case X86::JMP32m: 3971 case X86::JMP64m: 3972 case X86::CALL16m: 3973 case X86::CALL32m: 3974 case X86::CALL64m: 3975 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 3976 return; 3977 } 3978 } 3979 3980 /// To mitigate LVI, every instruction that performs a load can be followed by 3981 /// an LFENCE instruction to squash any potential mis-speculation. There are 3982 /// some instructions that require additional considerations, and may requre 3983 /// manual mitigation. For more details, see: 3984 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection 3985 /// 3986 /// Returns `true` if a mitigation was applied or warning was emitted. 
3987 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst, 3988 MCStreamer &Out) { 3989 auto Opcode = Inst.getOpcode(); 3990 auto Flags = Inst.getFlags(); 3991 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) { 3992 // Information on REP string instructions that require manual mitigation can 3993 // be found here: 3994 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions 3995 switch (Opcode) { 3996 case X86::CMPSB: 3997 case X86::CMPSW: 3998 case X86::CMPSL: 3999 case X86::CMPSQ: 4000 case X86::SCASB: 4001 case X86::SCASW: 4002 case X86::SCASL: 4003 case X86::SCASQ: 4004 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 4005 return; 4006 } 4007 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) { 4008 // If a REP instruction is found on its own line, it may or may not be 4009 // followed by a vulnerable instruction. Emit a warning just in case. 4010 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 4011 return; 4012 } 4013 4014 const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); 4015 4016 // Can't mitigate after terminators or calls. A control flow change may have 4017 // already occurred. 4018 if (MCID.isTerminator() || MCID.isCall()) 4019 return; 4020 4021 // LFENCE has the mayLoad property, don't double fence. 
4022 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) { 4023 MCInst FenceInst; 4024 FenceInst.setOpcode(X86::LFENCE); 4025 Out.emitInstruction(FenceInst, getSTI()); 4026 } 4027 } 4028 4029 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands, 4030 MCStreamer &Out) { 4031 if (LVIInlineAsmHardening && 4032 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity)) 4033 applyLVICFIMitigation(Inst, Out); 4034 4035 Out.emitInstruction(Inst, getSTI()); 4036 4037 if (LVIInlineAsmHardening && 4038 getSTI().hasFeature(X86::FeatureLVILoadHardening)) 4039 applyLVILoadHardeningMitigation(Inst, Out); 4040 } 4041 4042 static unsigned getPrefixes(OperandVector &Operands) { 4043 unsigned Result = 0; 4044 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back()); 4045 if (Prefix.isPrefix()) { 4046 Result = Prefix.getPrefix(); 4047 Operands.pop_back(); 4048 } 4049 return Result; 4050 } 4051 4052 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4053 OperandVector &Operands, 4054 MCStreamer &Out, uint64_t &ErrorInfo, 4055 bool MatchingInlineAsm) { 4056 assert(!Operands.empty() && "Unexpect empty operand list!"); 4057 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); 4058 4059 // First, handle aliases that expand to multiple instructions. 4060 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, 4061 Out, MatchingInlineAsm); 4062 unsigned Prefixes = getPrefixes(Operands); 4063 4064 MCInst Inst; 4065 4066 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to 4067 // the encoder and printer. 
4068 if (ForcedOpcodePrefix == OpcodePrefix_REX) 4069 Prefixes |= X86::IP_USE_REX; 4070 else if (ForcedOpcodePrefix == OpcodePrefix_REX2) 4071 Prefixes |= X86::IP_USE_REX2; 4072 else if (ForcedOpcodePrefix == OpcodePrefix_VEX) 4073 Prefixes |= X86::IP_USE_VEX; 4074 else if (ForcedOpcodePrefix == OpcodePrefix_VEX2) 4075 Prefixes |= X86::IP_USE_VEX2; 4076 else if (ForcedOpcodePrefix == OpcodePrefix_VEX3) 4077 Prefixes |= X86::IP_USE_VEX3; 4078 else if (ForcedOpcodePrefix == OpcodePrefix_EVEX) 4079 Prefixes |= X86::IP_USE_EVEX; 4080 4081 // Set encoded flags for {disp8} and {disp32}. 4082 if (ForcedDispEncoding == DispEncoding_Disp8) 4083 Prefixes |= X86::IP_USE_DISP8; 4084 else if (ForcedDispEncoding == DispEncoding_Disp32) 4085 Prefixes |= X86::IP_USE_DISP32; 4086 4087 if (Prefixes) 4088 Inst.setFlags(Prefixes); 4089 4090 return isParsingIntelSyntax() 4091 ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out, 4092 ErrorInfo, MatchingInlineAsm) 4093 : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out, 4094 ErrorInfo, MatchingInlineAsm); 4095 } 4096 4097 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, 4098 OperandVector &Operands, MCStreamer &Out, 4099 bool MatchingInlineAsm) { 4100 // FIXME: This should be replaced with a real .td file alias mechanism. 4101 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 4102 // call. 
4103 const char *Repl = StringSwitch<const char *>(Op.getToken()) 4104 .Case("finit", "fninit") 4105 .Case("fsave", "fnsave") 4106 .Case("fstcw", "fnstcw") 4107 .Case("fstcww", "fnstcw") 4108 .Case("fstenv", "fnstenv") 4109 .Case("fstsw", "fnstsw") 4110 .Case("fstsww", "fnstsw") 4111 .Case("fclex", "fnclex") 4112 .Default(nullptr); 4113 if (Repl) { 4114 MCInst Inst; 4115 Inst.setOpcode(X86::WAIT); 4116 Inst.setLoc(IDLoc); 4117 if (!MatchingInlineAsm) 4118 emitInstruction(Inst, Operands, Out); 4119 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 4120 } 4121 } 4122 4123 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, 4124 const FeatureBitset &MissingFeatures, 4125 bool MatchingInlineAsm) { 4126 assert(MissingFeatures.any() && "Unknown missing feature!"); 4127 SmallString<126> Msg; 4128 raw_svector_ostream OS(Msg); 4129 OS << "instruction requires:"; 4130 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) { 4131 if (MissingFeatures[i]) 4132 OS << ' ' << getSubtargetFeatureName(i); 4133 } 4134 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); 4135 } 4136 4137 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { 4138 unsigned Opc = Inst.getOpcode(); 4139 const MCInstrDesc &MCID = MII.get(Opc); 4140 uint64_t TSFlags = MCID.TSFlags; 4141 4142 if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID)) 4143 return Match_Unsupported; 4144 if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc)) 4145 return Match_Unsupported; 4146 4147 switch (ForcedOpcodePrefix) { 4148 case OpcodePrefix_Default: 4149 break; 4150 case OpcodePrefix_REX: 4151 case OpcodePrefix_REX2: 4152 if (TSFlags & X86II::EncodingMask) 4153 return Match_Unsupported; 4154 break; 4155 case OpcodePrefix_VEX: 4156 case OpcodePrefix_VEX2: 4157 case OpcodePrefix_VEX3: 4158 if ((TSFlags & X86II::EncodingMask) != X86II::VEX) 4159 return Match_Unsupported; 4160 break; 4161 case OpcodePrefix_EVEX: 4162 if ((TSFlags & X86II::EncodingMask) != X86II::EVEX) 4163 
return Match_Unsupported; 4164 break; 4165 } 4166 4167 if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix && 4168 (ForcedOpcodePrefix != OpcodePrefix_VEX && 4169 ForcedOpcodePrefix != OpcodePrefix_VEX2 && 4170 ForcedOpcodePrefix != OpcodePrefix_VEX3)) 4171 return Match_Unsupported; 4172 4173 return Match_Success; 4174 } 4175 4176 bool X86AsmParser::matchAndEmitATTInstruction( 4177 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, 4178 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { 4179 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 4180 SMRange EmptyRange = std::nullopt; 4181 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode 4182 // when matching the instruction. 4183 if (ForcedDataPrefix == X86::Is32Bit) 4184 SwitchMode(X86::Is32Bit); 4185 // First, try a direct match. 4186 FeatureBitset MissingFeatures; 4187 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo, 4188 MissingFeatures, MatchingInlineAsm, 4189 isParsingIntelSyntax()); 4190 if (ForcedDataPrefix == X86::Is32Bit) { 4191 SwitchMode(X86::Is16Bit); 4192 ForcedDataPrefix = 0; 4193 } 4194 switch (OriginalError) { 4195 default: llvm_unreachable("Unexpected match result!"); 4196 case Match_Success: 4197 if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) 4198 return true; 4199 // Some instructions need post-processing to, for example, tweak which 4200 // encoding is selected. Loop on it while changes happen so the 4201 // individual transformations can chain off each other. 
4202 if (!MatchingInlineAsm) 4203 while (processInstruction(Inst, Operands)) 4204 ; 4205 4206 Inst.setLoc(IDLoc); 4207 if (!MatchingInlineAsm) 4208 emitInstruction(Inst, Operands, Out); 4209 Opcode = Inst.getOpcode(); 4210 return false; 4211 case Match_InvalidImmUnsignedi4: { 4212 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); 4213 if (ErrorLoc == SMLoc()) 4214 ErrorLoc = IDLoc; 4215 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", 4216 EmptyRange, MatchingInlineAsm); 4217 } 4218 case Match_MissingFeature: 4219 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm); 4220 case Match_InvalidOperand: 4221 case Match_MnemonicFail: 4222 case Match_Unsupported: 4223 break; 4224 } 4225 if (Op.getToken().empty()) { 4226 Error(IDLoc, "instruction must have size higher than 0", EmptyRange, 4227 MatchingInlineAsm); 4228 return true; 4229 } 4230 4231 // FIXME: Ideally, we would only attempt suffix matches for things which are 4232 // valid prefixes, and we could just infer the right unambiguous 4233 // type. However, that requires substantially more matcher support than the 4234 // following hack. 4235 4236 // Change the operand to point to a temporary token. 4237 StringRef Base = Op.getToken(); 4238 SmallString<16> Tmp; 4239 Tmp += Base; 4240 Tmp += ' '; 4241 Op.setTokenValue(Tmp); 4242 4243 // If this instruction starts with an 'f', then it is a floating point stack 4244 // instruction. These come in up to three forms for 32-bit, 64-bit, and 4245 // 80-bit floating point, which use the suffixes s,l,t respectively. 4246 // 4247 // Otherwise, we assume that this may be an integer instruction, which comes 4248 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively. 4249 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0"; 4250 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 } 4251 const char *MemSize = Base[0] != 'f' ? 
"\x08\x10\x20\x40" : "\x20\x40\x50\0"; 4252 4253 // Check for the various suffix matches. 4254 uint64_t ErrorInfoIgnore; 4255 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings. 4256 unsigned Match[4]; 4257 4258 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one. 4259 // So we should make sure the suffix matcher only works for memory variant 4260 // that has the same size with the suffix. 4261 // FIXME: This flag is a workaround for legacy instructions that didn't 4262 // declare non suffix variant assembly. 4263 bool HasVectorReg = false; 4264 X86Operand *MemOp = nullptr; 4265 for (const auto &Op : Operands) { 4266 X86Operand *X86Op = static_cast<X86Operand *>(Op.get()); 4267 if (X86Op->isVectorReg()) 4268 HasVectorReg = true; 4269 else if (X86Op->isMem()) { 4270 MemOp = X86Op; 4271 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax"); 4272 // Have we found an unqualified memory operand, 4273 // break. IA allows only one memory operand. 4274 break; 4275 } 4276 } 4277 4278 for (unsigned I = 0, E = std::size(Match); I != E; ++I) { 4279 Tmp.back() = Suffixes[I]; 4280 if (MemOp && HasVectorReg) 4281 MemOp->Mem.Size = MemSize[I]; 4282 Match[I] = Match_MnemonicFail; 4283 if (MemOp || !HasVectorReg) { 4284 Match[I] = 4285 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures, 4286 MatchingInlineAsm, isParsingIntelSyntax()); 4287 // If this returned as a missing feature failure, remember that. 4288 if (Match[I] == Match_MissingFeature) 4289 ErrorInfoMissingFeatures = MissingFeatures; 4290 } 4291 } 4292 4293 // Restore the old token. 4294 Op.setTokenValue(Base); 4295 4296 // If exactly one matched, then we treat that as a successful match (and the 4297 // instruction will already have been filled in correctly, since the failing 4298 // matches won't have modified it). 
4299 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success); 4300 if (NumSuccessfulMatches == 1) { 4301 if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) 4302 return true; 4303 // Some instructions need post-processing to, for example, tweak which 4304 // encoding is selected. Loop on it while changes happen so the 4305 // individual transformations can chain off each other. 4306 if (!MatchingInlineAsm) 4307 while (processInstruction(Inst, Operands)) 4308 ; 4309 4310 Inst.setLoc(IDLoc); 4311 if (!MatchingInlineAsm) 4312 emitInstruction(Inst, Operands, Out); 4313 Opcode = Inst.getOpcode(); 4314 return false; 4315 } 4316 4317 // Otherwise, the match failed, try to produce a decent error message. 4318 4319 // If we had multiple suffix matches, then identify this as an ambiguous 4320 // match. 4321 if (NumSuccessfulMatches > 1) { 4322 char MatchChars[4]; 4323 unsigned NumMatches = 0; 4324 for (unsigned I = 0, E = std::size(Match); I != E; ++I) 4325 if (Match[I] == Match_Success) 4326 MatchChars[NumMatches++] = Suffixes[I]; 4327 4328 SmallString<126> Msg; 4329 raw_svector_ostream OS(Msg); 4330 OS << "ambiguous instructions require an explicit suffix (could be "; 4331 for (unsigned i = 0; i != NumMatches; ++i) { 4332 if (i != 0) 4333 OS << ", "; 4334 if (i + 1 == NumMatches) 4335 OS << "or "; 4336 OS << "'" << Base << MatchChars[i] << "'"; 4337 } 4338 OS << ")"; 4339 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm); 4340 return true; 4341 } 4342 4343 // Okay, we know that none of the variants matched successfully. 4344 4345 // If all of the instructions reported an invalid mnemonic, then the original 4346 // mnemonic was invalid. 
4347 if (llvm::count(Match, Match_MnemonicFail) == 4) { 4348 if (OriginalError == Match_MnemonicFail) 4349 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", 4350 Op.getLocRange(), MatchingInlineAsm); 4351 4352 if (OriginalError == Match_Unsupported) 4353 return Error(IDLoc, "unsupported instruction", EmptyRange, 4354 MatchingInlineAsm); 4355 4356 assert(OriginalError == Match_InvalidOperand && "Unexpected error"); 4357 // Recover location info for the operand if we know which was the problem. 4358 if (ErrorInfo != ~0ULL) { 4359 if (ErrorInfo >= Operands.size()) 4360 return Error(IDLoc, "too few operands for instruction", EmptyRange, 4361 MatchingInlineAsm); 4362 4363 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo]; 4364 if (Operand.getStartLoc().isValid()) { 4365 SMRange OperandRange = Operand.getLocRange(); 4366 return Error(Operand.getStartLoc(), "invalid operand for instruction", 4367 OperandRange, MatchingInlineAsm); 4368 } 4369 } 4370 4371 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 4372 MatchingInlineAsm); 4373 } 4374 4375 // If one instruction matched as unsupported, report this as unsupported. 4376 if (llvm::count(Match, Match_Unsupported) == 1) { 4377 return Error(IDLoc, "unsupported instruction", EmptyRange, 4378 MatchingInlineAsm); 4379 } 4380 4381 // If one instruction matched with a missing feature, report this as a 4382 // missing feature. 4383 if (llvm::count(Match, Match_MissingFeature) == 1) { 4384 ErrorInfo = Match_MissingFeature; 4385 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures, 4386 MatchingInlineAsm); 4387 } 4388 4389 // If one instruction matched with an invalid operand, report this as an 4390 // operand failure. 4391 if (llvm::count(Match, Match_InvalidOperand) == 1) { 4392 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 4393 MatchingInlineAsm); 4394 } 4395 4396 // If all of these were an outright failure, report it in a useless way. 
4397 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", 4398 EmptyRange, MatchingInlineAsm); 4399 return true; 4400 } 4401 4402 bool X86AsmParser::matchAndEmitIntelInstruction( 4403 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, 4404 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { 4405 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); 4406 SMRange EmptyRange = std::nullopt; 4407 // Find one unsized memory operand, if present. 4408 X86Operand *UnsizedMemOp = nullptr; 4409 for (const auto &Op : Operands) { 4410 X86Operand *X86Op = static_cast<X86Operand *>(Op.get()); 4411 if (X86Op->isMemUnsized()) { 4412 UnsizedMemOp = X86Op; 4413 // Have we found an unqualified memory operand, 4414 // break. IA allows only one memory operand. 4415 break; 4416 } 4417 } 4418 4419 // Allow some instructions to have implicitly pointer-sized operands. This is 4420 // compatible with gas. 4421 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken(); 4422 if (UnsizedMemOp) { 4423 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"}; 4424 for (const char *Instr : PtrSizedInstrs) { 4425 if (Mnemonic == Instr) { 4426 UnsizedMemOp->Mem.Size = getPointerWidth(); 4427 break; 4428 } 4429 } 4430 } 4431 4432 SmallVector<unsigned, 8> Match; 4433 FeatureBitset ErrorInfoMissingFeatures; 4434 FeatureBitset MissingFeatures; 4435 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken(); 4436 4437 // If unsized push has immediate operand we should default the default pointer 4438 // size for the size. 4439 if (Mnemonic == "push" && Operands.size() == 2) { 4440 auto *X86Op = static_cast<X86Operand *>(Operands[1].get()); 4441 if (X86Op->isImm()) { 4442 // If it's not a constant fall through and let remainder take care of it. 
4443 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm()); 4444 unsigned Size = getPointerWidth(); 4445 if (CE && 4446 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) { 4447 SmallString<16> Tmp; 4448 Tmp += Base; 4449 Tmp += (is64BitMode()) 4450 ? "q" 4451 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " "; 4452 Op.setTokenValue(Tmp); 4453 // Do match in ATT mode to allow explicit suffix usage. 4454 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo, 4455 MissingFeatures, MatchingInlineAsm, 4456 false /*isParsingIntelSyntax()*/)); 4457 Op.setTokenValue(Base); 4458 } 4459 } 4460 } 4461 4462 // If an unsized memory operand is present, try to match with each memory 4463 // operand size. In Intel assembly, the size is not part of the instruction 4464 // mnemonic. 4465 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) { 4466 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512}; 4467 for (unsigned Size : MopSizes) { 4468 UnsizedMemOp->Mem.Size = Size; 4469 uint64_t ErrorInfoIgnore; 4470 unsigned LastOpcode = Inst.getOpcode(); 4471 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore, 4472 MissingFeatures, MatchingInlineAsm, 4473 isParsingIntelSyntax()); 4474 if (Match.empty() || LastOpcode != Inst.getOpcode()) 4475 Match.push_back(M); 4476 4477 // If this returned as a missing feature failure, remember that. 4478 if (Match.back() == Match_MissingFeature) 4479 ErrorInfoMissingFeatures = MissingFeatures; 4480 } 4481 4482 // Restore the size of the unsized memory operand if we modified it. 4483 UnsizedMemOp->Mem.Size = 0; 4484 } 4485 4486 // If we haven't matched anything yet, this is not a basic integer or FPU 4487 // operation. There shouldn't be any ambiguity in our mnemonic table, so try 4488 // matching with the unsized operand. 
4489 if (Match.empty()) { 4490 Match.push_back(MatchInstruction( 4491 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm, 4492 isParsingIntelSyntax())); 4493 // If this returned as a missing feature failure, remember that. 4494 if (Match.back() == Match_MissingFeature) 4495 ErrorInfoMissingFeatures = MissingFeatures; 4496 } 4497 4498 // Restore the size of the unsized memory operand if we modified it. 4499 if (UnsizedMemOp) 4500 UnsizedMemOp->Mem.Size = 0; 4501 4502 // If it's a bad mnemonic, all results will be the same. 4503 if (Match.back() == Match_MnemonicFail) { 4504 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'", 4505 Op.getLocRange(), MatchingInlineAsm); 4506 } 4507 4508 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success); 4509 4510 // If matching was ambiguous and we had size information from the frontend, 4511 // try again with that. This handles cases like "movxz eax, m8/m16". 4512 if (UnsizedMemOp && NumSuccessfulMatches > 1 && 4513 UnsizedMemOp->getMemFrontendSize()) { 4514 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize(); 4515 unsigned M = MatchInstruction( 4516 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm, 4517 isParsingIntelSyntax()); 4518 if (M == Match_Success) 4519 NumSuccessfulMatches = 1; 4520 4521 // Add a rewrite that encodes the size information we used from the 4522 // frontend. 4523 InstInfo->AsmRewrites->emplace_back( 4524 AOK_SizeDirective, UnsizedMemOp->getStartLoc(), 4525 /*Len=*/0, UnsizedMemOp->getMemFrontendSize()); 4526 } 4527 4528 // If exactly one matched, then we treat that as a successful match (and the 4529 // instruction will already have been filled in correctly, since the failing 4530 // matches won't have modified it). 4531 if (NumSuccessfulMatches == 1) { 4532 if (!MatchingInlineAsm && validateInstruction(Inst, Operands)) 4533 return true; 4534 // Some instructions need post-processing to, for example, tweak which 4535 // encoding is selected. 
Loop on it while changes happen so the individual 4536 // transformations can chain off each other. 4537 if (!MatchingInlineAsm) 4538 while (processInstruction(Inst, Operands)) 4539 ; 4540 Inst.setLoc(IDLoc); 4541 if (!MatchingInlineAsm) 4542 emitInstruction(Inst, Operands, Out); 4543 Opcode = Inst.getOpcode(); 4544 return false; 4545 } else if (NumSuccessfulMatches > 1) { 4546 assert(UnsizedMemOp && 4547 "multiple matches only possible with unsized memory operands"); 4548 return Error(UnsizedMemOp->getStartLoc(), 4549 "ambiguous operand size for instruction '" + Mnemonic + "\'", 4550 UnsizedMemOp->getLocRange()); 4551 } 4552 4553 // If one instruction matched as unsupported, report this as unsupported. 4554 if (llvm::count(Match, Match_Unsupported) == 1) { 4555 return Error(IDLoc, "unsupported instruction", EmptyRange, 4556 MatchingInlineAsm); 4557 } 4558 4559 // If one instruction matched with a missing feature, report this as a 4560 // missing feature. 4561 if (llvm::count(Match, Match_MissingFeature) == 1) { 4562 ErrorInfo = Match_MissingFeature; 4563 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures, 4564 MatchingInlineAsm); 4565 } 4566 4567 // If one instruction matched with an invalid operand, report this as an 4568 // operand failure. 4569 if (llvm::count(Match, Match_InvalidOperand) == 1) { 4570 return Error(IDLoc, "invalid operand for instruction", EmptyRange, 4571 MatchingInlineAsm); 4572 } 4573 4574 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) { 4575 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc(); 4576 if (ErrorLoc == SMLoc()) 4577 ErrorLoc = IDLoc; 4578 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]", 4579 EmptyRange, MatchingInlineAsm); 4580 } 4581 4582 // If all of these were an outright failure, report it in a useless way. 
4583 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange, 4584 MatchingInlineAsm); 4585 } 4586 4587 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) { 4588 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo); 4589 } 4590 4591 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { 4592 MCAsmParser &Parser = getParser(); 4593 StringRef IDVal = DirectiveID.getIdentifier(); 4594 if (IDVal.starts_with(".arch")) 4595 return parseDirectiveArch(); 4596 if (IDVal.starts_with(".code")) 4597 return ParseDirectiveCode(IDVal, DirectiveID.getLoc()); 4598 else if (IDVal.starts_with(".att_syntax")) { 4599 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4600 if (Parser.getTok().getString() == "prefix") 4601 Parser.Lex(); 4602 else if (Parser.getTok().getString() == "noprefix") 4603 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not " 4604 "supported: registers must have a " 4605 "'%' prefix in .att_syntax"); 4606 } 4607 getParser().setAssemblerDialect(0); 4608 return false; 4609 } else if (IDVal.starts_with(".intel_syntax")) { 4610 getParser().setAssemblerDialect(1); 4611 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4612 if (Parser.getTok().getString() == "noprefix") 4613 Parser.Lex(); 4614 else if (Parser.getTok().getString() == "prefix") 4615 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not " 4616 "supported: registers must not have " 4617 "a '%' prefix in .intel_syntax"); 4618 } 4619 return false; 4620 } else if (IDVal == ".nops") 4621 return parseDirectiveNops(DirectiveID.getLoc()); 4622 else if (IDVal == ".even") 4623 return parseDirectiveEven(DirectiveID.getLoc()); 4624 else if (IDVal == ".cv_fpo_proc") 4625 return parseDirectiveFPOProc(DirectiveID.getLoc()); 4626 else if (IDVal == ".cv_fpo_setframe") 4627 return parseDirectiveFPOSetFrame(DirectiveID.getLoc()); 4628 else if (IDVal == ".cv_fpo_pushreg") 4629 return parseDirectiveFPOPushReg(DirectiveID.getLoc()); 4630 else if (IDVal == 
".cv_fpo_stackalloc") 4631 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc()); 4632 else if (IDVal == ".cv_fpo_stackalign") 4633 return parseDirectiveFPOStackAlign(DirectiveID.getLoc()); 4634 else if (IDVal == ".cv_fpo_endprologue") 4635 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc()); 4636 else if (IDVal == ".cv_fpo_endproc") 4637 return parseDirectiveFPOEndProc(DirectiveID.getLoc()); 4638 else if (IDVal == ".seh_pushreg" || 4639 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg"))) 4640 return parseDirectiveSEHPushReg(DirectiveID.getLoc()); 4641 else if (IDVal == ".seh_setframe" || 4642 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe"))) 4643 return parseDirectiveSEHSetFrame(DirectiveID.getLoc()); 4644 else if (IDVal == ".seh_savereg" || 4645 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg"))) 4646 return parseDirectiveSEHSaveReg(DirectiveID.getLoc()); 4647 else if (IDVal == ".seh_savexmm" || 4648 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128"))) 4649 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc()); 4650 else if (IDVal == ".seh_pushframe" || 4651 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe"))) 4652 return parseDirectiveSEHPushFrame(DirectiveID.getLoc()); 4653 4654 return true; 4655 } 4656 4657 bool X86AsmParser::parseDirectiveArch() { 4658 // Ignore .arch for now. 
4659 getParser().parseStringToEndOfStatement(); 4660 return false; 4661 } 4662 4663 /// parseDirectiveNops 4664 /// ::= .nops size[, control] 4665 bool X86AsmParser::parseDirectiveNops(SMLoc L) { 4666 int64_t NumBytes = 0, Control = 0; 4667 SMLoc NumBytesLoc, ControlLoc; 4668 const MCSubtargetInfo& STI = getSTI(); 4669 NumBytesLoc = getTok().getLoc(); 4670 if (getParser().checkForValidSection() || 4671 getParser().parseAbsoluteExpression(NumBytes)) 4672 return true; 4673 4674 if (parseOptionalToken(AsmToken::Comma)) { 4675 ControlLoc = getTok().getLoc(); 4676 if (getParser().parseAbsoluteExpression(Control)) 4677 return true; 4678 } 4679 if (getParser().parseEOL()) 4680 return true; 4681 4682 if (NumBytes <= 0) { 4683 Error(NumBytesLoc, "'.nops' directive with non-positive size"); 4684 return false; 4685 } 4686 4687 if (Control < 0) { 4688 Error(ControlLoc, "'.nops' directive with negative NOP size"); 4689 return false; 4690 } 4691 4692 /// Emit nops 4693 getParser().getStreamer().emitNops(NumBytes, Control, L, STI); 4694 4695 return false; 4696 } 4697 4698 /// parseDirectiveEven 4699 /// ::= .even 4700 bool X86AsmParser::parseDirectiveEven(SMLoc L) { 4701 if (parseEOL()) 4702 return false; 4703 4704 const MCSection *Section = getStreamer().getCurrentSectionOnly(); 4705 if (!Section) { 4706 getStreamer().initSections(false, getSTI()); 4707 Section = getStreamer().getCurrentSectionOnly(); 4708 } 4709 if (Section->useCodeAlign()) 4710 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0); 4711 else 4712 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0); 4713 return false; 4714 } 4715 4716 /// ParseDirectiveCode 4717 /// ::= .code16 | .code32 | .code64 4718 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) { 4719 MCAsmParser &Parser = getParser(); 4720 Code16GCC = false; 4721 if (IDVal == ".code16") { 4722 Parser.Lex(); 4723 if (!is16BitMode()) { 4724 SwitchMode(X86::Is16Bit); 4725 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); 4726 } 
4727 } else if (IDVal == ".code16gcc") { 4728 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode. 4729 Parser.Lex(); 4730 Code16GCC = true; 4731 if (!is16BitMode()) { 4732 SwitchMode(X86::Is16Bit); 4733 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); 4734 } 4735 } else if (IDVal == ".code32") { 4736 Parser.Lex(); 4737 if (!is32BitMode()) { 4738 SwitchMode(X86::Is32Bit); 4739 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32); 4740 } 4741 } else if (IDVal == ".code64") { 4742 Parser.Lex(); 4743 if (!is64BitMode()) { 4744 SwitchMode(X86::Is64Bit); 4745 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64); 4746 } 4747 } else { 4748 Error(L, "unknown directive " + IDVal); 4749 return false; 4750 } 4751 4752 return false; 4753 } 4754 4755 // .cv_fpo_proc foo 4756 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) { 4757 MCAsmParser &Parser = getParser(); 4758 StringRef ProcName; 4759 int64_t ParamsSize; 4760 if (Parser.parseIdentifier(ProcName)) 4761 return Parser.TokError("expected symbol name"); 4762 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count")) 4763 return true; 4764 if (!isUIntN(32, ParamsSize)) 4765 return Parser.TokError("parameters size out of range"); 4766 if (parseEOL()) 4767 return true; 4768 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); 4769 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L); 4770 } 4771 4772 // .cv_fpo_setframe ebp 4773 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) { 4774 MCRegister Reg; 4775 SMLoc DummyLoc; 4776 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL()) 4777 return true; 4778 return getTargetStreamer().emitFPOSetFrame(Reg, L); 4779 } 4780 4781 // .cv_fpo_pushreg ebx 4782 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) { 4783 MCRegister Reg; 4784 SMLoc DummyLoc; 4785 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL()) 4786 return true; 4787 return getTargetStreamer().emitFPOPushReg(Reg, L); 4788 } 4789 4790 // 
.cv_fpo_stackalloc 20 4791 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) { 4792 MCAsmParser &Parser = getParser(); 4793 int64_t Offset; 4794 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL()) 4795 return true; 4796 return getTargetStreamer().emitFPOStackAlloc(Offset, L); 4797 } 4798 4799 // .cv_fpo_stackalign 8 4800 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) { 4801 MCAsmParser &Parser = getParser(); 4802 int64_t Offset; 4803 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL()) 4804 return true; 4805 return getTargetStreamer().emitFPOStackAlign(Offset, L); 4806 } 4807 4808 // .cv_fpo_endprologue 4809 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) { 4810 MCAsmParser &Parser = getParser(); 4811 if (Parser.parseEOL()) 4812 return true; 4813 return getTargetStreamer().emitFPOEndPrologue(L); 4814 } 4815 4816 // .cv_fpo_endproc 4817 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { 4818 MCAsmParser &Parser = getParser(); 4819 if (Parser.parseEOL()) 4820 return true; 4821 return getTargetStreamer().emitFPOEndProc(L); 4822 } 4823 4824 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID, 4825 MCRegister &RegNo) { 4826 SMLoc startLoc = getLexer().getLoc(); 4827 const MCRegisterInfo *MRI = getContext().getRegisterInfo(); 4828 4829 // Try parsing the argument as a register first. 4830 if (getLexer().getTok().isNot(AsmToken::Integer)) { 4831 SMLoc endLoc; 4832 if (parseRegister(RegNo, startLoc, endLoc)) 4833 return true; 4834 4835 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) { 4836 return Error(startLoc, 4837 "register is not supported for use with this directive"); 4838 } 4839 } else { 4840 // Otherwise, an integer number matching the encoding of the desired 4841 // register may appear. 4842 int64_t EncodedReg; 4843 if (getParser().parseAbsoluteExpression(EncodedReg)) 4844 return true; 4845 4846 // The SEH register number is the same as the encoding register number. 
Map 4847 // from the encoding back to the LLVM register number. 4848 RegNo = 0; 4849 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { 4850 if (MRI->getEncodingValue(Reg) == EncodedReg) { 4851 RegNo = Reg; 4852 break; 4853 } 4854 } 4855 if (RegNo == 0) { 4856 return Error(startLoc, 4857 "incorrect register number for use with this directive"); 4858 } 4859 } 4860 4861 return false; 4862 } 4863 4864 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { 4865 MCRegister Reg; 4866 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4867 return true; 4868 4869 if (getLexer().isNot(AsmToken::EndOfStatement)) 4870 return TokError("expected end of directive"); 4871 4872 getParser().Lex(); 4873 getStreamer().emitWinCFIPushReg(Reg, Loc); 4874 return false; 4875 } 4876 4877 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { 4878 MCRegister Reg; 4879 int64_t Off; 4880 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4881 return true; 4882 if (getLexer().isNot(AsmToken::Comma)) 4883 return TokError("you must specify a stack pointer offset"); 4884 4885 getParser().Lex(); 4886 if (getParser().parseAbsoluteExpression(Off)) 4887 return true; 4888 4889 if (getLexer().isNot(AsmToken::EndOfStatement)) 4890 return TokError("expected end of directive"); 4891 4892 getParser().Lex(); 4893 getStreamer().emitWinCFISetFrame(Reg, Off, Loc); 4894 return false; 4895 } 4896 4897 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { 4898 MCRegister Reg; 4899 int64_t Off; 4900 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4901 return true; 4902 if (getLexer().isNot(AsmToken::Comma)) 4903 return TokError("you must specify an offset on the stack"); 4904 4905 getParser().Lex(); 4906 if (getParser().parseAbsoluteExpression(Off)) 4907 return true; 4908 4909 if (getLexer().isNot(AsmToken::EndOfStatement)) 4910 return TokError("expected end of directive"); 4911 4912 getParser().Lex(); 4913 getStreamer().emitWinCFISaveReg(Reg, Off, Loc); 4914 return false; 4915 } 4916 
4917 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { 4918 MCRegister Reg; 4919 int64_t Off; 4920 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) 4921 return true; 4922 if (getLexer().isNot(AsmToken::Comma)) 4923 return TokError("you must specify an offset on the stack"); 4924 4925 getParser().Lex(); 4926 if (getParser().parseAbsoluteExpression(Off)) 4927 return true; 4928 4929 if (getLexer().isNot(AsmToken::EndOfStatement)) 4930 return TokError("expected end of directive"); 4931 4932 getParser().Lex(); 4933 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); 4934 return false; 4935 } 4936 4937 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { 4938 bool Code = false; 4939 StringRef CodeID; 4940 if (getLexer().is(AsmToken::At)) { 4941 SMLoc startLoc = getLexer().getLoc(); 4942 getParser().Lex(); 4943 if (!getParser().parseIdentifier(CodeID)) { 4944 if (CodeID != "code") 4945 return Error(startLoc, "expected @code"); 4946 Code = true; 4947 } 4948 } 4949 4950 if (getLexer().isNot(AsmToken::EndOfStatement)) 4951 return TokError("expected end of directive"); 4952 4953 getParser().Lex(); 4954 getStreamer().emitWinCFIPushFrame(Code, Loc); 4955 return false; 4956 } 4957 4958 // Force static initialization. 4959 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() { 4960 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 4961 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 4962 } 4963 4964 #define GET_MATCHER_IMPLEMENTATION 4965 #include "X86GenAsmMatcher.inc" 4966