//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>

using namespace llvm;

// Opt-in flag for the experimental LVI (Load Value Injection) hardening of
// inline assembly; consulted by the LVI mitigation hooks declared further down
// in this file.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);

// Returns true (and sets \p ErrMsg) when \p Scale is not one of the scale
// factors representable in an x86 SIB byte (1, 2, 4 or 8).
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
    return true;
  }
  return false;
}

namespace {

// Operator precedence table, indexed by InfixCalculatorTok (declared below in
// X86AsmParser); larger value means tighter binding. Used by InfixCalculator
// when converting infix expressions to postfix.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};

class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  // True while parsing under ".code16gcc": 16-bit code that should be matched
  // with 32-bit operand/address sizes (see MatchInstruction below).
  bool Code16GCC;
  // Non-zero when a data-size prefix has been explicitly forced.
  unsigned ForcedDataPrefix = 0;

  // Encoding explicitly requested via a {vex}/{vex2}/{vex3}/{evex} prefix.
  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

  // Displacement size explicitly requested via a {disp8}/{disp32} prefix.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use apx extended register?
  bool UseApxExtendedReg = false;
  // Is this instruction explicitly required not to update flags?
  bool ForcedNoFlag = false;

private:
  // Consume the current lexer token and return the location it occupied.
  SMLoc consumeToken() {
    MCAsmParser &Parser = getParser();
    SMLoc Result = Parser.getTok().getLoc();
    Parser.Lex();
    return Result;
  }

  // Returns the target streamer; asserts that one has been installed.
  X86TargetStreamer &getTargetStreamer() {
    assert(getParser().getStreamer().getTargetStreamer() &&
           "do not have a target streamer");
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<X86TargetStreamer &>(TS);
  }

  // Wrapper around the tablegen'd MatchInstructionImpl that temporarily
  // switches to 32-bit mode when parsing ".code16gcc" code, then restores
  // 16-bit mode afterwards.
  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }

  // Token kinds for the infix-expression calculator. Must stay in sync with
  // the OpPrecedence table above (same order, same count).
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  // Intel-syntax inline-asm operators (LENGTH/SIZE/TYPE).
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  // MASM operators (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };

  // Shunting-yard-style evaluator for constant integer expressions: operators
  // are staged on InfixOperatorStack according to OpPrecedence, operands and
  // committed operators accumulate on PostfixStack, and execute() evaluates
  // the resulting postfix sequence.
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recent operand; returns -1 if the top of the postfix stack
    // is not an operand (callers feed -1 to checkScale, which rejects it).
    // NOTE(review): "Poped" below is a typo inside the assert string; left
    // as-is here since it is runtime text, not a comment.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    // Push an operator, first draining any stacked operators of greater or
    // equal precedence onto the postfix stack (standard shunting-yard step).
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parenthesis.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parenthesis,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Flush any remaining operators and evaluate the postfix sequence,
    // returning the expression's value (0 for an empty expression).
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (const ICToken &Op : PostfixStack) {
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          // Comparison operators follow MASM semantics: true is all-ones (-1),
          // false is 0.
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };

  // States of the Intel-expression parser state machine below. The current
  // and previous state together determine how the next token is interpreted
  // (e.g. '*' after a register begins a scale, '-' after an operator is a
  // unary negate).
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };

  // State machine driven by ParseIntelExpression: consumes one token-event at
  // a time (onPlus, onRegister, ...) and accumulates the memory-operand
  // components (base/index/scale registers, symbol, immediate) plus a
  // constant expression evaluated by the embedded InfixCalculator.
  class IntelExprStateMachine {
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
    int64_t Imm = 0;
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    short BracCount = 0;
    bool MemExpr = false;
    bool BracketUsed = false;
    bool OffsetOperator = false;
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    AsmTypeInfo CurType;

    // Record the (single) symbol reference of the expression; errors if one
    // was already recorded.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }

  public:
    IntelExprStateMachine() = default;

    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    unsigned getBaseReg() const { return BaseReg; }
    unsigned getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    // Final value of the expression: the accumulated displacement plus the
    // result of evaluating the constant sub-expression.
    int64_t getImm() { return Imm + IC.execute(); }
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_INTEGER;
    }

    // Is the intel expression appended after an operand index.
    // [OperandIdx][Intel Expression]
    // This is necessary for checking if it is an independent
    // intel expression at back end when parsing inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }

    // Report the "both base and index registers already used" error; the
    // message differs when we know the expression was attached to an inline
    // asm operand index under PIC.
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]
      // cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }

    // The onXxx handlers below all follow the same pattern: switch on the
    // current state, either transition and push the matching calculator
    // token, or fall into IES_ERROR on an illegal token sequence.
    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onEq() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_EQ;
        IC.pushOperator(IC_EQ);
        break;
      }
      PrevState = CurrState;
    }
    void onNE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_NE;
        IC.pushOperator(IC_NE);
        break;
      }
      PrevState = CurrState;
    }
    void onLT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LT;
        IC.pushOperator(IC_LT);
        break;
      }
      PrevState = CurrState;
    }
    void onLE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LE;
        IC.pushOperator(IC_LE);
        break;
      }
      PrevState = CurrState;
    }
    void onGT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GT;
        IC.pushOperator(IC_GT);
        break;
      }
      PrevState = CurrState;
    }
    void onGE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GE;
        IC.pushOperator(IC_GE);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        // A register followed by '+' (and not preceded by '*') closes a
        // base/index term: commit TmpReg as base or index.
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle an identifier token: enum values and symbolic constants are
    // folded to integers; anything else becomes the expression's (single)
    // symbol reference, represented by a 0 operand in the calculator.
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    // '[' either starts the bracketed part of a memory operand or, after a
    // value (sym[disp]), acts as an implicit '+'. Nested brackets are
    // rejected (returns true).
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
    // Handle the MASM/Intel 'OFFSET <sym>' operator.
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
    // Handle a type cast such as 'DWORD PTR (...)'; only legal right after a
    // left parenthesis.
    void onCast(AsmTypeInfo Info) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_LPAREN:
        setTypeInfo(Info);
        State = IES_CAST;
        break;
      }
    }
    void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
  };

  // Emit a parse error. While matching MS inline asm, errors are suppressed
  // (the statement is just skipped) because diagnostics are reported by the
  // frontend instead; returns false in that case.
  bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
             bool MatchingInlineAsm = false) {
    MCAsmParser &Parser = getParser();
    if (MatchingInlineAsm) {
      if (!getLexer().isAtStartOfStatement())
        Parser.eatToEndOfStatement();
      return false;
    }
    return Parser.Error(L, Msg, Range);
  }

  bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, bool NonAbsMem, SMLoc Start,
                               SMLoc End, unsigned Size, StringRef Identifier,
                               const InlineAsmIdentifierInfo &Info,
                               OperandVector &Operands);

  bool parseDirectiveArch();
  bool parseDirectiveNops(SMLoc L);
  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);

  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                  OperandVector &Operands, MCStreamer &Out,
                                  uint64_t &ErrorInfo,
                                  bool MatchingInlineAsm);

  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                    OperandVector &Operands, MCStreamer &Out,
                                    uint64_t &ErrorInfo,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
  /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if
  /// required.
  /// return false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);

  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  // Switch the subtarget to exactly one of the 16/32/64-bit modes: clear the
  // old mode bit and set the new one in a single ToggleFeature call, then
  // recompute the available-features mask.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }

  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }

  // Dialect 0 is AT&T; any nonzero assembler dialect is treated as Intel.
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };

  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }

  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;

  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace

#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"

// Validate a (BaseReg, IndexReg, Scale) addressing-mode combination. On any
// illegal combination, sets ErrMsg and returns true; returns false if legal.
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.

  if (BaseReg != 0 &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  if (IndexReg != 0 &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // RIP/EIP bases admit no index register, and (E/R)IP and (E/R)SP can never
  // be index registers.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP ||
      IndexReg == X86::ESP || IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  if (BaseReg != 0 && IndexReg != 0) {
    // Base and index widths must agree (EIZ/RIZ count as 32/64-bit).
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      // 16-bit addressing only supports (BX|BP) + (SI|DI).
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}

// Resolve a textual register name (with or without a leading '%') to an
// MCRegister. Returns true on error; in Intel syntax an unknown name returns
// true without a diagnostic so the caller can retry it as an identifier.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  if (X86II::isApxExtendedReg(RegNo))
    UseApxExtendedReg = true;

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && RegName.starts_with("db")) {
    if (RegName.size() == 3) {
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (RegNo == 0) {
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}

// Lex and match a register operand. When RestoreOnFailure is set, every token
// consumed is recorded and pushed back (UnLex) on failure so the lexer state
// is unchanged — this is what tryParseRegister() relies on.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      // Push tokens back in reverse order of consumption.
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

// Non-committal variant: distinguishes "hard error was reported" (Failure)
// from "this is simply not a register" (NoMatch).
ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                           SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return ParseStatus::Failure;
  if (Result)
    return ParseStatus::NoMatch;
  return ParseStatus::Success;
}

// Build the implicit source memory operand based at the mode-appropriate
// SI register (RSI/ESI/SI) with a zero displacement.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

// Build the implicit destination memory operand based at the mode-appropriate
// DI register (RDI/EDI/DI) with a zero displacement.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

// True for the SI family, false for the DI family; anything else is a bug.
bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}

// Map (register class, SI-or-DI) to the concrete register of that width.
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  switch (RegClassID) {
  default: llvm_unreachable("Unexpected register class");
  case X86::GR64RegClassID:
    return IsSIReg ? X86::RSI : X86::RDI;
  case X86::GR32RegClassID:
    return IsSIReg ? X86::ESI : X86::EDI;
  case X86::GR16RegClassID:
    return IsSIReg ? X86::SI : X86::DI;
  }
}

// Append default src/dst operands in dialect order: Intel is dst-first,
// AT&T is src-first.
void X86AsmParser::AddDefaultSrcDestOperands(
    OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
    std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
  if (isParsingIntelSyntax()) {
    Operands.push_back(std::move(Dst));
    Operands.push_back(std::move(Src));
  }
  else {
    Operands.push_back(std::move(Src));
    Operands.push_back(std::move(Dst));
  }
}

// Reconcile explicitly-written operands (OrigOperands, which also holds the
// mnemonic at index 0) against the canonical default operands (FinalOperands):
// verifies registers agree, warns when a written memory operand only
// determines size, then replaces the originals with the finals.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}

// Dispatch operand parsing to the active dialect.
bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
  if (isParsingIntelSyntax())
    return parseIntelOperand(Operands, Name);

  return parseATTOperand(Operands);
}

// Build a memory operand for an MS inline-asm identifier, attaching the
// frontend size/decl info needed by the inline-asm rewriter.
bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                           unsigned BaseReg, unsigned IndexReg,
                                           unsigned Scale, bool NonAbsMem,
                                           SMLoc Start, SMLoc End,
                                           unsigned Size, StringRef Identifier,
                                           const InlineAsmIdentifierInfo &Info,
                                           OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, and though inaccessible via rip/eip.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}

// Some binary bitwise operators have a named synonym
// Query a candidate string for being such a named operator
// and if so - invoke the appropriate handler
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // "offset" already consumed its operand; all other operators consume the
  // operator token here.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
// MASM-only named comparison operators (EQ/NE/LT/LE/GT/GE).
bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
                                          IntelExprStateMachine &SM,
                                          bool &ParseError, SMLoc &End) {
  if (Name.equals_insensitive("eq")) {
    SM.onEq();
  } else if (Name.equals_insensitive("ne")) {
    SM.onNE();
  } else if (Name.equals_insensitive("lt")) {
    SM.onLT();
  } else if (Name.equals_insensitive("le")) {
    SM.onLE();
  } else if (Name.equals_insensitive("gt")) {
    SM.onGT();
  } else if (Name.equals_insensitive("ge")) {
    SM.onGE();
  } else {
    return false;
  }
  End = consumeToken();
  return true;
}

// Check if current intel expression append after an operand.
1864 // Like: [Operand][Intel Expression] 1865 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK, 1866 IntelExprStateMachine &SM) { 1867 if (PrevTK != AsmToken::RBrac) 1868 return; 1869 1870 SM.setAppendAfterOperand(); 1871 } 1872 1873 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1874 MCAsmParser &Parser = getParser(); 1875 StringRef ErrMsg; 1876 1877 AsmToken::TokenKind PrevTK = AsmToken::Error; 1878 1879 if (getContext().getObjectFileInfo()->isPositionIndependent()) 1880 SM.setPIC(); 1881 1882 bool Done = false; 1883 while (!Done) { 1884 // Get a fresh reference on each loop iteration in case the previous 1885 // iteration moved the token storage during UnLex(). 1886 const AsmToken &Tok = Parser.getTok(); 1887 1888 bool UpdateLocLex = true; 1889 AsmToken::TokenKind TK = getLexer().getKind(); 1890 1891 switch (TK) { 1892 default: 1893 if ((Done = SM.isValidEndState())) 1894 break; 1895 return Error(Tok.getLoc(), "unknown token in expression"); 1896 case AsmToken::Error: 1897 return Error(getLexer().getErrLoc(), getLexer().getErr()); 1898 break; 1899 case AsmToken::EndOfStatement: 1900 Done = true; 1901 break; 1902 case AsmToken::Real: 1903 // DotOperator: [ebx].0 1904 UpdateLocLex = false; 1905 if (ParseIntelDotOperator(SM, End)) 1906 return true; 1907 break; 1908 case AsmToken::Dot: 1909 if (!Parser.isParsingMasm()) { 1910 if ((Done = SM.isValidEndState())) 1911 break; 1912 return Error(Tok.getLoc(), "unknown token in expression"); 1913 } 1914 // MASM allows spaces around the dot operator (e.g., "var . 
x") 1915 Lex(); 1916 UpdateLocLex = false; 1917 if (ParseIntelDotOperator(SM, End)) 1918 return true; 1919 break; 1920 case AsmToken::Dollar: 1921 if (!Parser.isParsingMasm()) { 1922 if ((Done = SM.isValidEndState())) 1923 break; 1924 return Error(Tok.getLoc(), "unknown token in expression"); 1925 } 1926 [[fallthrough]]; 1927 case AsmToken::String: { 1928 if (Parser.isParsingMasm()) { 1929 // MASM parsers handle strings in expressions as constants. 1930 SMLoc ValueLoc = Tok.getLoc(); 1931 int64_t Res; 1932 const MCExpr *Val; 1933 if (Parser.parsePrimaryExpr(Val, End, nullptr)) 1934 return true; 1935 UpdateLocLex = false; 1936 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr())) 1937 return Error(ValueLoc, "expected absolute value"); 1938 if (SM.onInteger(Res, ErrMsg)) 1939 return Error(ValueLoc, ErrMsg); 1940 break; 1941 } 1942 [[fallthrough]]; 1943 } 1944 case AsmToken::At: 1945 case AsmToken::Identifier: { 1946 SMLoc IdentLoc = Tok.getLoc(); 1947 StringRef Identifier = Tok.getString(); 1948 UpdateLocLex = false; 1949 if (Parser.isParsingMasm()) { 1950 size_t DotOffset = Identifier.find_first_of('.'); 1951 if (DotOffset != StringRef::npos) { 1952 consumeToken(); 1953 StringRef LHS = Identifier.slice(0, DotOffset); 1954 StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1); 1955 StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos); 1956 if (!RHS.empty()) { 1957 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS)); 1958 } 1959 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot)); 1960 if (!LHS.empty()) { 1961 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS)); 1962 } 1963 break; 1964 } 1965 } 1966 // (MASM only) <TYPE> PTR operator 1967 if (Parser.isParsingMasm()) { 1968 const AsmToken &NextTok = getLexer().peekTok(); 1969 if (NextTok.is(AsmToken::Identifier) && 1970 NextTok.getIdentifier().equals_insensitive("ptr")) { 1971 AsmTypeInfo Info; 1972 if (Parser.lookUpType(Identifier, Info)) 1973 return Error(Tok.getLoc(), "unknown type"); 
1974 SM.onCast(Info); 1975 // Eat type and PTR. 1976 consumeToken(); 1977 End = consumeToken(); 1978 break; 1979 } 1980 } 1981 // Register, or (MASM only) <register>.<field> 1982 MCRegister Reg; 1983 if (Tok.is(AsmToken::Identifier)) { 1984 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) { 1985 if (SM.onRegister(Reg, ErrMsg)) 1986 return Error(IdentLoc, ErrMsg); 1987 break; 1988 } 1989 if (Parser.isParsingMasm()) { 1990 const std::pair<StringRef, StringRef> IDField = 1991 Tok.getString().split('.'); 1992 const StringRef ID = IDField.first, Field = IDField.second; 1993 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size()); 1994 if (!Field.empty() && 1995 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) { 1996 if (SM.onRegister(Reg, ErrMsg)) 1997 return Error(IdentLoc, ErrMsg); 1998 1999 AsmFieldInfo Info; 2000 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); 2001 if (Parser.lookUpField(Field, Info)) 2002 return Error(FieldStartLoc, "unknown offset"); 2003 else if (SM.onPlus(ErrMsg)) 2004 return Error(getTok().getLoc(), ErrMsg); 2005 else if (SM.onInteger(Info.Offset, ErrMsg)) 2006 return Error(IdentLoc, ErrMsg); 2007 SM.setTypeInfo(Info.Type); 2008 2009 End = consumeToken(); 2010 break; 2011 } 2012 } 2013 } 2014 // Operator synonymous ("not", "or" etc.) 
2015 bool ParseError = false; 2016 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) { 2017 if (ParseError) 2018 return true; 2019 break; 2020 } 2021 if (Parser.isParsingMasm() && 2022 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) { 2023 if (ParseError) 2024 return true; 2025 break; 2026 } 2027 // Symbol reference, when parsing assembly content 2028 InlineAsmIdentifierInfo Info; 2029 AsmFieldInfo FieldInfo; 2030 const MCExpr *Val; 2031 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { 2032 // MS Dot Operator expression 2033 if (Identifier.count('.') && 2034 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) { 2035 if (ParseIntelDotOperator(SM, End)) 2036 return true; 2037 break; 2038 } 2039 } 2040 if (isParsingMSInlineAsm()) { 2041 // MS InlineAsm operators (TYPE/LENGTH/SIZE) 2042 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { 2043 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { 2044 if (SM.onInteger(Val, ErrMsg)) 2045 return Error(IdentLoc, ErrMsg); 2046 } else { 2047 return true; 2048 } 2049 break; 2050 } 2051 // MS InlineAsm identifier 2052 // Call parseIdentifier() to combine @ with the identifier behind it. 
2053 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) 2054 return Error(IdentLoc, "expected identifier"); 2055 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) 2056 return true; 2057 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2058 true, ErrMsg)) 2059 return Error(IdentLoc, ErrMsg); 2060 break; 2061 } 2062 if (Parser.isParsingMasm()) { 2063 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) { 2064 int64_t Val; 2065 if (ParseMasmOperator(OpKind, Val)) 2066 return true; 2067 if (SM.onInteger(Val, ErrMsg)) 2068 return Error(IdentLoc, ErrMsg); 2069 break; 2070 } 2071 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) { 2072 // Field offset immediate; <TYPE>.<field specification> 2073 Lex(); // eat type 2074 bool EndDot = parseOptionalToken(AsmToken::Dot); 2075 while (EndDot || (getTok().is(AsmToken::Identifier) && 2076 getTok().getString().starts_with("."))) { 2077 getParser().parseIdentifier(Identifier); 2078 if (!EndDot) 2079 Identifier.consume_front("."); 2080 EndDot = Identifier.consume_back("."); 2081 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier, 2082 FieldInfo)) { 2083 SMLoc IDEnd = 2084 SMLoc::getFromPointer(Identifier.data() + Identifier.size()); 2085 return Error(IdentLoc, "Unable to lookup field reference!", 2086 SMRange(IdentLoc, IDEnd)); 2087 } 2088 if (!EndDot) 2089 EndDot = parseOptionalToken(AsmToken::Dot); 2090 } 2091 if (SM.onInteger(FieldInfo.Offset, ErrMsg)) 2092 return Error(IdentLoc, ErrMsg); 2093 break; 2094 } 2095 } 2096 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) { 2097 return Error(Tok.getLoc(), "Unexpected identifier!"); 2098 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2099 false, ErrMsg)) { 2100 return Error(IdentLoc, ErrMsg); 2101 } 2102 break; 2103 } 2104 case AsmToken::Integer: { 2105 // Look for 'b' or 'f' following an Integer as a directional label 2106 SMLoc Loc = getTok().getLoc(); 2107 int64_t IntVal = 
getTok().getIntVal(); 2108 End = consumeToken(); 2109 UpdateLocLex = false; 2110 if (getLexer().getKind() == AsmToken::Identifier) { 2111 StringRef IDVal = getTok().getString(); 2112 if (IDVal == "f" || IDVal == "b") { 2113 MCSymbol *Sym = 2114 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 2115 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2116 const MCExpr *Val = 2117 MCSymbolRefExpr::create(Sym, Variant, getContext()); 2118 if (IDVal == "b" && Sym->isUndefined()) 2119 return Error(Loc, "invalid reference to undefined symbol"); 2120 StringRef Identifier = Sym->getName(); 2121 InlineAsmIdentifierInfo Info; 2122 AsmTypeInfo Type; 2123 if (SM.onIdentifierExpr(Val, Identifier, Info, Type, 2124 isParsingMSInlineAsm(), ErrMsg)) 2125 return Error(Loc, ErrMsg); 2126 End = consumeToken(); 2127 } else { 2128 if (SM.onInteger(IntVal, ErrMsg)) 2129 return Error(Loc, ErrMsg); 2130 } 2131 } else { 2132 if (SM.onInteger(IntVal, ErrMsg)) 2133 return Error(Loc, ErrMsg); 2134 } 2135 break; 2136 } 2137 case AsmToken::Plus: 2138 if (SM.onPlus(ErrMsg)) 2139 return Error(getTok().getLoc(), ErrMsg); 2140 break; 2141 case AsmToken::Minus: 2142 if (SM.onMinus(ErrMsg)) 2143 return Error(getTok().getLoc(), ErrMsg); 2144 break; 2145 case AsmToken::Tilde: SM.onNot(); break; 2146 case AsmToken::Star: SM.onStar(); break; 2147 case AsmToken::Slash: SM.onDivide(); break; 2148 case AsmToken::Percent: SM.onMod(); break; 2149 case AsmToken::Pipe: SM.onOr(); break; 2150 case AsmToken::Caret: SM.onXor(); break; 2151 case AsmToken::Amp: SM.onAnd(); break; 2152 case AsmToken::LessLess: 2153 SM.onLShift(); break; 2154 case AsmToken::GreaterGreater: 2155 SM.onRShift(); break; 2156 case AsmToken::LBrac: 2157 if (SM.onLBrac()) 2158 return Error(Tok.getLoc(), "unexpected bracket encountered"); 2159 tryParseOperandIdx(PrevTK, SM); 2160 break; 2161 case AsmToken::RBrac: 2162 if (SM.onRBrac(ErrMsg)) { 2163 return Error(Tok.getLoc(), ErrMsg); 2164 } 2165 break; 2166 case 
AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

/// Rewrite an already-parsed Intel-syntax expression into AsmRewrite records
/// for MS inline assembly, so the AsmPrinter can re-emit it canonically.
/// [Start, End) delimits the original expression text; pieces that are folded
/// into the IntelExpr record (or are redundant) are marked AOK_Skip.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // From here on, only the text after the symbol needs rewriting.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite from the state machine's components:
  // base register, index register, scale, offset-operator symbol, and the
  // accumulated immediate.
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
2216 bool X86AsmParser::ParseIntelInlineAsmIdentifier( 2217 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, 2218 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) { 2219 MCAsmParser &Parser = getParser(); 2220 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly."); 2221 Val = nullptr; 2222 2223 StringRef LineBuf(Identifier.data()); 2224 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 2225 2226 const AsmToken &Tok = Parser.getTok(); 2227 SMLoc Loc = Tok.getLoc(); 2228 2229 // Advance the token stream until the end of the current token is 2230 // after the end of what the frontend claimed. 2231 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 2232 do { 2233 End = Tok.getEndLoc(); 2234 getLexer().Lex(); 2235 } while (End.getPointer() < EndPtr); 2236 Identifier = LineBuf; 2237 2238 // The frontend should end parsing on an assembler token boundary, unless it 2239 // failed parsing. 2240 assert((End.getPointer() == EndPtr || 2241 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) && 2242 "frontend claimed part of a token?"); 2243 2244 // If the identifier lookup was unsuccessful, assume that we are dealing with 2245 // a label. 2246 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) { 2247 StringRef InternalName = 2248 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 2249 Loc, false); 2250 assert(InternalName.size() && "We should have an internal name here."); 2251 // Push a rewrite for replacing the identifier name with the internal name, 2252 // unless we are parsing the operand of an offset operator 2253 if (!IsParsingOffsetOperator) 2254 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 2255 InternalName); 2256 else 2257 Identifier = InternalName; 2258 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 2259 return false; 2260 // Create the symbol reference. 
2261 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 2262 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2263 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 2264 return false; 2265 } 2266 2267 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand 2268 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) { 2269 MCAsmParser &Parser = getParser(); 2270 const AsmToken &Tok = Parser.getTok(); 2271 // Eat "{" and mark the current place. 2272 const SMLoc consumedToken = consumeToken(); 2273 if (Tok.isNot(AsmToken::Identifier)) 2274 return Error(Tok.getLoc(), "Expected an identifier after {"); 2275 if (Tok.getIdentifier().starts_with("r")) { 2276 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 2277 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 2278 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 2279 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 2280 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 2281 .Default(-1); 2282 if (-1 == rndMode) 2283 return Error(Tok.getLoc(), "Invalid rounding mode."); 2284 Parser.Lex(); // Eat "r*" of r*-sae 2285 if (!getLexer().is(AsmToken::Minus)) 2286 return Error(Tok.getLoc(), "Expected - at this point"); 2287 Parser.Lex(); // Eat "-" 2288 Parser.Lex(); // Eat the sae 2289 if (!getLexer().is(AsmToken::RCurly)) 2290 return Error(Tok.getLoc(), "Expected } at this point"); 2291 SMLoc End = Tok.getEndLoc(); 2292 Parser.Lex(); // Eat "}" 2293 const MCExpr *RndModeOp = 2294 MCConstantExpr::create(rndMode, Parser.getContext()); 2295 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End)); 2296 return false; 2297 } 2298 if(Tok.getIdentifier().equals("sae")){ 2299 Parser.Lex(); // Eat the sae 2300 if (!getLexer().is(AsmToken::RCurly)) 2301 return Error(Tok.getLoc(), "Expected } at this point"); 2302 Parser.Lex(); // Eat "}" 2303 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken)); 2304 return false; 2305 } 2306 return Error(Tok.getLoc(), 
"unknown token in expression"); 2307 } 2308 2309 /// Parse the '.' operator. 2310 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, 2311 SMLoc &End) { 2312 const AsmToken &Tok = getTok(); 2313 AsmFieldInfo Info; 2314 2315 // Drop the optional '.'. 2316 StringRef DotDispStr = Tok.getString(); 2317 DotDispStr.consume_front("."); 2318 StringRef TrailingDot; 2319 2320 // .Imm gets lexed as a real. 2321 if (Tok.is(AsmToken::Real)) { 2322 APInt DotDisp; 2323 if (DotDispStr.getAsInteger(10, DotDisp)) 2324 return Error(Tok.getLoc(), "Unexpected offset"); 2325 Info.Offset = DotDisp.getZExtValue(); 2326 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && 2327 Tok.is(AsmToken::Identifier)) { 2328 if (DotDispStr.ends_with(".")) { 2329 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1); 2330 DotDispStr = DotDispStr.drop_back(1); 2331 } 2332 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 2333 const StringRef Base = BaseMember.first, Member = BaseMember.second; 2334 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) && 2335 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) && 2336 getParser().lookUpField(DotDispStr, Info) && 2337 (!SemaCallback || 2338 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset))) 2339 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 2340 } else { 2341 return Error(Tok.getLoc(), "Unexpected token type!"); 2342 } 2343 2344 // Eat the DotExpression and update End 2345 End = SMLoc::getFromPointer(DotDispStr.data()); 2346 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); 2347 while (Tok.getLoc().getPointer() < DotExprEndLoc) 2348 Lex(); 2349 if (!TrailingDot.empty()) 2350 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot)); 2351 SM.addImm(Info.Offset); 2352 SM.setTypeInfo(Info.Type); 2353 return false; 2354 } 2355 2356 /// Parse the 'offset' operator. 
/// This operator is used to specify the location of a given operand
/// \param Val [out] Expression for the operand's address.
/// \param ID [out] The identifier text following 'offset'.
/// \param Info [out] Frontend lookup result (MS inline asm only).
/// \param End [out] Location just past the parsed operand.
bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                            InlineAsmIdentifierInfo &Info,
                                            SMLoc &End) {
  // Eat offset, mark start of identifier.
  SMLoc Start = Lex().getLoc();
  ID = getTok().getString();
  if (!isParsingMSInlineAsm()) {
    // Standalone assembly: only a plain identifier or string is accepted.
    if ((getTok().isNot(AsmToken::Identifier) &&
         getTok().isNot(AsmToken::String)) ||
        getParser().parsePrimaryExpr(Val, End, nullptr))
      return Error(Start, "unexpected token!");
  } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
    return Error(Start, "unable to lookup expression");
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
    return Error(Start, "offset operator cannot yet handle constants");
  }
  return false;
}

// Query a candidate string for being an Intel assembly operator
// Report back its kind, or IOK_INVALID if it does not evaluate as a known one
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name)
    .Cases("TYPE","type",IOK_TYPE)
    .Cases("SIZE","size",IOK_SIZE)
    .Cases("LENGTH","length",IOK_LENGTH)
    .Default(IOK_INVALID);
}

/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
/// returns the number of elements in an array.  It returns the value 1 for
/// non-array variables.  The SIZE operator returns the size of a C or C++
/// variable.  A variable's size is the product of its LENGTH and TYPE.  The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
/// Returns 0 on failure (after emitting a diagnostic where appropriate).
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  Parser.Lex(); // Eat operator.

  const MCExpr *Val = nullptr;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  // The operand is only inspected, never evaluated.
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*IsUnevaluatedOperand=*/true, End))
    return 0;

  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    Error(Start, "unable to lookup expression");
    return 0;
  }

  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Var.Length; break;
  case IOK_SIZE: CVal = Info.Var.Size; break;
  case IOK_TYPE: CVal = Info.Var.Type; break;
  }

  return CVal;
}

// Query a candidate string for being an Intel assembly operator
// Report back its kind, or MOK_INVALID if it does not evaluate as a known one
// (MASM operators are matched case-insensitively).
unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name.lower())
      .Case("type", MOK_TYPE)
      .Cases("size", "sizeof", MOK_SIZEOF)
      .Cases("length", "lengthof", MOK_LENGTHOF)
      .Default(MOK_INVALID);
}

/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators.  The LENGTHOF
/// operator returns the number of elements in an array.  It returns the value
/// 1 for non-array variables.  The SIZEOF operator returns the size of a type
/// or variable in bytes.  A variable's size is the product of its LENGTH and
/// TYPE.  The TYPE operator returns the size of a variable. If the variable is
/// an array, TYPE returns the size of a single element.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  if (!Val) {
    // Not a plain type name: parse a full Intel expression and query the
    // state machine for the requested attribute.
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}

/// Parse an optional Intel memory-operand size prefix ("<size> PTR"), e.g.
/// "dword ptr". \p Size receives the operand size in bits, or 0 if no size
/// keyword is present (which is not an error).
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
  Size = StringSwitch<unsigned>(getTok().getString())
    .Cases("BYTE", "byte", 8)
    .Cases("WORD", "word", 16)
    .Cases("DWORD", "dword", 32)
    .Cases("FLOAT", "float", 32)
    .Cases("LONG", "long", 32)
    .Cases("FWORD", "fword", 48)
    .Cases("DOUBLE", "double", 64)
    .Cases("QWORD", "qword", 64)
    .Cases("MMWORD","mmword", 64)
    .Cases("XWORD", "xword", 80)
    .Cases("TBYTE", "tbyte", 80)
    .Cases("XMMWORD", "xmmword", 128)
    .Cases("YMMWORD", "ymmword", 256)
    .Cases("ZMMWORD", "zmmword", 512)
    .Default(0);
  if (Size) {
    const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
    if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
      return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
    Lex(); // Eat ptr.
  }
  return false;
}

/// Parse a full Intel-syntax operand: an optional "<size> ptr" prefix, then a
/// rounding-mode operand, a register, a segment override, an immediate, or a
/// memory reference. \p Name is the instruction mnemonic (used to special-case
/// unconditional branches).
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Combine the symbolic and immediate parts into a single displacement.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  // RIP with an index register is not encodable; drop the redundant base.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  // Without an explicit "<size> ptr", derive the size from the expression's
  // element type (bytes -> bits).
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // ESP/RSP cannot be an index; if no explicit scale was given, swap it into
  // the base position.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
                                   IsUnconditionalBranch && is64BitMode(),
                                   Start, End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    if (is64BitMode() && SM.getElementSize() > 0) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}

/// Parse a single AT&T-syntax operand: an immediate ('$expr'), a rounding
/// mode ('{...}'), a register, or a memory reference.
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supersede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This is a memory operand or a register. We have some parsing
    // complications as a '(' may be part of an immediate expression or the
    // addressing mode block. This is complicated by the fact that an
    // assembler-level variable may refer either to a register or an immediate
    // expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return registers that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}

// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
/// Map a condition-code mnemonic suffix (e.g. the "ne" of "jne") to its
/// EFLAGS condition code, accepting the standard alternate spellings.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
  return StringSwitch<X86::CondCode>(CC)
      .Case("o", X86::COND_O)          // Overflow
      .Case("no", X86::COND_NO)        // No Overflow
      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
      .Cases("e", "z", X86::COND_E)    // Equal/Zero
      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
      .Case("s", X86::COND_S)          // Sign
      .Case("ns", X86::COND_NS)        // No Sign
      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
      .Default(X86::COND_INVALID);
}

// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
// On success with a {z} present, Z receives the "{z}" token operand and the
// closing '}' has been consumed.
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
                          const SMLoc &StartLoc) {
  MCAsmParser &Parser = getParser();
  // Assuming we are just past the '{' mark, querying the next token
  // Searched for {z}, but none was found. Return false, as no parsing error was
  // encountered
  if (!(getLexer().is(AsmToken::Identifier) &&
        (getLexer().getTok().getIdentifier() == "z")))
    return false;
  Parser.Lex(); // Eat z
  // Query and eat the '}' mark
  if (!getLexer().is(AsmToken::RCurly))
    return Error(getLexer().getLoc(), "Expected } at this point");
  Parser.Lex(); // Eat '}'
  // Assign Z with the {z} mark operand
  Z = X86Operand::CreateToken("{z}", StartLoc);
  return false;
}

// true on failure, false otherwise
// Parses the optional AVX-512 suffixes after an operand: a memory broadcast
// ({1to<NUM>}) or a write-mask/zeroing combination ({k<NUM>}, {z}, in either
// order). Absence of a '{' is not an error.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      // Reassemble the full "1to<NUM>" string from the integer prefix plus
      // the identifier suffix (the lexer splits "1to8" into "1" and "to8").
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}
      // the last one has no meaning, but GCC accepts it
      // Currently, we're just past a '{' mark
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the allegedly '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // If we've found a parsing error, or found no (expected) {z} mark
          // - report an error
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // on the contrary - have it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}

/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2. seg : *(disp)  (base-index-scale-expr)
  //   3. seg :       *(base-index-scale-expr)
  //   4. disp  *(base-index-scale-expr)
  //   5. *(disp)  (base-index-scale-expr)
  //   6.  *(base-index-scale-expr)
  //   7. disp *
  //   8. *(disp)

  // If we do not have a displacement yet, check if we're in cases 4 or 6 by
  // checking if the first object after the parenthesis is a register (or an
  // identifier referring to a register) and parse the displacement or default
  // to 0 as appropriate.
  auto isAtMemOperand = [this]() {
    if (this->getLexer().isNot(AsmToken::LParen))
      return false;
    AsmToken Buf[2];
    StringRef Id;
    auto TokCount = this->getLexer().peekTokens(Buf, true);
    if (TokCount == 0)
      return false;
    switch (Buf[0].getKind()) {
    case AsmToken::Percent:
    case AsmToken::Comma:
      return true;
    // These lower cases are doing a peekIdentifier.
    case AsmToken::At:
    case AsmToken::Dollar:
      // Glue the sigil and the following identifier back together, provided
      // they are adjacent in the source.
      if ((TokCount > 1) &&
          (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
          (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
        Id = StringRef(Buf[0].getLoc().getPointer(),
                       Buf[1].getIdentifier().size() + 1);
      break;
    case AsmToken::Identifier:
    case AsmToken::String:
      Id = Buf[0].getIdentifier();
      break;
    default:
      return false;
    }
    // We have an ID. Check if it is bound to a register.
    if (!Id.empty()) {
      MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
      if (Sym->isVariable()) {
        auto V = Sym->getVariableValue(/*SetUsed*/ false);
        return isa<X86MCExpr>(V);
      }
    }
    return false;
  };

  if (!Disp) {
    // Parse immediate if we're not at a mem operand yet.
    if (!isAtMemOperand()) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
        return true;
      assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
    } else {
      // Disp is implicitly zero if we haven't parsed it yet.
      Disp = MCConstantExpr::create(0, Parser.getContext());
    }
  }

  // We are now either at the end of the operand or at the '(' at the start of a
  // base-index-scale-expr.

  if (!parseOptionalToken(AsmToken::LParen)) {
    if (SegReg == 0)
      Operands.push_back(
          X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
    else
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                               0, 0, 1, StartLoc, EndLoc));
    return false;
  }

  // If we reached here, then eat the '(' and Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc BaseLoc = getLexer().getLoc();
  const MCExpr *E;
  StringRef ErrMsg;

  // Parse BaseReg if one is provided.
  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
    if (Parser.parseExpression(E, EndLoc) ||
        check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
      return true;

    // Check the register.
    BaseReg = cast<X86MCExpr>(E)->getRegNo();
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
      return Error(BaseLoc, "eiz and riz can only be used as index registers",
                   SMRange(BaseLoc, EndLoc));
  }

  if (parseOptionalToken(AsmToken::Comma)) {
    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Even though it would be completely consistent to support syntax like
    // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().isNot(AsmToken::RParen)) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
        return true;

      if (!isa<X86MCExpr>(E)) {
        // We've parsed an unexpected Scale Value instead of an index
        // register. Interpret it as an absolute.
        int64_t ScaleVal;
        if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
          return Error(Loc, "expected absolute expression");
        if (ScaleVal != 1)
          Warning(Loc, "scale factor without index register is ignored");
        Scale = 1;
      } else { // IndexReg Found.
        IndexReg = cast<X86MCExpr>(E)->getRegNo();

        if (BaseReg == X86::RIP)
          return Error(Loc,
                       "%rip as base register can not have an index register");
        if (IndexReg == X86::RIP)
          return Error(Loc, "%rip is not allowed as an index register");

        if (parseOptionalToken(AsmToken::Comma)) {
          // Parse the scale amount:
          //  ::= ',' [scale-expression]

          // A scale amount without an index is ignored.
          if (getLexer().isNot(AsmToken::RParen)) {
            int64_t ScaleVal;
            if (Parser.parseTokenLoc(Loc) ||
                Parser.parseAbsoluteExpression(ScaleVal))
              return Error(Loc, "expected scale expression");
            Scale = (unsigned)ScaleVal;
            // Validate the scale amount.
            if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
                Scale != 1)
              return Error(Loc, "scale factor in 16-bit address must be 1");
            if (checkScale(Scale, ErrMsg))
              return Error(Loc, ErrMsg);
          }
        }
      }
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
    return true;

  // This is to support otherwise illegal operand (%dx) found in various
  // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
  // be supported. Mark such DX variants separately fix only in special cases.
3054 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 && 3055 isa<MCConstantExpr>(Disp) && 3056 cast<MCConstantExpr>(Disp)->getValue() == 0) { 3057 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc)); 3058 return false; 3059 } 3060 3061 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 3062 ErrMsg)) 3063 return Error(BaseLoc, ErrMsg); 3064 3065 // If the displacement is a constant, check overflows. For 64-bit addressing, 3066 // gas requires isInt<32> and otherwise reports an error. For others, gas 3067 // reports a warning and allows a wider range. E.g. gas allows 3068 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses 3069 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000). 3070 if (BaseReg || IndexReg) { 3071 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) { 3072 auto Imm = CE->getValue(); 3073 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || 3074 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg); 3075 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg); 3076 if (Is64) { 3077 if (!isInt<32>(Imm)) 3078 return Error(BaseLoc, "displacement " + Twine(Imm) + 3079 " is not within [-2147483648, 2147483647]"); 3080 } else if (!Is16) { 3081 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) { 3082 Warning(BaseLoc, "displacement " + Twine(Imm) + 3083 " shortened to 32-bit signed " + 3084 Twine(static_cast<int32_t>(Imm))); 3085 } 3086 } else if (!isUInt<16>(Imm < 0 ? 
-uint64_t(Imm) : uint64_t(Imm))) { 3087 Warning(BaseLoc, "displacement " + Twine(Imm) + 3088 " shortened to 16-bit signed " + 3089 Twine(static_cast<int16_t>(Imm))); 3090 } 3091 } 3092 } 3093 3094 if (SegReg || BaseReg || IndexReg) 3095 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 3096 BaseReg, IndexReg, Scale, StartLoc, 3097 EndLoc)); 3098 else 3099 Operands.push_back( 3100 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc)); 3101 return false; 3102 } 3103 3104 // Parse either a standard primary expression or a register. 3105 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 3106 MCAsmParser &Parser = getParser(); 3107 // See if this is a register first. 3108 if (getTok().is(AsmToken::Percent) || 3109 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) && 3110 MatchRegisterName(Parser.getTok().getString()))) { 3111 SMLoc StartLoc = Parser.getTok().getLoc(); 3112 MCRegister RegNo; 3113 if (parseRegister(RegNo, StartLoc, EndLoc)) 3114 return true; 3115 Res = X86MCExpr::create(RegNo, Parser.getContext()); 3116 return false; 3117 } 3118 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr); 3119 } 3120 3121 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 3122 SMLoc NameLoc, OperandVector &Operands) { 3123 MCAsmParser &Parser = getParser(); 3124 InstInfo = &Info; 3125 3126 // Reset the forced VEX encoding. 3127 ForcedVEXEncoding = VEXEncoding_Default; 3128 ForcedDispEncoding = DispEncoding_Default; 3129 UseApxExtendedReg = false; 3130 ForcedNoFlag = false; 3131 3132 // Parse pseudo prefixes. 3133 while (true) { 3134 if (Name == "{") { 3135 if (getLexer().isNot(AsmToken::Identifier)) 3136 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'"); 3137 std::string Prefix = Parser.getTok().getString().lower(); 3138 Parser.Lex(); // Eat identifier. 
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      // Map the pseudo prefix ({vex}, {disp8}, {nf}, ...) onto the forced
      // encoding state that processInstruction/emission consult later.
      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else if (Prefix == "nf")
        ForcedNoFlag = true;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if its not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.starts_with("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Sentinel ~0U means no comparison code was extracted from the mnemonic.
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
      (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
       PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
       PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
    .Case("eq", 0x00)
    .Case("eq_oq", 0x00)
    .Case("lt", 0x01)
    .Case("lt_os", 0x01)
    .Case("le", 0x02)
    .Case("le_os", 0x02)
    .Case("unord", 0x03)
    .Case("unord_q", 0x03)
    .Case("neq", 0x04)
    .Case("neq_uq", 0x04)
    .Case("nlt", 0x05)
    .Case("nlt_us", 0x05)
    .Case("nle", 0x06)
    .Case("nle_us", 0x06)
    .Case("ord", 0x07)
    .Case("ord_q", 0x07)
    /* AVX only from here */
    .Case("eq_uq", 0x08)
    .Case("nge", 0x09)
    .Case("nge_us", 0x09)
    .Case("ngt", 0x0A)
    .Case("ngt_us", 0x0A)
    .Case("false", 0x0B)
    .Case("false_oq", 0x0B)
    .Case("neq_oq", 0x0C)
    .Case("ge", 0x0D)
    .Case("ge_os", 0x0D)
    .Case("gt", 0x0E)
    .Case("gt_os", 0x0E)
    .Case("true", 0x0F)
    .Case("true_uq", 0x0F)
    .Case("eq_os", 0x10)
    .Case("lt_oq", 0x11)
    .Case("le_oq", 0x12)
    .Case("unord_s", 0x13)
    .Case("neq_us", 0x14)
    .Case("nlt_uq", 0x15)
    .Case("nle_uq", 0x16)
    .Case("ord_s", 0x17)
    .Case("eq_us", 0x18)
    .Case("nge_uq", 0x19)
    .Case("ngt_uq", 0x1A)
    .Case("false_os", 0x1B)
    .Case("neq_os", 0x1C)
    .Case("ge_oq", 0x1D)
    .Case("gt_oq", 0x1E)
    .Case("true_us", 0x1F)
    .Default(~0U);
    // Non-VEX cmp only supports the first 8 predicates; the FP16 ('h')
    // suffixes only exist in the VEX/EVEX-prefixed forms.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.ends_with("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.ends_with("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.ends_with("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.ends_with("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.ends_with("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.ends_with("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. for example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if its not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.starts_with(";") || Name.starts_with("\n") ||
           Name.starts_with("#") || Name.starts_with("\t") ||
           Name.starts_with("/")) {
      // FIXME: The mnemonic won't match correctly if its not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
     }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interepreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.starts_with("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.starts_with("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.starts_with("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.starts_with("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.starts_with("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // AT&T "movsd" with no operands is the string move, not the SSE2 scalar
    // move; canonicalize it to "movsl".
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one the string insturctions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}

bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  if
      (ForcedVEXEncoding != VEXEncoding_VEX3 &&
       X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  }
}

bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //    VFMULCPHZrr   Dest, Src1, Src2
    //    VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //    VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = TSFlags & X86II::EVEX_K ?
2 : 1; 3738 i < Inst.getNumOperands(); i++) 3739 if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg()) 3740 return Warning(Ops[0]->getStartLoc(), "Destination register should be " 3741 "distinct from source registers"); 3742 } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) || 3743 isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) || 3744 isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) { 3745 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() - 3746 X86::AddrNumOperands - 1).getReg(); 3747 unsigned Src2Enc = MRI->getEncodingValue(Src2); 3748 if (Src2Enc % 4 != 0) { 3749 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2); 3750 unsigned GroupStart = (Src2Enc / 4) * 4; 3751 unsigned GroupEnd = GroupStart + 3; 3752 return Warning(Ops[0]->getStartLoc(), 3753 "source register '" + RegName + "' implicitly denotes '" + 3754 RegName.take_front(3) + Twine(GroupStart) + "' to '" + 3755 RegName.take_front(3) + Twine(GroupEnd) + 3756 "' source group"); 3757 } 3758 } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) || 3759 isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) || 3760 isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) || 3761 isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) { 3762 bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX; 3763 if (HasEVEX) { 3764 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3765 unsigned Index = MRI->getEncodingValue( 3766 Inst.getOperand(4 + X86::AddrIndexReg).getReg()); 3767 if (Dest == Index) 3768 return Warning(Ops[0]->getStartLoc(), "index and destination registers " 3769 "should be distinct"); 3770 } else { 3771 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg()); 3772 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg()); 3773 unsigned Index = MRI->getEncodingValue( 3774 Inst.getOperand(3 + X86::AddrIndexReg).getReg()); 3775 if (Dest == Mask || Dest == Index || Mask == Index) 3776 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination " 3777 
                                  "registers should be distinct");
    }
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCPhysReg HReg = X86::NoRegister;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      // Remember any high-byte register; it is unencodable alongside REX.
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      // SPL/BPL/SIL/DIL and the extended (R8+) registers all force a REX
      // prefix in the legacy encoding.
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg != X86::NoRegister) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  // PREFETCHIT0/1 only accept a RIP-relative memory operand.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}

/// Emit a warning that the current instruction may be vulnerable to LVI and
/// must be mitigated manually, plus a note pointing at Intel's guidance.
void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
  Warning(Loc, "Instruction may be vulnerable to LVI and "
               "requires manual mitigation");
  Note(SMLoc(), "See https://software.intel.com/"
                "security-software-guidance/insights/"
                "deep-dive-load-value-injection#specialinstructions"
                " for more information");
}

/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// RETs are hardened in place here; memory-indirect JMP/CALL forms only get a
/// warning that manual mitigation is required.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Pick the stack pointer matching the current mode (SP/ESP/RSP).
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    // Emit 'shl $0' on the memory at the stack pointer followed by an LFENCE
    // ahead of the RET (the RET itself is emitted by the caller); see
    // X86LoadValueInjectionRetHardening.cpp for the rationale.
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}

/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// The caller has already emitted the instruction; this only appends the
/// fence, or warns when automatic mitigation is not possible.
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}

/// Emit \p Inst, wrapped with LVI mitigations when the experimental
/// command-line flag and the corresponding subtarget features are enabled:
/// control-flow hardening runs before the instruction, load hardening after.
void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                   MCStreamer &Out) {
  if (LVIInlineAsmHardening &&
      getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
    applyLVICFIMitigation(Inst, Out);

  Out.emitInstruction(Inst, getSTI());

  if (LVIInlineAsmHardening &&
      getSTI().hasFeature(X86::FeatureLVILoadHardening))
    applyLVILoadHardeningMitigation(Inst, Out);
}

/// Dispatch to the Intel- or AT&T-syntax matcher based on the active dialect.
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           OperandVector &Operands,
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  if (isParsingIntelSyntax())
    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                        MatchingInlineAsm);
  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                    MatchingInlineAsm);
}

/// Rewrite wait-form FPU mnemonics like 'finit' into an explicit X86::WAIT
/// followed by the non-waiting 'fn*' form of the mnemonic.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
                                     OperandVector &Operands, MCStreamer &Out,
                                     bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  // Map each wait-form mnemonic to its non-waiting replacement; nullptr means
  // the token is not one of the aliases and is left untouched.
  const char *Repl = StringSwitch<const char *>(Op.getToken())
                         .Case("finit", "fninit")
                         .Case("fsave", "fnsave")
                         .Case("fstcw", "fnstcw")
                         .Case("fstcww", "fnstcw")
                         .Case("fstenv", "fnstenv")
                         .Case("fstsw", "fnstsw")
                         .Case("fstsww", "fnstsw")
                         .Case("fclex", "fnclex")
                         .Default(nullptr);
  if (Repl) {
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }
}

/// Report a missing-feature match failure, listing each missing subtarget
/// feature by name in the error message.
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                       const FeatureBitset &MissingFeatures,
                                       bool MatchingInlineAsm) {
  assert(MissingFeatures.any() && "Unknown missing feature!");
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
    if (MissingFeatures[i])
      OS << ' ' << getSubtargetFeatureName(i);
  }
  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}

// Pop a trailing prefix operand, if present, and return its encoded prefix
// value (0 when the last operand is not a prefix).
static unsigned getPrefixes(OperandVector &Operands) {
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    Operands.pop_back();
  }
  return Result;
}

/// Reject otherwise-valid matches that contradict an explicitly requested
/// encoding ({vex}/{vex2}/{vex3}/{evex}, {nf}) or that use APX extended
/// registers with an opcode that cannot encode them.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);

  if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
    return Match_Unsupported;
  if (ForcedNoFlag != !!(MCID.TSFlags & X86II::EVEX_NF))
    return Match_Unsupported;

  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return Match_Unsupported;

  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
       ForcedVEXEncoding == VEXEncoding_VEX2 ||
       ForcedVEXEncoding ==
           VEXEncoding_VEX3) &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
    return Match_Unsupported;

  // Instructions that require an explicit {vex} prefix cannot match unless
  // one of the VEX forms was requested.
  if ((MCID.TSFlags & X86II::ExplicitOpPrefixMask) ==
          X86II::ExplicitVEXPrefix &&
      (ForcedVEXEncoding != VEXEncoding_VEX &&
       ForcedVEXEncoding != VEXEncoding_VEX2 &&
       ForcedVEXEncoding != VEXEncoding_VEX3))
    return Match_Unsupported;

  return Match_Success;
}

/// Match and emit an instruction under AT&T syntax. A direct match is tried
/// first; on failure, the suffix-retry logic below appends b/w/l/q (or s/l/t
/// for FPU mnemonics) and retries.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = std::nullopt;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.
  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  // One match result per candidate suffix in Suffixes.
  unsigned Match[4];

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    // Collect the suffixes that matched so the error can list them.
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}

/// Match and emit an instruction under Intel syntax, where operand sizes come
/// from the operands themselves (or the frontend) rather than a mnemonic
/// suffix; unsized memory operands are resolved by probing each legal size.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  // NB: Mnemonic and Base hold the same token text below.
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = std::nullopt;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If unsized push has immediate operand we should default the default pointer
  // size for the size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        // Temporarily append the mode's natural suffix (q/l/w) to the token.
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Only record a new result when the opcode changed, to avoid counting
      // the same instruction once per size.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}

// Segment registers are omitted from inline-asm clobber lists.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}

/// Top-level dispatcher for X86-specific assembler directives; returns true
/// for directives this parser does not handle.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.starts_with(".arch"))
    return parseDirectiveArch();
  if (IDVal.starts_with(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.starts_with(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.starts_with(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal ==
           ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // The .seh_* directives also accept their MASM spellings when parsing MASM.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  return true;
}

bool X86AsmParser::parseDirectiveArch() {
  // Ignore .arch for now.
  getParser().parseStringToEndOfStatement();
  return false;
}

/// parseDirectiveNops
/// ::= .nops size[, control]
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  // The second (control) argument is optional.
  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  // Diagnose bad argument values but report them as handled (return false).
  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}

/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  // NOTE(review): a failed parseEOL() returns false (success) here rather
  // than propagating the error -- TODO confirm this is intended.
  if (parseEOL())
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    getStreamer().initSections(false, getSTI());
    Section = getStreamer().getCurrentSectionOnly();
  }
  // Code sections pad with NOPs; data sections pad with zero bytes.
  if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
  return false;
}

/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
4641 } else if (IDVal == ".code16gcc") { 4642 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode. 4643 Parser.Lex(); 4644 Code16GCC = true; 4645 if (!is16BitMode()) { 4646 SwitchMode(X86::Is16Bit); 4647 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16); 4648 } 4649 } else if (IDVal == ".code32") { 4650 Parser.Lex(); 4651 if (!is32BitMode()) { 4652 SwitchMode(X86::Is32Bit); 4653 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32); 4654 } 4655 } else if (IDVal == ".code64") { 4656 Parser.Lex(); 4657 if (!is64BitMode()) { 4658 SwitchMode(X86::Is64Bit); 4659 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64); 4660 } 4661 } else { 4662 Error(L, "unknown directive " + IDVal); 4663 return false; 4664 } 4665 4666 return false; 4667 } 4668 4669 // .cv_fpo_proc foo 4670 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) { 4671 MCAsmParser &Parser = getParser(); 4672 StringRef ProcName; 4673 int64_t ParamsSize; 4674 if (Parser.parseIdentifier(ProcName)) 4675 return Parser.TokError("expected symbol name"); 4676 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count")) 4677 return true; 4678 if (!isUIntN(32, ParamsSize)) 4679 return Parser.TokError("parameters size out of range"); 4680 if (parseEOL()) 4681 return true; 4682 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); 4683 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L); 4684 } 4685 4686 // .cv_fpo_setframe ebp 4687 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) { 4688 MCRegister Reg; 4689 SMLoc DummyLoc; 4690 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL()) 4691 return true; 4692 return getTargetStreamer().emitFPOSetFrame(Reg, L); 4693 } 4694 4695 // .cv_fpo_pushreg ebx 4696 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) { 4697 MCRegister Reg; 4698 SMLoc DummyLoc; 4699 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL()) 4700 return true; 4701 return getTargetStreamer().emitFPOPushReg(Reg, L); 4702 } 4703 4704 // 
.cv_fpo_stackalloc 20 4705 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) { 4706 MCAsmParser &Parser = getParser(); 4707 int64_t Offset; 4708 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL()) 4709 return true; 4710 return getTargetStreamer().emitFPOStackAlloc(Offset, L); 4711 } 4712 4713 // .cv_fpo_stackalign 8 4714 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) { 4715 MCAsmParser &Parser = getParser(); 4716 int64_t Offset; 4717 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL()) 4718 return true; 4719 return getTargetStreamer().emitFPOStackAlign(Offset, L); 4720 } 4721 4722 // .cv_fpo_endprologue 4723 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) { 4724 MCAsmParser &Parser = getParser(); 4725 if (Parser.parseEOL()) 4726 return true; 4727 return getTargetStreamer().emitFPOEndPrologue(L); 4728 } 4729 4730 // .cv_fpo_endproc 4731 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { 4732 MCAsmParser &Parser = getParser(); 4733 if (Parser.parseEOL()) 4734 return true; 4735 return getTargetStreamer().emitFPOEndProc(L); 4736 } 4737 4738 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID, 4739 MCRegister &RegNo) { 4740 SMLoc startLoc = getLexer().getLoc(); 4741 const MCRegisterInfo *MRI = getContext().getRegisterInfo(); 4742 4743 // Try parsing the argument as a register first. 4744 if (getLexer().getTok().isNot(AsmToken::Integer)) { 4745 SMLoc endLoc; 4746 if (parseRegister(RegNo, startLoc, endLoc)) 4747 return true; 4748 4749 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) { 4750 return Error(startLoc, 4751 "register is not supported for use with this directive"); 4752 } 4753 } else { 4754 // Otherwise, an integer number matching the encoding of the desired 4755 // register may appear. 4756 int64_t EncodedReg; 4757 if (getParser().parseAbsoluteExpression(EncodedReg)) 4758 return true; 4759 4760 // The SEH register number is the same as the encoding register number. 
Map 4761 // from the encoding back to the LLVM register number. 4762 RegNo = 0; 4763 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { 4764 if (MRI->getEncodingValue(Reg) == EncodedReg) { 4765 RegNo = Reg; 4766 break; 4767 } 4768 } 4769 if (RegNo == 0) { 4770 return Error(startLoc, 4771 "incorrect register number for use with this directive"); 4772 } 4773 } 4774 4775 return false; 4776 } 4777 4778 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { 4779 MCRegister Reg; 4780 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4781 return true; 4782 4783 if (getLexer().isNot(AsmToken::EndOfStatement)) 4784 return TokError("expected end of directive"); 4785 4786 getParser().Lex(); 4787 getStreamer().emitWinCFIPushReg(Reg, Loc); 4788 return false; 4789 } 4790 4791 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { 4792 MCRegister Reg; 4793 int64_t Off; 4794 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4795 return true; 4796 if (getLexer().isNot(AsmToken::Comma)) 4797 return TokError("you must specify a stack pointer offset"); 4798 4799 getParser().Lex(); 4800 if (getParser().parseAbsoluteExpression(Off)) 4801 return true; 4802 4803 if (getLexer().isNot(AsmToken::EndOfStatement)) 4804 return TokError("expected end of directive"); 4805 4806 getParser().Lex(); 4807 getStreamer().emitWinCFISetFrame(Reg, Off, Loc); 4808 return false; 4809 } 4810 4811 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { 4812 MCRegister Reg; 4813 int64_t Off; 4814 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4815 return true; 4816 if (getLexer().isNot(AsmToken::Comma)) 4817 return TokError("you must specify an offset on the stack"); 4818 4819 getParser().Lex(); 4820 if (getParser().parseAbsoluteExpression(Off)) 4821 return true; 4822 4823 if (getLexer().isNot(AsmToken::EndOfStatement)) 4824 return TokError("expected end of directive"); 4825 4826 getParser().Lex(); 4827 getStreamer().emitWinCFISaveReg(Reg, Off, Loc); 4828 return false; 4829 } 4830 
4831 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { 4832 MCRegister Reg; 4833 int64_t Off; 4834 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) 4835 return true; 4836 if (getLexer().isNot(AsmToken::Comma)) 4837 return TokError("you must specify an offset on the stack"); 4838 4839 getParser().Lex(); 4840 if (getParser().parseAbsoluteExpression(Off)) 4841 return true; 4842 4843 if (getLexer().isNot(AsmToken::EndOfStatement)) 4844 return TokError("expected end of directive"); 4845 4846 getParser().Lex(); 4847 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); 4848 return false; 4849 } 4850 4851 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { 4852 bool Code = false; 4853 StringRef CodeID; 4854 if (getLexer().is(AsmToken::At)) { 4855 SMLoc startLoc = getLexer().getLoc(); 4856 getParser().Lex(); 4857 if (!getParser().parseIdentifier(CodeID)) { 4858 if (CodeID != "code") 4859 return Error(startLoc, "expected @code"); 4860 Code = true; 4861 } 4862 } 4863 4864 if (getLexer().isNot(AsmToken::EndOfStatement)) 4865 return TokError("expected end of directive"); 4866 4867 getParser().Lex(); 4868 getStreamer().emitWinCFIPushFrame(Code, Loc); 4869 return false; 4870 } 4871 4872 // Force static initialization. 4873 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() { 4874 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 4875 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 4876 } 4877 4878 #define GET_MATCHER_IMPLEMENTATION 4879 #include "X86GenAsmMatcher.inc" 4880