//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>

using namespace llvm;

// Opt-in flag for hardening inline assembly against Load Value Injection.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);

// Returns true (and sets ErrMsg) if Scale is not a legal x86 SIB scale factor.
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
    return true;
  }
  return false;
}

namespace {

// Operator precedence for the Intel-syntax infix expression calculator; a
// higher value binds tighter. Indexed by InfixCalculatorTok — keep this table
// in sync with that enum.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};

class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  bool Code16GCC;
  unsigned ForcedDataPrefix = 0;

  // Encoding selection forced by an instruction prefix such as {vex}/{evex}.
  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

  // Displacement size forced by a {disp8}/{disp32} instruction prefix.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

private:
  // Eat the current token and return its source location.
  SMLoc consumeToken() {
    MCAsmParser &Parser = getParser();
    SMLoc Result = Parser.getTok().getLoc();
    Parser.Lex();
    return Result;
  }

  X86TargetStreamer &getTargetStreamer() {
    assert(getParser().getStreamer().getTargetStreamer() &&
           "do not have a target streamer");
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<X86TargetStreamer &>(TS);
  }

  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    // Restore 16-bit mode before returning the match result.
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }

  // Tokens understood by InfixCalculator. Order must match OpPrecedence above.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };

  // Shunting-yard style evaluator for Intel-syntax constant expressions:
  // operators are pushed with precedence handling, converted to a postfix
  // stack, and evaluated by execute().
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator: pop operators to the postfix stack until that is no
      // longer true.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Flush remaining operators to the postfix stack, then evaluate the
    // postfix expression and return the single resulting value.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          // Comparison operators follow MASM semantics: true is all-ones (-1),
          // false is 0.
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };

  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };

  // State machine that consumes an Intel-syntax expression one token at a
  // time (one on*() call per token), accumulating the base/index/scale
  // registers, symbol reference, and immediate displacement of a memory
  // operand. Invalid token sequences drive State to IES_ERROR.
  class IntelExprStateMachine {
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
    int64_t Imm = 0;
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    short BracCount = 0;
    bool MemExpr = false;
    bool BracketUsed = false;
    bool OffsetOperator = false;
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    AsmTypeInfo CurType;

    // Record the (single) symbol reference of the expression; at most one
    // symbol is allowed per memory operand.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }

  public:
    IntelExprStateMachine() = default;

    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    unsigned getBaseReg() const { return BaseReg; }
    unsigned getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    int64_t getImm() { return Imm + IC.execute(); }
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_INTEGER;
    }

    // Is the intel expression appended after an operand index.
    // [OperandIdx][Intel Expression]
    // This is necessary for checking whether it is an independent
    // intel expression at the back end when parsing inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }

    // Emit the diagnostic for a base+index pair that is already fully used.
    // Always returns true so callers can `return regsUseUpError(ErrMsg);`.
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg];
      // we cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }

    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onEq() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_EQ;
        IC.pushOperator(IC_EQ);
        break;
      }
      PrevState = CurrState;
    }
    void onNE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_NE;
        IC.pushOperator(IC_NE);
        break;
      }
      PrevState = CurrState;
    }
    void onLT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LT;
        IC.pushOperator(IC_LT);
        break;
      }
      PrevState = CurrState;
    }
    void onLE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LE;
        IC.pushOperator(IC_LE);
        break;
      }
      PrevState = CurrState;
    }
    void onGT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GT;
        IC.pushOperator(IC_GT);
        break;
      }
      PrevState = CurrState;
    }
    void onGE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GE;
        IC.pushOperator(IC_GE);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        // The symbol's value is unknown here, so push a '0' placeholder.
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        // A bracket following a value means addition, e.g. sym[disp].
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
    void onCast(AsmTypeInfo Info) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_LPAREN:
        setTypeInfo(Info);
        State = IES_CAST;
        break;
      }
    }
    void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
  };

  // Report a parse error. When matching MS inline asm the statement is eaten
  // and no diagnostic is emitted (the caller reports via other means).
  bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
             bool MatchingInlineAsm = false) {
    MCAsmParser &Parser = getParser();
    if (MatchingInlineAsm) {
      if (!getLexer().isAtStartOfStatement())
        Parser.eatToEndOfStatement();
      return false;
    }
    return Parser.Error(L, Msg, Range);
  }

  bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, SMLoc Start, SMLoc End,
                               unsigned Size, StringRef Identifier,
                               const InlineAsmIdentifierInfo &Info,
                               OperandVector &Operands);

  bool parseDirectiveArch();
  bool parseDirectiveNops(SMLoc L);
  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);

  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                  OperandVector &Operands, MCStreamer &Out,
                                  uint64_t &ErrorInfo,
                                  bool MatchingInlineAsm);

  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                    OperandVector &Operands, MCStreamer &Out,
                                    uint64_t &ErrorInfo,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>},
  /// {z}) and memory broadcasting ({1to<NUM>}) primitives, updating Operands
  /// vector if required.
  /// Returns false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);

  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  // Toggle the subtarget to the given 16/32/64-bit mode feature and
  // recompute the available-features mask accordingly.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }

  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }

  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };

  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }

  bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                     SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;

  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace

#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"

static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
1299 1300 if (BaseReg != 0 && 1301 !(BaseReg == X86::RIP || BaseReg == X86::EIP || 1302 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) || 1303 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) || 1304 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) { 1305 ErrMsg = "invalid base+index expression"; 1306 return true; 1307 } 1308 1309 if (IndexReg != 0 && 1310 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ || 1311 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1312 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1313 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1314 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) || 1315 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) || 1316 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) { 1317 ErrMsg = "invalid base+index expression"; 1318 return true; 1319 } 1320 1321 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) || 1322 IndexReg == X86::EIP || IndexReg == X86::RIP || 1323 IndexReg == X86::ESP || IndexReg == X86::RSP) { 1324 ErrMsg = "invalid base+index expression"; 1325 return true; 1326 } 1327 1328 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed, 1329 // and then only in non-64-bit modes. 
1330 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 1331 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP && 1332 BaseReg != X86::SI && BaseReg != X86::DI))) { 1333 ErrMsg = "invalid 16-bit base register"; 1334 return true; 1335 } 1336 1337 if (BaseReg == 0 && 1338 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { 1339 ErrMsg = "16-bit memory operand may not include only index register"; 1340 return true; 1341 } 1342 1343 if (BaseReg != 0 && IndexReg != 0) { 1344 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && 1345 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1346 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1347 IndexReg == X86::EIZ)) { 1348 ErrMsg = "base register is 64-bit, but index register is not"; 1349 return true; 1350 } 1351 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && 1352 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) || 1353 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) || 1354 IndexReg == X86::RIZ)) { 1355 ErrMsg = "base register is 32-bit, but index register is not"; 1356 return true; 1357 } 1358 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) { 1359 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) || 1360 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) { 1361 ErrMsg = "base register is 16-bit, but index register is not"; 1362 return true; 1363 } 1364 if ((BaseReg != X86::BX && BaseReg != X86::BP) || 1365 (IndexReg != X86::SI && IndexReg != X86::DI)) { 1366 ErrMsg = "invalid 16-bit base/index register combination"; 1367 return true; 1368 } 1369 } 1370 } 1371 1372 // RIP/EIP-relative addressing is only supported in 64-bit mode. 
1373 if (!Is64BitMode && BaseReg != 0 && 1374 (BaseReg == X86::RIP || BaseReg == X86::EIP)) { 1375 ErrMsg = "IP-relative addressing requires 64-bit mode"; 1376 return true; 1377 } 1378 1379 return checkScale(Scale, ErrMsg); 1380 } 1381 1382 bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName, 1383 SMLoc StartLoc, SMLoc EndLoc) { 1384 // If we encounter a %, ignore it. This code handles registers with and 1385 // without the prefix, unprefixed registers can occur in cfi directives. 1386 RegName.consume_front("%"); 1387 1388 RegNo = MatchRegisterName(RegName); 1389 1390 // If the match failed, try the register name as lowercase. 1391 if (RegNo == 0) 1392 RegNo = MatchRegisterName(RegName.lower()); 1393 1394 // The "flags" and "mxcsr" registers cannot be referenced directly. 1395 // Treat it as an identifier instead. 1396 if (isParsingMSInlineAsm() && isParsingIntelSyntax() && 1397 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR)) 1398 RegNo = 0; 1399 1400 if (!is64BitMode()) { 1401 // FIXME: This should be done using Requires<Not64BitMode> and 1402 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also 1403 // checked. 1404 if (RegNo == X86::RIZ || RegNo == X86::RIP || 1405 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || 1406 X86II::isX86_64NonExtLowByteReg(RegNo) || 1407 X86II::isX86_64ExtendedReg(RegNo)) { 1408 return Error(StartLoc, 1409 "register %" + RegName + " is only available in 64-bit mode", 1410 SMRange(StartLoc, EndLoc)); 1411 } 1412 } 1413 1414 // If this is "db[0-15]", match it as an alias 1415 // for dr[0-15]. 
1416 if (RegNo == 0 && RegName.startswith("db")) { 1417 if (RegName.size() == 3) { 1418 switch (RegName[2]) { 1419 case '0': 1420 RegNo = X86::DR0; 1421 break; 1422 case '1': 1423 RegNo = X86::DR1; 1424 break; 1425 case '2': 1426 RegNo = X86::DR2; 1427 break; 1428 case '3': 1429 RegNo = X86::DR3; 1430 break; 1431 case '4': 1432 RegNo = X86::DR4; 1433 break; 1434 case '5': 1435 RegNo = X86::DR5; 1436 break; 1437 case '6': 1438 RegNo = X86::DR6; 1439 break; 1440 case '7': 1441 RegNo = X86::DR7; 1442 break; 1443 case '8': 1444 RegNo = X86::DR8; 1445 break; 1446 case '9': 1447 RegNo = X86::DR9; 1448 break; 1449 } 1450 } else if (RegName.size() == 4 && RegName[2] == '1') { 1451 switch (RegName[3]) { 1452 case '0': 1453 RegNo = X86::DR10; 1454 break; 1455 case '1': 1456 RegNo = X86::DR11; 1457 break; 1458 case '2': 1459 RegNo = X86::DR12; 1460 break; 1461 case '3': 1462 RegNo = X86::DR13; 1463 break; 1464 case '4': 1465 RegNo = X86::DR14; 1466 break; 1467 case '5': 1468 RegNo = X86::DR15; 1469 break; 1470 } 1471 } 1472 } 1473 1474 if (RegNo == 0) { 1475 if (isParsingIntelSyntax()) 1476 return true; 1477 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc)); 1478 } 1479 return false; 1480 } 1481 1482 bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1483 SMLoc &EndLoc, bool RestoreOnFailure) { 1484 MCAsmParser &Parser = getParser(); 1485 MCAsmLexer &Lexer = getLexer(); 1486 RegNo = 0; 1487 1488 SmallVector<AsmToken, 5> Tokens; 1489 auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() { 1490 if (RestoreOnFailure) { 1491 while (!Tokens.empty()) { 1492 Lexer.UnLex(Tokens.pop_back_val()); 1493 } 1494 } 1495 }; 1496 1497 const AsmToken &PercentTok = Parser.getTok(); 1498 StartLoc = PercentTok.getLoc(); 1499 1500 // If we encounter a %, ignore it. This code handles registers with and 1501 // without the prefix, unprefixed registers can occur in cfi directives. 
1502 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) { 1503 Tokens.push_back(PercentTok); 1504 Parser.Lex(); // Eat percent token. 1505 } 1506 1507 const AsmToken &Tok = Parser.getTok(); 1508 EndLoc = Tok.getEndLoc(); 1509 1510 if (Tok.isNot(AsmToken::Identifier)) { 1511 OnFailure(); 1512 if (isParsingIntelSyntax()) return true; 1513 return Error(StartLoc, "invalid register name", 1514 SMRange(StartLoc, EndLoc)); 1515 } 1516 1517 if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) { 1518 OnFailure(); 1519 return true; 1520 } 1521 1522 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. 1523 if (RegNo == X86::ST0) { 1524 Tokens.push_back(Tok); 1525 Parser.Lex(); // Eat 'st' 1526 1527 // Check to see if we have '(4)' after %st. 1528 if (Lexer.isNot(AsmToken::LParen)) 1529 return false; 1530 // Lex the paren. 1531 Tokens.push_back(Parser.getTok()); 1532 Parser.Lex(); 1533 1534 const AsmToken &IntTok = Parser.getTok(); 1535 if (IntTok.isNot(AsmToken::Integer)) { 1536 OnFailure(); 1537 return Error(IntTok.getLoc(), "expected stack index"); 1538 } 1539 switch (IntTok.getIntVal()) { 1540 case 0: RegNo = X86::ST0; break; 1541 case 1: RegNo = X86::ST1; break; 1542 case 2: RegNo = X86::ST2; break; 1543 case 3: RegNo = X86::ST3; break; 1544 case 4: RegNo = X86::ST4; break; 1545 case 5: RegNo = X86::ST5; break; 1546 case 6: RegNo = X86::ST6; break; 1547 case 7: RegNo = X86::ST7; break; 1548 default: 1549 OnFailure(); 1550 return Error(IntTok.getLoc(), "invalid stack index"); 1551 } 1552 1553 // Lex IntTok 1554 Tokens.push_back(IntTok); 1555 Parser.Lex(); 1556 if (Lexer.isNot(AsmToken::RParen)) { 1557 OnFailure(); 1558 return Error(Parser.getTok().getLoc(), "expected ')'"); 1559 } 1560 1561 EndLoc = Parser.getTok().getEndLoc(); 1562 Parser.Lex(); // Eat ')' 1563 return false; 1564 } 1565 1566 EndLoc = Parser.getTok().getEndLoc(); 1567 1568 if (RegNo == 0) { 1569 OnFailure(); 1570 if (isParsingIntelSyntax()) return true; 1571 return 
Error(StartLoc, "invalid register name", 1572 SMRange(StartLoc, EndLoc)); 1573 } 1574 1575 Parser.Lex(); // Eat identifier token. 1576 return false; 1577 } 1578 1579 bool X86AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc, 1580 SMLoc &EndLoc) { 1581 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 1582 } 1583 1584 OperandMatchResultTy X86AsmParser::tryParseRegister(MCRegister &RegNo, 1585 SMLoc &StartLoc, 1586 SMLoc &EndLoc) { 1587 bool Result = 1588 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 1589 bool PendingErrors = getParser().hasPendingError(); 1590 getParser().clearPendingErrors(); 1591 if (PendingErrors) 1592 return MatchOperand_ParseFail; 1593 if (Result) 1594 return MatchOperand_NoMatch; 1595 return MatchOperand_Success; 1596 } 1597 1598 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { 1599 bool Parse32 = is32BitMode() || Code16GCC; 1600 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI); 1601 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1602 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1603 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1604 Loc, Loc, 0); 1605 } 1606 1607 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { 1608 bool Parse32 = is32BitMode() || Code16GCC; 1609 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? 
X86::EDI : X86::DI); 1610 const MCExpr *Disp = MCConstantExpr::create(0, getContext()); 1611 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp, 1612 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1, 1613 Loc, Loc, 0); 1614 } 1615 1616 bool X86AsmParser::IsSIReg(unsigned Reg) { 1617 switch (Reg) { 1618 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!"); 1619 case X86::RSI: 1620 case X86::ESI: 1621 case X86::SI: 1622 return true; 1623 case X86::RDI: 1624 case X86::EDI: 1625 case X86::DI: 1626 return false; 1627 } 1628 } 1629 1630 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, 1631 bool IsSIReg) { 1632 switch (RegClassID) { 1633 default: llvm_unreachable("Unexpected register class"); 1634 case X86::GR64RegClassID: 1635 return IsSIReg ? X86::RSI : X86::RDI; 1636 case X86::GR32RegClassID: 1637 return IsSIReg ? X86::ESI : X86::EDI; 1638 case X86::GR16RegClassID: 1639 return IsSIReg ? X86::SI : X86::DI; 1640 } 1641 } 1642 1643 void X86AsmParser::AddDefaultSrcDestOperands( 1644 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src, 1645 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) { 1646 if (isParsingIntelSyntax()) { 1647 Operands.push_back(std::move(Dst)); 1648 Operands.push_back(std::move(Src)); 1649 } 1650 else { 1651 Operands.push_back(std::move(Src)); 1652 Operands.push_back(std::move(Dst)); 1653 } 1654 } 1655 1656 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands, 1657 OperandVector &FinalOperands) { 1658 1659 if (OrigOperands.size() > 1) { 1660 // Check if sizes match, OrigOperands also contains the instruction name 1661 assert(OrigOperands.size() == FinalOperands.size() + 1 && 1662 "Operand size mismatch"); 1663 1664 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings; 1665 // Verify types match 1666 int RegClassID = -1; 1667 for (unsigned int i = 0; i < FinalOperands.size(); ++i) { 1668 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]); 
1669 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]); 1670 1671 if (FinalOp.isReg() && 1672 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg())) 1673 // Return false and let a normal complaint about bogus operands happen 1674 return false; 1675 1676 if (FinalOp.isMem()) { 1677 1678 if (!OrigOp.isMem()) 1679 // Return false and let a normal complaint about bogus operands happen 1680 return false; 1681 1682 unsigned OrigReg = OrigOp.Mem.BaseReg; 1683 unsigned FinalReg = FinalOp.Mem.BaseReg; 1684 1685 // If we've already encounterd a register class, make sure all register 1686 // bases are of the same register class 1687 if (RegClassID != -1 && 1688 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) { 1689 return Error(OrigOp.getStartLoc(), 1690 "mismatching source and destination index registers"); 1691 } 1692 1693 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg)) 1694 RegClassID = X86::GR64RegClassID; 1695 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg)) 1696 RegClassID = X86::GR32RegClassID; 1697 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg)) 1698 RegClassID = X86::GR16RegClassID; 1699 else 1700 // Unexpected register class type 1701 // Return false and let a normal complaint about bogus operands happen 1702 return false; 1703 1704 bool IsSI = IsSIReg(FinalReg); 1705 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI); 1706 1707 if (FinalReg != OrigReg) { 1708 std::string RegName = IsSI ? 
"ES:(R|E)SI" : "ES:(R|E)DI"; 1709 Warnings.push_back(std::make_pair( 1710 OrigOp.getStartLoc(), 1711 "memory operand is only for determining the size, " + RegName + 1712 " will be used for the location")); 1713 } 1714 1715 FinalOp.Mem.Size = OrigOp.Mem.Size; 1716 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg; 1717 FinalOp.Mem.BaseReg = FinalReg; 1718 } 1719 } 1720 1721 // Produce warnings only if all the operands passed the adjustment - prevent 1722 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings 1723 for (auto &WarningMsg : Warnings) { 1724 Warning(WarningMsg.first, WarningMsg.second); 1725 } 1726 1727 // Remove old operands 1728 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1729 OrigOperands.pop_back(); 1730 } 1731 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end()); 1732 for (unsigned int i = 0; i < FinalOperands.size(); ++i) 1733 OrigOperands.push_back(std::move(FinalOperands[i])); 1734 1735 return false; 1736 } 1737 1738 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) { 1739 if (isParsingIntelSyntax()) 1740 return parseIntelOperand(Operands, Name); 1741 1742 return parseATTOperand(Operands); 1743 } 1744 1745 bool X86AsmParser::CreateMemForMSInlineAsm( 1746 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, 1747 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, 1748 const InlineAsmIdentifierInfo &Info, OperandVector &Operands) { 1749 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or 1750 // some other label reference. 1751 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) { 1752 // Create an absolute memory reference in order to match against 1753 // instructions taking a PC relative operand. 
1754 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, 1755 End, Size, Identifier, 1756 Info.Label.Decl)); 1757 return false; 1758 } 1759 // We either have a direct symbol reference, or an offset from a symbol. The 1760 // parser always puts the symbol on the LHS, so look there for size 1761 // calculation purposes. 1762 unsigned FrontendSize = 0; 1763 void *Decl = nullptr; 1764 bool IsGlobalLV = false; 1765 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { 1766 // Size is in terms of bits in this context. 1767 FrontendSize = Info.Var.Type * 8; 1768 Decl = Info.Var.Decl; 1769 IsGlobalLV = Info.Var.IsGlobalLV; 1770 } 1771 // It is widely common for MS InlineAsm to use a global variable and one/two 1772 // registers in a mmory expression, and though unaccessible via rip/eip. 1773 if (IsGlobalLV && (BaseReg || IndexReg)) { 1774 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, 1775 End, Size, Identifier, Decl, 0, 1776 BaseReg && IndexReg)); 1777 return false; 1778 } 1779 Operands.push_back(X86Operand::CreateMem( 1780 getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, 1781 Size, 1782 /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize)); 1783 return false; 1784 } 1785 1786 // Some binary bitwise operators have a named synonymous 1787 // Query a candidate string for being such a named operator 1788 // and if so - invoke the appropriate handler 1789 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, 1790 IntelExprStateMachine &SM, 1791 bool &ParseError, SMLoc &End) { 1792 // A named operator should be either lower or upper case, but not a mix... 1793 // except in MASM, which uses full case-insensitivity. 
1794 if (Name.compare(Name.lower()) && Name.compare(Name.upper()) && 1795 !getParser().isParsingMasm()) 1796 return false; 1797 if (Name.equals_insensitive("not")) { 1798 SM.onNot(); 1799 } else if (Name.equals_insensitive("or")) { 1800 SM.onOr(); 1801 } else if (Name.equals_insensitive("shl")) { 1802 SM.onLShift(); 1803 } else if (Name.equals_insensitive("shr")) { 1804 SM.onRShift(); 1805 } else if (Name.equals_insensitive("xor")) { 1806 SM.onXor(); 1807 } else if (Name.equals_insensitive("and")) { 1808 SM.onAnd(); 1809 } else if (Name.equals_insensitive("mod")) { 1810 SM.onMod(); 1811 } else if (Name.equals_insensitive("offset")) { 1812 SMLoc OffsetLoc = getTok().getLoc(); 1813 const MCExpr *Val = nullptr; 1814 StringRef ID; 1815 InlineAsmIdentifierInfo Info; 1816 ParseError = ParseIntelOffsetOperator(Val, ID, Info, End); 1817 if (ParseError) 1818 return true; 1819 StringRef ErrMsg; 1820 ParseError = 1821 SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg); 1822 if (ParseError) 1823 return Error(SMLoc::getFromPointer(Name.data()), ErrMsg); 1824 } else { 1825 return false; 1826 } 1827 if (!Name.equals_insensitive("offset")) 1828 End = consumeToken(); 1829 return true; 1830 } 1831 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name, 1832 IntelExprStateMachine &SM, 1833 bool &ParseError, SMLoc &End) { 1834 if (Name.equals_insensitive("eq")) { 1835 SM.onEq(); 1836 } else if (Name.equals_insensitive("ne")) { 1837 SM.onNE(); 1838 } else if (Name.equals_insensitive("lt")) { 1839 SM.onLT(); 1840 } else if (Name.equals_insensitive("le")) { 1841 SM.onLE(); 1842 } else if (Name.equals_insensitive("gt")) { 1843 SM.onGT(); 1844 } else if (Name.equals_insensitive("ge")) { 1845 SM.onGE(); 1846 } else { 1847 return false; 1848 } 1849 End = consumeToken(); 1850 return true; 1851 } 1852 1853 // Check if current intel expression append after an operand. 
1854 // Like: [Operand][Intel Expression] 1855 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK, 1856 IntelExprStateMachine &SM) { 1857 if (PrevTK != AsmToken::RBrac) 1858 return; 1859 1860 SM.setAppendAfterOperand(); 1861 } 1862 1863 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { 1864 MCAsmParser &Parser = getParser(); 1865 StringRef ErrMsg; 1866 1867 AsmToken::TokenKind PrevTK = AsmToken::Error; 1868 1869 if (getContext().getObjectFileInfo()->isPositionIndependent()) 1870 SM.setPIC(); 1871 1872 bool Done = false; 1873 while (!Done) { 1874 // Get a fresh reference on each loop iteration in case the previous 1875 // iteration moved the token storage during UnLex(). 1876 const AsmToken &Tok = Parser.getTok(); 1877 1878 bool UpdateLocLex = true; 1879 AsmToken::TokenKind TK = getLexer().getKind(); 1880 1881 switch (TK) { 1882 default: 1883 if ((Done = SM.isValidEndState())) 1884 break; 1885 return Error(Tok.getLoc(), "unknown token in expression"); 1886 case AsmToken::Error: 1887 return Error(getLexer().getErrLoc(), getLexer().getErr()); 1888 break; 1889 case AsmToken::EndOfStatement: 1890 Done = true; 1891 break; 1892 case AsmToken::Real: 1893 // DotOperator: [ebx].0 1894 UpdateLocLex = false; 1895 if (ParseIntelDotOperator(SM, End)) 1896 return true; 1897 break; 1898 case AsmToken::Dot: 1899 if (!Parser.isParsingMasm()) { 1900 if ((Done = SM.isValidEndState())) 1901 break; 1902 return Error(Tok.getLoc(), "unknown token in expression"); 1903 } 1904 // MASM allows spaces around the dot operator (e.g., "var . 
x") 1905 Lex(); 1906 UpdateLocLex = false; 1907 if (ParseIntelDotOperator(SM, End)) 1908 return true; 1909 break; 1910 case AsmToken::Dollar: 1911 if (!Parser.isParsingMasm()) { 1912 if ((Done = SM.isValidEndState())) 1913 break; 1914 return Error(Tok.getLoc(), "unknown token in expression"); 1915 } 1916 [[fallthrough]]; 1917 case AsmToken::String: { 1918 if (Parser.isParsingMasm()) { 1919 // MASM parsers handle strings in expressions as constants. 1920 SMLoc ValueLoc = Tok.getLoc(); 1921 int64_t Res; 1922 const MCExpr *Val; 1923 if (Parser.parsePrimaryExpr(Val, End, nullptr)) 1924 return true; 1925 UpdateLocLex = false; 1926 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr())) 1927 return Error(ValueLoc, "expected absolute value"); 1928 if (SM.onInteger(Res, ErrMsg)) 1929 return Error(ValueLoc, ErrMsg); 1930 break; 1931 } 1932 [[fallthrough]]; 1933 } 1934 case AsmToken::At: 1935 case AsmToken::Identifier: { 1936 SMLoc IdentLoc = Tok.getLoc(); 1937 StringRef Identifier = Tok.getString(); 1938 UpdateLocLex = false; 1939 if (Parser.isParsingMasm()) { 1940 size_t DotOffset = Identifier.find_first_of('.'); 1941 if (DotOffset != StringRef::npos) { 1942 consumeToken(); 1943 StringRef LHS = Identifier.slice(0, DotOffset); 1944 StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1); 1945 StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos); 1946 if (!RHS.empty()) { 1947 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS)); 1948 } 1949 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot)); 1950 if (!LHS.empty()) { 1951 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS)); 1952 } 1953 break; 1954 } 1955 } 1956 // (MASM only) <TYPE> PTR operator 1957 if (Parser.isParsingMasm()) { 1958 const AsmToken &NextTok = getLexer().peekTok(); 1959 if (NextTok.is(AsmToken::Identifier) && 1960 NextTok.getIdentifier().equals_insensitive("ptr")) { 1961 AsmTypeInfo Info; 1962 if (Parser.lookUpType(Identifier, Info)) 1963 return Error(Tok.getLoc(), "unknown type"); 
1964 SM.onCast(Info); 1965 // Eat type and PTR. 1966 consumeToken(); 1967 End = consumeToken(); 1968 break; 1969 } 1970 } 1971 // Register, or (MASM only) <register>.<field> 1972 MCRegister Reg; 1973 if (Tok.is(AsmToken::Identifier)) { 1974 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) { 1975 if (SM.onRegister(Reg, ErrMsg)) 1976 return Error(IdentLoc, ErrMsg); 1977 break; 1978 } 1979 if (Parser.isParsingMasm()) { 1980 const std::pair<StringRef, StringRef> IDField = 1981 Tok.getString().split('.'); 1982 const StringRef ID = IDField.first, Field = IDField.second; 1983 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size()); 1984 if (!Field.empty() && 1985 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) { 1986 if (SM.onRegister(Reg, ErrMsg)) 1987 return Error(IdentLoc, ErrMsg); 1988 1989 AsmFieldInfo Info; 1990 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); 1991 if (Parser.lookUpField(Field, Info)) 1992 return Error(FieldStartLoc, "unknown offset"); 1993 else if (SM.onPlus(ErrMsg)) 1994 return Error(getTok().getLoc(), ErrMsg); 1995 else if (SM.onInteger(Info.Offset, ErrMsg)) 1996 return Error(IdentLoc, ErrMsg); 1997 SM.setTypeInfo(Info.Type); 1998 1999 End = consumeToken(); 2000 break; 2001 } 2002 } 2003 } 2004 // Operator synonymous ("not", "or" etc.) 
2005 bool ParseError = false; 2006 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) { 2007 if (ParseError) 2008 return true; 2009 break; 2010 } 2011 if (Parser.isParsingMasm() && 2012 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) { 2013 if (ParseError) 2014 return true; 2015 break; 2016 } 2017 // Symbol reference, when parsing assembly content 2018 InlineAsmIdentifierInfo Info; 2019 AsmFieldInfo FieldInfo; 2020 const MCExpr *Val; 2021 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { 2022 // MS Dot Operator expression 2023 if (Identifier.count('.') && 2024 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) { 2025 if (ParseIntelDotOperator(SM, End)) 2026 return true; 2027 break; 2028 } 2029 } 2030 if (isParsingMSInlineAsm()) { 2031 // MS InlineAsm operators (TYPE/LENGTH/SIZE) 2032 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { 2033 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { 2034 if (SM.onInteger(Val, ErrMsg)) 2035 return Error(IdentLoc, ErrMsg); 2036 } else { 2037 return true; 2038 } 2039 break; 2040 } 2041 // MS InlineAsm identifier 2042 // Call parseIdentifier() to combine @ with the identifier behind it. 
2043 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) 2044 return Error(IdentLoc, "expected identifier"); 2045 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) 2046 return true; 2047 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2048 true, ErrMsg)) 2049 return Error(IdentLoc, ErrMsg); 2050 break; 2051 } 2052 if (Parser.isParsingMasm()) { 2053 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) { 2054 int64_t Val; 2055 if (ParseMasmOperator(OpKind, Val)) 2056 return true; 2057 if (SM.onInteger(Val, ErrMsg)) 2058 return Error(IdentLoc, ErrMsg); 2059 break; 2060 } 2061 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) { 2062 // Field offset immediate; <TYPE>.<field specification> 2063 Lex(); // eat type 2064 bool EndDot = parseOptionalToken(AsmToken::Dot); 2065 while (EndDot || (getTok().is(AsmToken::Identifier) && 2066 getTok().getString().startswith("."))) { 2067 getParser().parseIdentifier(Identifier); 2068 if (!EndDot) 2069 Identifier.consume_front("."); 2070 EndDot = Identifier.consume_back("."); 2071 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier, 2072 FieldInfo)) { 2073 SMLoc IDEnd = 2074 SMLoc::getFromPointer(Identifier.data() + Identifier.size()); 2075 return Error(IdentLoc, "Unable to lookup field reference!", 2076 SMRange(IdentLoc, IDEnd)); 2077 } 2078 if (!EndDot) 2079 EndDot = parseOptionalToken(AsmToken::Dot); 2080 } 2081 if (SM.onInteger(FieldInfo.Offset, ErrMsg)) 2082 return Error(IdentLoc, ErrMsg); 2083 break; 2084 } 2085 } 2086 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) { 2087 return Error(Tok.getLoc(), "Unexpected identifier!"); 2088 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2089 false, ErrMsg)) { 2090 return Error(IdentLoc, ErrMsg); 2091 } 2092 break; 2093 } 2094 case AsmToken::Integer: { 2095 // Look for 'b' or 'f' following an Integer as a directional label 2096 SMLoc Loc = getTok().getLoc(); 2097 int64_t IntVal = 
getTok().getIntVal(); 2098 End = consumeToken(); 2099 UpdateLocLex = false; 2100 if (getLexer().getKind() == AsmToken::Identifier) { 2101 StringRef IDVal = getTok().getString(); 2102 if (IDVal == "f" || IDVal == "b") { 2103 MCSymbol *Sym = 2104 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 2105 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2106 const MCExpr *Val = 2107 MCSymbolRefExpr::create(Sym, Variant, getContext()); 2108 if (IDVal == "b" && Sym->isUndefined()) 2109 return Error(Loc, "invalid reference to undefined symbol"); 2110 StringRef Identifier = Sym->getName(); 2111 InlineAsmIdentifierInfo Info; 2112 AsmTypeInfo Type; 2113 if (SM.onIdentifierExpr(Val, Identifier, Info, Type, 2114 isParsingMSInlineAsm(), ErrMsg)) 2115 return Error(Loc, ErrMsg); 2116 End = consumeToken(); 2117 } else { 2118 if (SM.onInteger(IntVal, ErrMsg)) 2119 return Error(Loc, ErrMsg); 2120 } 2121 } else { 2122 if (SM.onInteger(IntVal, ErrMsg)) 2123 return Error(Loc, ErrMsg); 2124 } 2125 break; 2126 } 2127 case AsmToken::Plus: 2128 if (SM.onPlus(ErrMsg)) 2129 return Error(getTok().getLoc(), ErrMsg); 2130 break; 2131 case AsmToken::Minus: 2132 if (SM.onMinus(ErrMsg)) 2133 return Error(getTok().getLoc(), ErrMsg); 2134 break; 2135 case AsmToken::Tilde: SM.onNot(); break; 2136 case AsmToken::Star: SM.onStar(); break; 2137 case AsmToken::Slash: SM.onDivide(); break; 2138 case AsmToken::Percent: SM.onMod(); break; 2139 case AsmToken::Pipe: SM.onOr(); break; 2140 case AsmToken::Caret: SM.onXor(); break; 2141 case AsmToken::Amp: SM.onAnd(); break; 2142 case AsmToken::LessLess: 2143 SM.onLShift(); break; 2144 case AsmToken::GreaterGreater: 2145 SM.onRShift(); break; 2146 case AsmToken::LBrac: 2147 if (SM.onLBrac()) 2148 return Error(Tok.getLoc(), "unexpected bracket encountered"); 2149 tryParseOperandIdx(PrevTK, SM); 2150 break; 2151 case AsmToken::RBrac: 2152 if (SM.onRBrac(ErrMsg)) { 2153 return Error(Tok.getLoc(), ErrMsg); 2154 } 2155 break; 2156 case 
AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

/// Rewrite the source text of a parsed MS inline-asm Intel expression so the
/// AsmRewrite machinery can re-emit it in a canonical form.  Text before a
/// symbol displacement (if any) is skipped via AOK_Skip; the symbol itself is
/// left in place, and the remainder of [Start, End) is replaced by an
/// IntelExpr rewrite built from the state machine's base/index/scale/imm.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  // Length of the full expression text; trimmed below if a symbol is present.
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // Rewrite region now begins just past the symbol name.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
/// Resolve an identifier inside MS inline assembly via the frontend callback.
/// The frontend (SemaCallback) is asked to look up the identifier; the lexer
/// is then advanced past however much source text the frontend claims to have
/// consumed.  Unresolved identifiers are assumed to be labels and rewritten
/// (or renamed, when parsing an 'offset' operand) to the frontend-internal
/// label name.  On success, Val receives a symbol reference (or stays null
/// for enum values, whose numeric value is handled by the caller).
bool X86AsmParser::ParseIntelInlineAsmIdentifier(
    const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
    bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
  MCAsmParser &Parser = getParser();
  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // LineBuf deliberately runs from the identifier to the end of the buffer;
  // the frontend decides how much of it constitutes the identifier.
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
          "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
      SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                         Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name,
    // unless we are parsing the operand of an offset operator
    if (!IsParsingOffsetOperator)
      InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                          InternalName);
    else
      Identifier = InternalName;
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}

// ParseRoundingModeOp - Parse AVX-512 rounding mode operand.
// Accepts either an embedded-rounding specifier "{rn|rd|ru|rz-sae}" (emitted
// as an immediate operand holding the STATIC_ROUNDING value) or a bare
// "{sae}" (emitted as a token operand).
bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  // Eat "{" and mark the current place.
  const SMLoc consumedToken = consumeToken();
  if (Tok.isNot(AsmToken::Identifier))
    return Error(Tok.getLoc(), "Expected an identifier after {");
  if (Tok.getIdentifier().startswith("r")){
    int rndMode = StringSwitch<int>(Tok.getIdentifier())
      .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
      .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
      .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
      .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
      .Default(-1);
    if (-1 == rndMode)
      return Error(Tok.getLoc(), "Invalid rounding mode.");
    Parser.Lex(); // Eat "r*" of r*-sae
    if (!getLexer().is(AsmToken::Minus))
      return Error(Tok.getLoc(), "Expected - at this point");
    Parser.Lex(); // Eat "-"
    // NOTE(review): the next token is consumed without checking that it is
    // literally "sae" — malformed input like "{rn-foo}" is accepted here and
    // only caught if the following token is not "}".  Confirm intentional.
    Parser.Lex(); // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    SMLoc End = Tok.getEndLoc();
    Parser.Lex(); // Eat "}"
    const MCExpr *RndModeOp =
      MCConstantExpr::create(rndMode, Parser.getContext());
    Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
    return false;
  }
  if(Tok.getIdentifier().equals("sae")){
    Parser.Lex(); // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    Parser.Lex(); // Eat "}"
    Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
    return false;
  }
  return Error(Tok.getLoc(), "unknown token in expression");
}

/// Parse the '.' operator (structure field access, e.g. "[ebx].field" or
/// ".Imm" numeric offsets).  The resolved field offset is folded into the
/// expression state machine as an immediate, and its type info recorded.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
                                         SMLoc &End) {
  const AsmToken &Tok = getTok();
  AsmFieldInfo Info;

  // Drop the optional '.'.
  StringRef DotDispStr = Tok.getString();
  if (DotDispStr.startswith("."))
    DotDispStr = DotDispStr.drop_front(1);
  StringRef TrailingDot;

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    if (DotDispStr.getAsInteger(10, DotDisp))
      return Error(Tok.getLoc(), "Unexpected offset");
    Info.Offset = DotDisp.getZExtValue();
  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
             Tok.is(AsmToken::Identifier)) {
    // A trailing dot belongs to the NEXT dot-expression; strip it here and
    // push it back to the lexer below.
    if (DotDispStr.endswith(".")) {
      TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
      DotDispStr = DotDispStr.drop_back(1);
    }
    const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    const StringRef Base = BaseMember.first, Member = BaseMember.second;
    // Try the current expression type, then the symbol's type, then a plain
    // field lookup, and finally the frontend callback.
    if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
        getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
        getParser().lookUpField(DotDispStr, Info) &&
        (!SemaCallback ||
         SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
  } else {
    return Error(Tok.getLoc(), "Unexpected token type!");
  }

  // Eat the DotExpression and update End
  End = SMLoc::getFromPointer(DotDispStr.data());
  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
  while (Tok.getLoc().getPointer() < DotExprEndLoc)
    Lex();
  if (!TrailingDot.empty())
    getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
  SM.addImm(Info.Offset);
  SM.setTypeInfo(Info.Type);
  return false;
}

/// Parse the 'offset' operator.
/// This operator is used to specify the location of a given operand
bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                            InlineAsmIdentifierInfo &Info,
                                            SMLoc &End) {
  // Eat offset, mark start of identifier.
  SMLoc Start = Lex().getLoc();
  ID = getTok().getString();
  if (!isParsingMSInlineAsm()) {
    // Outside MS inline asm the operand must be a plain identifier/string
    // that parses as a primary expression.
    if ((getTok().isNot(AsmToken::Identifier) &&
         getTok().isNot(AsmToken::String)) ||
        getParser().parsePrimaryExpr(Val, End, nullptr))
      return Error(Start, "unexpected token!");
  } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
    return Error(Start, "unable to lookup expression");
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
    return Error(Start, "offset operator cannot yet handle constants");
  }
  return false;
}

// Query a candidate string for being an Intel assembly operator
// Report back its kind, or IOK_INVALID if it does not evaluate as a known one
unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name)
    .Cases("TYPE","type",IOK_TYPE)
    .Cases("SIZE","size",IOK_SIZE)
    .Cases("LENGTH","length",IOK_LENGTH)
    .Default(IOK_INVALID);
}

/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
/// returns the number of elements in an array.  It returns the value 1 for
/// non-array variables.  The SIZE operator returns the size of a C or C++
/// variable.  A variable's size is the product of its LENGTH and TYPE.  The
/// TYPE operator returns the size of a C or C++ type or variable.  If the
/// variable is an array, TYPE returns the size of a single element.
/// Returns 0 on failure (an Error has already been reported in that case).
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  Parser.Lex(); // Eat operator.

  const MCExpr *Val = nullptr;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*IsUnevaluatedOperand=*/true, End))
    return 0;

  // These operators only apply to variables known to the frontend.
  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    Error(Start, "unable to lookup expression");
    return 0;
  }

  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Var.Length; break;
  case IOK_SIZE: CVal = Info.Var.Size; break;
  case IOK_TYPE: CVal = Info.Var.Type; break;
  }

  return CVal;
}

// Query a candidate string for being a MASM assembly operator
// Report back its kind, or MOK_INVALID if it does not evaluate as a known one
unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
  return StringSwitch<unsigned>(Name.lower())
      .Case("type", MOK_TYPE)
      .Cases("size", "sizeof", MOK_SIZEOF)
      .Cases("length", "lengthof", MOK_LENGTHOF)
      .Default(MOK_INVALID);
}

/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators.  The LENGTHOF
/// operator returns the number of elements in an array.  It returns the value
/// 1 for non-array variables.  The SIZEOF operator returns the size of a type
/// or variable in bytes.  A variable's size is the product of its LENGTH and
/// TYPE.  The TYPE operator returns the size of a variable.  If the variable
/// is an array, TYPE returns the size of a single element.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  // Not a bare type name: evaluate a full Intel expression and query the
  // state machine for the requested property.
  if (!Val) {
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}

/// Parse an optional Intel memory-size specifier ("byte ptr", "xmmword ptr",
/// ...).  On return Size holds the operand width in bits, or 0 if no
/// specifier was present.  Returns true (with an Error reported) if a size
/// keyword is not followed by PTR/ptr.
bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
  Size = StringSwitch<unsigned>(getTok().getString())
    .Cases("BYTE", "byte", 8)
    .Cases("WORD", "word", 16)
    .Cases("DWORD", "dword", 32)
    .Cases("FLOAT", "float", 32)
    .Cases("LONG", "long", 32)
    .Cases("FWORD", "fword", 48)
    .Cases("DOUBLE", "double", 64)
    .Cases("QWORD", "qword", 64)
    .Cases("MMWORD","mmword", 64)
    .Cases("XWORD", "xword", 80)
    .Cases("TBYTE", "tbyte", 80)
    .Cases("XMMWORD", "xmmword", 128)
    .Cases("YMMWORD", "ymmword", 256)
    .Cases("ZMMWORD", "zmmword", 512)
    .Default(0);
  if (Size) {
    const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
    if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
      return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
    Lex(); // Eat ptr.
  }
  return false;
}

/// Parse one Intel-syntax operand: an optional size specifier followed by a
/// rounding-mode clause, a register, or a (possibly segment-overridden)
/// immediate/memory expression.  Name is the instruction mnemonic, used to
/// special-case unconditional branches (jmp/call).
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. Check if we have a valid segment register.
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Combine the symbolic and constant parts of the displacement.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  // RIP can only be a base; with an explicit index present it is implicit.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // ESP/RSP cannot be an index; with no explicit scale, assume the user meant
  // them as the base and swap.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
                                   End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (Parser.isParsingMasm()) {
    if (is64BitMode() && SM.getElementSize() > 0) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        // "jmp qword ptr x" is an indirect branch, never a direct target.
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}

/// Parse one AT&T-syntax operand: "$imm", a rounding-mode clause, a register,
/// or a memory reference (optionally segment-prefixed).
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supersede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This a memory operand or a register. We have some parsing complications
    // as a '(' may be part of an immediate expression or the addressing mode
    // block. This is complicated by the fact that an assembler-level variable
    // may refer either to a register or an immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return registers that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}

// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
2747 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) { 2748 return StringSwitch<X86::CondCode>(CC) 2749 .Case("o", X86::COND_O) // Overflow 2750 .Case("no", X86::COND_NO) // No Overflow 2751 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal 2752 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below 2753 .Cases("e", "z", X86::COND_E) // Equal/Zero 2754 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero 2755 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above 2756 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal 2757 .Case("s", X86::COND_S) // Sign 2758 .Case("ns", X86::COND_NS) // No Sign 2759 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even 2760 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd 2761 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal 2762 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less 2763 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater 2764 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal 2765 .Default(X86::COND_INVALID); 2766 } 2767 2768 // true on failure, false otherwise 2769 // If no {z} mark was found - Parser doesn't advance 2770 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z, 2771 const SMLoc &StartLoc) { 2772 MCAsmParser &Parser = getParser(); 2773 // Assuming we are just pass the '{' mark, quering the next token 2774 // Searched for {z}, but none was found. 
Return false, as no parsing error was 2775 // encountered 2776 if (!(getLexer().is(AsmToken::Identifier) && 2777 (getLexer().getTok().getIdentifier() == "z"))) 2778 return false; 2779 Parser.Lex(); // Eat z 2780 // Query and eat the '}' mark 2781 if (!getLexer().is(AsmToken::RCurly)) 2782 return Error(getLexer().getLoc(), "Expected } at this point"); 2783 Parser.Lex(); // Eat '}' 2784 // Assign Z with the {z} mark operand 2785 Z = X86Operand::CreateToken("{z}", StartLoc); 2786 return false; 2787 } 2788 2789 // true on failure, false otherwise 2790 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) { 2791 MCAsmParser &Parser = getParser(); 2792 if (getLexer().is(AsmToken::LCurly)) { 2793 // Eat "{" and mark the current place. 2794 const SMLoc consumedToken = consumeToken(); 2795 // Distinguish {1to<NUM>} from {%k<NUM>}. 2796 if(getLexer().is(AsmToken::Integer)) { 2797 // Parse memory broadcasting ({1to<NUM>}). 2798 if (getLexer().getTok().getIntVal() != 1) 2799 return TokError("Expected 1to<NUM> at this point"); 2800 StringRef Prefix = getLexer().getTok().getString(); 2801 Parser.Lex(); // Eat first token of 1to8 2802 if (!getLexer().is(AsmToken::Identifier)) 2803 return TokError("Expected 1to<NUM> at this point"); 2804 // Recognize only reasonable suffixes. 
2805 SmallVector<char, 5> BroadcastVector; 2806 StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier()) 2807 .toStringRef(BroadcastVector); 2808 if (!BroadcastString.startswith("1to")) 2809 return TokError("Expected 1to<NUM> at this point"); 2810 const char *BroadcastPrimitive = 2811 StringSwitch<const char *>(BroadcastString) 2812 .Case("1to2", "{1to2}") 2813 .Case("1to4", "{1to4}") 2814 .Case("1to8", "{1to8}") 2815 .Case("1to16", "{1to16}") 2816 .Case("1to32", "{1to32}") 2817 .Default(nullptr); 2818 if (!BroadcastPrimitive) 2819 return TokError("Invalid memory broadcast primitive."); 2820 Parser.Lex(); // Eat trailing token of 1toN 2821 if (!getLexer().is(AsmToken::RCurly)) 2822 return TokError("Expected } at this point"); 2823 Parser.Lex(); // Eat "}" 2824 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive, 2825 consumedToken)); 2826 // No AVX512 specific primitives can pass 2827 // after memory broadcasting, so return. 2828 return false; 2829 } else { 2830 // Parse either {k}{z}, {z}{k}, {k} or {z} 2831 // last one have no meaning, but GCC accepts it 2832 // Currently, we're just pass a '{' mark 2833 std::unique_ptr<X86Operand> Z; 2834 if (ParseZ(Z, consumedToken)) 2835 return true; 2836 // Reaching here means that parsing of the allegadly '{z}' mark yielded 2837 // no errors. 2838 // Query for the need of further parsing for a {%k<NUM>} mark 2839 if (!Z || getLexer().is(AsmToken::LCurly)) { 2840 SMLoc StartLoc = Z ? 
consumeToken() : consumedToken; 2841 // Parse an op-mask register mark ({%k<NUM>}), which is now to be 2842 // expected 2843 MCRegister RegNo; 2844 SMLoc RegLoc; 2845 if (!parseRegister(RegNo, RegLoc, StartLoc) && 2846 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) { 2847 if (RegNo == X86::K0) 2848 return Error(RegLoc, "Register k0 can't be used as write mask"); 2849 if (!getLexer().is(AsmToken::RCurly)) 2850 return Error(getLexer().getLoc(), "Expected } at this point"); 2851 Operands.push_back(X86Operand::CreateToken("{", StartLoc)); 2852 Operands.push_back( 2853 X86Operand::CreateReg(RegNo, StartLoc, StartLoc)); 2854 Operands.push_back(X86Operand::CreateToken("}", consumeToken())); 2855 } else 2856 return Error(getLexer().getLoc(), 2857 "Expected an op-mask register at this point"); 2858 // {%k<NUM>} mark is found, inquire for {z} 2859 if (getLexer().is(AsmToken::LCurly) && !Z) { 2860 // Have we've found a parsing error, or found no (expected) {z} mark 2861 // - report an error 2862 if (ParseZ(Z, consumeToken()) || !Z) 2863 return Error(getLexer().getLoc(), 2864 "Expected a {z} mark at this point"); 2865 2866 } 2867 // '{z}' on its own is meaningless, hence should be ignored. 2868 // on the contrary - have it been accompanied by a K register, 2869 // allow it. 2870 if (Z) 2871 Operands.push_back(std::move(Z)); 2872 } 2873 } 2874 } 2875 return false; 2876 } 2877 2878 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix 2879 /// has already been parsed if present. disp may be provided as well. 2880 bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp, 2881 SMLoc StartLoc, SMLoc EndLoc, 2882 OperandVector &Operands) { 2883 MCAsmParser &Parser = getParser(); 2884 SMLoc Loc; 2885 // Based on the initial passed values, we may be in any of these cases, we are 2886 // in one of these cases (with current position (*)): 2887 2888 // 1. seg : * disp (base-index-scale-expr) 2889 // 2. 
seg : *(disp) (base-index-scale-expr) 2890 // 3. seg : *(base-index-scale-expr) 2891 // 4. disp *(base-index-scale-expr) 2892 // 5. *(disp) (base-index-scale-expr) 2893 // 6. *(base-index-scale-expr) 2894 // 7. disp * 2895 // 8. *(disp) 2896 2897 // If we do not have an displacement yet, check if we're in cases 4 or 6 by 2898 // checking if the first object after the parenthesis is a register (or an 2899 // identifier referring to a register) and parse the displacement or default 2900 // to 0 as appropriate. 2901 auto isAtMemOperand = [this]() { 2902 if (this->getLexer().isNot(AsmToken::LParen)) 2903 return false; 2904 AsmToken Buf[2]; 2905 StringRef Id; 2906 auto TokCount = this->getLexer().peekTokens(Buf, true); 2907 if (TokCount == 0) 2908 return false; 2909 switch (Buf[0].getKind()) { 2910 case AsmToken::Percent: 2911 case AsmToken::Comma: 2912 return true; 2913 // These lower cases are doing a peekIdentifier. 2914 case AsmToken::At: 2915 case AsmToken::Dollar: 2916 if ((TokCount > 1) && 2917 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) && 2918 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer())) 2919 Id = StringRef(Buf[0].getLoc().getPointer(), 2920 Buf[1].getIdentifier().size() + 1); 2921 break; 2922 case AsmToken::Identifier: 2923 case AsmToken::String: 2924 Id = Buf[0].getIdentifier(); 2925 break; 2926 default: 2927 return false; 2928 } 2929 // We have an ID. Check if it is bound to a register. 2930 if (!Id.empty()) { 2931 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id); 2932 if (Sym->isVariable()) { 2933 auto V = Sym->getVariableValue(/*SetUsed*/ false); 2934 return isa<X86MCExpr>(V); 2935 } 2936 } 2937 return false; 2938 }; 2939 2940 if (!Disp) { 2941 // Parse immediate if we're not at a mem operand yet. 
2942 if (!isAtMemOperand()) { 2943 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc)) 2944 return true; 2945 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here."); 2946 } else { 2947 // Disp is implicitly zero if we haven't parsed it yet. 2948 Disp = MCConstantExpr::create(0, Parser.getContext()); 2949 } 2950 } 2951 2952 // We are now either at the end of the operand or at the '(' at the start of a 2953 // base-index-scale-expr. 2954 2955 if (!parseOptionalToken(AsmToken::LParen)) { 2956 if (SegReg == 0) 2957 Operands.push_back( 2958 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc)); 2959 else 2960 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 2961 0, 0, 1, StartLoc, EndLoc)); 2962 return false; 2963 } 2964 2965 // If we reached here, then eat the '(' and Process 2966 // the rest of the memory operand. 2967 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 2968 SMLoc BaseLoc = getLexer().getLoc(); 2969 const MCExpr *E; 2970 StringRef ErrMsg; 2971 2972 // Parse BaseReg if one is provided. 2973 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) { 2974 if (Parser.parseExpression(E, EndLoc) || 2975 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here")) 2976 return true; 2977 2978 // Check the register. 2979 BaseReg = cast<X86MCExpr>(E)->getRegNo(); 2980 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) 2981 return Error(BaseLoc, "eiz and riz can only be used as index registers", 2982 SMRange(BaseLoc, EndLoc)); 2983 } 2984 2985 if (parseOptionalToken(AsmToken::Comma)) { 2986 // Following the comma we should have either an index register, or a scale 2987 // value. We don't support the later form, but we want to parse it 2988 // correctly. 2989 // 2990 // Even though it would be completely consistent to support syntax like 2991 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 
2992 if (getLexer().isNot(AsmToken::RParen)) { 2993 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc)) 2994 return true; 2995 2996 if (!isa<X86MCExpr>(E)) { 2997 // We've parsed an unexpected Scale Value instead of an index 2998 // register. Interpret it as an absolute. 2999 int64_t ScaleVal; 3000 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr())) 3001 return Error(Loc, "expected absolute expression"); 3002 if (ScaleVal != 1) 3003 Warning(Loc, "scale factor without index register is ignored"); 3004 Scale = 1; 3005 } else { // IndexReg Found. 3006 IndexReg = cast<X86MCExpr>(E)->getRegNo(); 3007 3008 if (BaseReg == X86::RIP) 3009 return Error(Loc, 3010 "%rip as base register can not have an index register"); 3011 if (IndexReg == X86::RIP) 3012 return Error(Loc, "%rip is not allowed as an index register"); 3013 3014 if (parseOptionalToken(AsmToken::Comma)) { 3015 // Parse the scale amount: 3016 // ::= ',' [scale-expression] 3017 3018 // A scale amount without an index is ignored. 3019 if (getLexer().isNot(AsmToken::RParen)) { 3020 int64_t ScaleVal; 3021 if (Parser.parseTokenLoc(Loc) || 3022 Parser.parseAbsoluteExpression(ScaleVal)) 3023 return Error(Loc, "expected scale expression"); 3024 Scale = (unsigned)ScaleVal; 3025 // Validate the scale amount. 3026 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 3027 Scale != 1) 3028 return Error(Loc, "scale factor in 16-bit address must be 1"); 3029 if (checkScale(Scale, ErrMsg)) 3030 return Error(Loc, ErrMsg); 3031 } 3032 } 3033 } 3034 } 3035 } 3036 3037 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 3038 if (parseToken(AsmToken::RParen, "unexpected token in memory operand")) 3039 return true; 3040 3041 // This is to support otherwise illegal operand (%dx) found in various 3042 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now 3043 // be supported. Mark such DX variants separately fix only in special cases. 
  // Special case: "(%dx)" with no displacement/index/segment is accepted as a
  // plain DX register operand (see the out/in hacks in ParseInstruction).
  if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
      isa<MCConstantExpr>(Disp) &&
      cast<MCConstantExpr>(Disp)->getValue() == 0) {
    Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
    return false;
  }

  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(BaseLoc, ErrMsg);

  // Emit the memory operand; the segmented form is only needed when any of
  // segment/base/index registers is present.
  if (SegReg || BaseReg || IndexReg)
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                             BaseReg, IndexReg, Scale, StartLoc,
                                             EndLoc));
  else
    Operands.push_back(
        X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
  return false;
}

// Parse either a standard primary expression or a register.
// On success, Res receives an X86MCExpr wrapping the register number when a
// register is recognized ('%' in AT&T syntax, or a register-named identifier
// in Intel syntax); otherwise parsing is delegated to the generic
// MCAsmParser::parsePrimaryExpr. Returns true on error.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  // See if this is a register first.
  if (getTok().is(AsmToken::Percent) ||
      (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
       MatchRegisterName(Parser.getTok().getString()))) {
    SMLoc StartLoc = Parser.getTok().getLoc();
    MCRegister RegNo;
    if (parseRegister(RegNo, StartLoc, EndLoc))
      return true;
    Res = X86MCExpr::create(RegNo, Parser.getContext());
    return false;
  }
  return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
}

// Entry point for parsing one instruction statement. Handles pseudo prefixes
// ({vex}/{disp8}/...), mnemonic patching hacks, prefix mnemonics
// (lock/rep/...), operand parsing, and string-instruction operand defaults.
// Returns true on error.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;
  ForcedDispEncoding = DispEncoding_Default;

  // Parse pseudo prefixes.
  while (true) {
    // GAS-style pseudo prefix: "{vex}", "{evex}", "{disp8}", etc.
    if (Name == "{") {
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        // Another pseudo prefix follows; loop again.
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.startswith("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Immediate condition-code operand extracted from the mnemonic, if any.
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    unsigned CC = StringSwitch<unsigned>(
                      PatchedName.slice(CCIdx, PatchedName.size() - 2))
                      .Case("eq", 0x00)
                      .Case("eq_oq", 0x00)
                      .Case("lt", 0x01)
                      .Case("lt_os", 0x01)
                      .Case("le", 0x02)
                      .Case("le_os", 0x02)
                      .Case("unord", 0x03)
                      .Case("unord_q", 0x03)
                      .Case("neq", 0x04)
                      .Case("neq_uq", 0x04)
                      .Case("nlt", 0x05)
                      .Case("nlt_us", 0x05)
                      .Case("nle", 0x06)
                      .Case("nle_us", 0x06)
                      .Case("ord", 0x07)
                      .Case("ord_q", 0x07)
                      /* AVX only from here */
                      .Case("eq_uq", 0x08)
                      .Case("nge", 0x09)
                      .Case("nge_us", 0x09)
                      .Case("ngt", 0x0A)
                      .Case("ngt_us", 0x0A)
                      .Case("false", 0x0B)
                      .Case("false_oq", 0x0B)
                      .Case("neq_oq", 0x0C)
                      .Case("ge", 0x0D)
                      .Case("ge_os", 0x0D)
                      .Case("gt", 0x0E)
                      .Case("gt_os", 0x0E)
                      .Case("true", 0x0F)
                      .Case("true_uq", 0x0F)
                      .Case("eq_os", 0x10)
                      .Case("lt_oq", 0x11)
                      .Case("le_oq", 0x12)
                      .Case("unord_s", 0x13)
                      .Case("neq_us", 0x14)
                      .Case("nlt_uq", 0x15)
                      .Case("nle_uq", 0x16)
                      .Case("ord_s", 0x17)
                      .Case("eq_us", 0x18)
                      .Case("nge_uq", 0x19)
                      .Case("ngt_uq", 0x1A)
                      .Case("false_os", 0x1B)
                      .Case("neq_os", 0x1C)
                      .Case("ge_oq", 0x1D)
                      .Case("gt_oq", 0x1E)
                      .Case("true_us", 0x1F)
                      .Default(~0U);
    // Predicates >= 8 and the fp16 (sh/ph) forms are only valid on the AVX
    // (vcmp) spelling.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.endswith("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.endswith("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.endswith("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.endswith("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.endswith("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.endswith("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
                      PatchedName.slice(5, PatchedName.size() - SuffixSize))
                      .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
                      .Case("lt", 0x1)
                      .Case("le", 0x2)
                      //.Case("false", 0x3) // Not a documented alias.
                      .Case("neq", 0x4)
                      .Case("nlt", 0x5)
                      .Case("nle", 0x6)
                      //.Case("true", 0x7) // Not a documented alias.
                      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
                      PatchedName.slice(5, PatchedName.size() - SuffixSize))
                      .Case("lt", 0x0)
                      .Case("le", 0x1)
                      .Case("gt", 0x2)
                      .Case("ge", 0x3)
                      .Case("eq", 0x4)
                      .Case("neq", 0x5)
                      .Case("false", 0x6)
                      .Case("true", 0x7)
                      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }


  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. For example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  // Accumulate lock/rep/notrack prefixes into Flags; the mnemonic that
  // follows them becomes the instruction name.
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if it's not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.startswith(";") || Name.startswith("\n") ||
           Name.startswith("#") || Name.startswith("\t") ||
           Name.startswith("/")) {
      // FIXME: The mnemonic won't match correctly if it's not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // In AT&T syntax it precedes the other operands.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
    }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  // In Intel syntax it follows the other operands.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
      Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
                           .Case("fsub", "fsubp")
                           .Case("fdiv", "fdivp")
                           .Case("fsubr", "fsubrp")
                           .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.startswith("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.startswith("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.startswith("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.startswith("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.startswith("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // Bare AT&T "movsd" is the string instruction, not the SSE2 one.
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}

// Post-match instruction rewrites (encoding shrinking, pseudo-prefix driven
// opcode changes). Returns true if Inst was modified.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  if
(ForcedVEXEncoding != VEXEncoding_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  }
}

// Semantic checks on a matched instruction; emits Warning/Error diagnostics.
// Returns true only when an Error (not a Warning) was reported.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  // FP16 complex multiply-add: destination must differ from all sources.
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //    VFMULCPHZrr   Dest, Src1, Src2
    //    VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //    VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = TSFlags & X86II::EVEX_K ?
2 : 1;
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // 4-register-block instructions: Src2 implicitly names a group of four
    // consecutive registers, so warn if it isn't aligned to a multiple of 4.
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
                                    X86::AddrNumOperands - 1).getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                         RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                         RegName.take_front(3) + Twine(GroupEnd) +
                         "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    // Gather instructions: the EVEX form forbids dest == index; the VEX form
    // additionally involves a mask register operand that must be distinct.
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCPhysReg HReg = X86::NoRegister;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg != X86::NoRegister) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  // prefetchit0/1 only accept a RIP-relative memory operand.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}

// Shared diagnostic for LVI-special instructions that cannot be mitigated
// automatically; points the user at Intel's guidance.
void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
  Warning(Loc, "Instruction may be vulnerable to LVI and "
               "requires manual mitigation");
  Note(SMLoc(), "See https://software.intel.com/"
                "security-software-guidance/insights/"
                "deep-dive-load-value-injection#specialinstructions"
                " for more information");
}

/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction.
/// To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// For RET-family opcodes the mitigation (SHL + LFENCE) is emitted directly;
/// for memory-indirect JMP/CALL opcodes only a warning is emitted, since they
/// require manual mitigation.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Emit "shl $0, (%sp)" (a no-op store to the return address slot) followed
    // by an LFENCE, before the RET itself is emitted by the caller.
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}

/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Emits the LFENCE (or a warning for instructions that need manual
/// mitigation) as a side effect; the function itself returns nothing.
// Append an LFENCE after any instruction that may load, and warn about the
// load-bearing instructions that cannot be auto-mitigated.
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}

// Central emission point: wraps MCStreamer::emitInstruction with the optional
// LVI hardening passes (CFI mitigation before the instruction, load hardening
// after it), gated on the command-line flag and subtarget features.
void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                   MCStreamer &Out) {
  if (LVIInlineAsmHardening &&
      getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
    applyLVICFIMitigation(Inst, Out);

  Out.emitInstruction(Inst, getSTI());

  if (LVIInlineAsmHardening &&
      getSTI().hasFeature(X86::FeatureLVILoadHardening))
    applyLVILoadHardeningMitigation(Inst, Out);
}

// Dispatch to the syntax-specific matcher. Returns true on error.
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                           OperandVector &Operands,
                                           MCStreamer &Out, uint64_t &ErrorInfo,
                                           bool MatchingInlineAsm) {
  if (isParsingIntelSyntax())
    return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                        MatchingInlineAsm);
  return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
                                    MatchingInlineAsm);
}

// Expand the "wait-prefixed" FPU aliases (e.g. finit -> wait; fninit): emit a
// WAIT instruction and rewrite the mnemonic token to the no-wait form.
void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
                                     OperandVector &Operands, MCStreamer &Out,
                                     bool MatchingInlineAsm) {
  // FIXME: This should be replaced with a real .td file alias mechanism.
  // Also, MatchInstructionImpl should actually *do* the EmitInstruction
  // call.
  // Map each wait-form mnemonic to its no-wait twin; null means "no alias".
  const char *Repl = StringSwitch<const char *>(Op.getToken())
                         .Case("finit", "fninit")
                         .Case("fsave", "fnsave")
                         .Case("fstcw", "fnstcw")
                         .Case("fstcww", "fnstcw")
                         .Case("fstenv", "fnstenv")
                         .Case("fstsw", "fnstsw")
                         .Case("fstsww", "fnstsw")
                         .Case("fclex", "fnclex")
                         .Default(nullptr);
  if (Repl) {
    // Emit the implied WAIT, then swap the mnemonic token so the normal
    // matcher sees the fn* form.
    MCInst Inst;
    Inst.setOpcode(X86::WAIT);
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
  }
}

// Build and report an "instruction requires: <features>" diagnostic from the
// missing-feature bitset. Always returns true (error).
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
                                       const FeatureBitset &MissingFeatures,
                                       bool MatchingInlineAsm) {
  assert(MissingFeatures.any() && "Unknown missing feature!");
  SmallString<126> Msg;
  raw_svector_ostream OS(Msg);
  OS << "instruction requires:";
  for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
    if (MissingFeatures[i])
      OS << ' ' << getSubtargetFeatureName(i);
  }
  return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}

// If the last parsed operand is a prefix operand, pop it off and return its
// prefix bits; otherwise return 0 and leave Operands untouched.
static unsigned getPrefixes(OperandVector &Operands) {
  unsigned Result = 0;
  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
  if (Prefix.isPrefix()) {
    Result = Prefix.getPrefix();
    Operands.pop_back();
  }
  return Result;
}

// Reject matches whose encoding contradicts an explicit {vex}/{vex2}/{vex3}/
// {evex} request from the user.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);

  if (ForcedVEXEncoding == VEXEncoding_EVEX &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
    return Match_Unsupported;

  if ((ForcedVEXEncoding == VEXEncoding_VEX ||
       ForcedVEXEncoding == VEXEncoding_VEX2 ||
       ForcedVEXEncoding == VEXEncoding_VEX3) &&
      (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
    return Match_Unsupported;

  // These instructions are only available with {vex}, {vex2} or {vex3}
  if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
      (ForcedVEXEncoding != VEXEncoding_VEX &&
       ForcedVEXEncoding != VEXEncoding_VEX2 &&
       ForcedVEXEncoding != VEXEncoding_VEX3))
    return Match_Unsupported;

  return Match_Success;
}

// Match an AT&T-syntax statement. First tries the mnemonic as written; on
// failure retries with the size suffixes (b/w/l/q for integer ops, s/l/t for
// x87 ops) appended, then produces the best diagnostic it can. Emits the
// instruction unless MatchingInlineAsm. Returns true on error.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = std::nullopt;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // Fall through to the suffix-retry path below.
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Try each candidate suffix in turn, recording the match result for each.
  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}

// Match an Intel-syntax statement. Intel mnemonics carry no size suffix, so
// an unsized memory operand is retried at each plausible operand size; see
// the size-probing loops below. Returns true on error.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() &&
         "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = std::nullopt;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If an unsized push has a constant immediate operand, default its size to
  // the pointer width of the current mode.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Only record a result the first time or when it selected a different
      // opcode than the previous size did.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}

// Segment registers are omitted from inline-asm clobber lists.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}

// Top-level dispatcher for X86-specific assembler directives. Returns true
// when the directive is not recognized here (so generic handling can run).
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.startswith(".arch"))
    return parseDirectiveArch();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // The .seh_* directives also accept MASM-style spellings when parsing MASM.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  return true;
}

bool X86AsmParser::parseDirectiveArch() {
  // Ignore .arch for now.
  getParser().parseStringToEndOfStatement();
  return false;
}

/// parseDirectiveNops
/// ::= .nops size[, control]
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  // The control operand (maximum NOP size) is optional.
  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  // Note: after diagnosing, return false (not a parse failure) so the
  // assembler keeps going.
  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}

/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  if (parseEOL())
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    // No section yet; initialize the default sections first.
    getStreamer().initSections(false, getSTI());
    Section = getStreamer().getCurrentSectionOnly();
  }
  // Code sections align with NOPs; data sections align with zero fill.
  if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
  return false;
}

/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Is32Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Is64Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // Diagnose but do not treat as a parse failure.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}

// .cv_fpo_proc foo
bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  StringRef ProcName;
  int64_t ParamsSize;
  if (Parser.parseIdentifier(ProcName))
    return Parser.TokError("expected symbol name");
  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
    return true;
  if (!isUIntN(32, ParamsSize))
    return Parser.TokError("parameters size out of range");
  if (parseEOL())
    return true;
  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
}

// .cv_fpo_setframe ebp
bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
  MCRegister Reg;
  SMLoc DummyLoc;
  if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
    return true;
  return getTargetStreamer().emitFPOSetFrame(Reg, L);
}

// .cv_fpo_pushreg ebx
bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
  MCRegister Reg;
  SMLoc DummyLoc;
  if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
    return true;
  return getTargetStreamer().emitFPOPushReg(Reg, L);
}
// Parses the .cv_fpo_stackalloc size operand and forwards it to the target
// streamer.
bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
    return true;
  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}

// .cv_fpo_stackalign 8
// Parses the stack-alignment operand and forwards it to the target streamer.
bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
    return true;
  return getTargetStreamer().emitFPOStackAlign(Offset, L);
}

// .cv_fpo_endprologue
// Operand-less directive marking the end of the FPO prologue.
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL())
    return true;
  return getTargetStreamer().emitFPOEndPrologue(L);
}

// .cv_fpo_endproc
// Operand-less directive marking the end of the FPO procedure.
bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL())
    return true;
  return getTargetStreamer().emitFPOEndProc(L);
}

/// Parses a register operand for a Win64 SEH directive. The operand may be a
/// register name (which must belong to the class \p RegClassID) or a bare
/// integer giving the register's encoding value, which is mapped back to the
/// LLVM register number. Returns true and emits a diagnostic on failure.
bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
                                          MCRegister &RegNo) {
  SMLoc startLoc = getLexer().getLoc();
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  // Try parsing the argument as a register first.
  if (getLexer().getTok().isNot(AsmToken::Integer)) {
    SMLoc endLoc;
    if (parseRegister(RegNo, startLoc, endLoc))
      return true;

    if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
      return Error(startLoc,
                   "register is not supported for use with this directive");
    }
  } else {
    // Otherwise, an integer number matching the encoding of the desired
    // register may appear.
    int64_t EncodedReg;
    if (getParser().parseAbsoluteExpression(EncodedReg))
      return true;

    // The SEH register number is the same as the encoding register number. Map
    // from the encoding back to the LLVM register number.
    RegNo = 0; // 0 doubles as the "not found" sentinel below.
    for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
      if (MRI->getEncodingValue(Reg) == EncodedReg) {
        RegNo = Reg;
        break;
      }
    }
    if (RegNo == 0) {
      return Error(startLoc,
                   "incorrect register number for use with this directive");
    }
  }

  return false;
}

// .seh_pushreg <reg>
// Records a nonvolatile-register push in the Win64 unwind info.
bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
  MCRegister Reg;
  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
    return true;

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("expected end of directive");

  getParser().Lex(); // Consume the EndOfStatement token.
  getStreamer().emitWinCFIPushReg(Reg, Loc);
  return false;
}

// .seh_setframe <reg>, <offset>
// Records the frame register and its offset from the stack pointer.
bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
  MCRegister Reg;
  int64_t Off;
  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
    return true;
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("you must specify a stack pointer offset");

  getParser().Lex(); // Consume the comma.
  if (getParser().parseAbsoluteExpression(Off))
    return true;

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("expected end of directive");

  getParser().Lex(); // Consume the EndOfStatement token.
  getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
  return false;
}

// .seh_savereg <reg>, <offset>
// Records a nonvolatile GPR saved at a stack offset in the unwind info.
bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
  MCRegister Reg;
  int64_t Off;
  if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
    return true;
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("you must specify an offset on the stack");

  getParser().Lex(); // Consume the comma.
  if (getParser().parseAbsoluteExpression(Off))
    return true;

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("expected end of directive");

  getParser().Lex(); // Consume the EndOfStatement token.
  getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
  return false;
}
// .seh_savexmm <xmm-reg>, <offset>
// Records an XMM register saved at a stack offset in the Win64 unwind info.
bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
  MCRegister Reg;
  int64_t Off;
  if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
    return true;
  if (getLexer().isNot(AsmToken::Comma))
    return TokError("you must specify an offset on the stack");

  getParser().Lex(); // Consume the comma.
  if (getParser().parseAbsoluteExpression(Off))
    return true;

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("expected end of directive");

  getParser().Lex(); // Consume the EndOfStatement token.
  getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
  return false;
}

// .seh_pushframe [@code]
// Records a pushed machine frame; the optional "@code" marker flags a frame
// that includes an error code.
bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
  bool Code = false;
  StringRef CodeID;
  if (getLexer().is(AsmToken::At)) {
    SMLoc startLoc = getLexer().getLoc();
    getParser().Lex(); // Consume the '@'.
    // parseIdentifier returns false on success; "code" is the only accepted
    // identifier after '@'.
    if (!getParser().parseIdentifier(CodeID)) {
      if (CodeID != "code")
        return Error(startLoc, "expected @code");
      Code = true;
    }
  }

  if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("expected end of directive");

  getParser().Lex(); // Consume the EndOfStatement token.
  getStreamer().emitWinCFIPushFrame(Code, Loc);
  return false;
}

// Force static initialization: register this parser for both x86 targets so
// TargetRegistry lookups can construct it.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
  RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
  RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}

// Pull in the TableGen-generated instruction matcher implementation.
#define GET_MATCHER_IMPLEMENTATION
#include "X86GenAsmMatcher.inc"