xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86IntelInstPrinter.h"
12 #include "MCTargetDesc/X86MCAsmInfo.h"
13 #include "MCTargetDesc/X86MCExpr.h"
14 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "MCTargetDesc/X86TargetStreamer.h"
16 #include "TargetInfo/X86TargetInfo.h"
17 #include "X86Operand.h"
18 #include "llvm-c/Visibility.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCParser/AsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSection.h"
34 #include "llvm/MC/MCStreamer.h"
35 #include "llvm/MC/MCSubtargetInfo.h"
36 #include "llvm/MC/MCSymbol.h"
37 #include "llvm/MC/TargetRegistry.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Compiler.h"
40 #include "llvm/Support/SourceMgr.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <memory>
44 
45 using namespace llvm;
46 
// Experimental command-line flag enabling hardening of inline assembly
// against Load Value Injection (LVI) gadgets. Hidden from -help.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);
51 
checkScale(unsigned Scale,StringRef & ErrMsg)52 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
53   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
54     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
55     return true;
56   }
57   return false;
58 }
59 
60 namespace {
61 
62 // Including the generated SSE2AVX compression tables.
63 #define GET_X86_SSE2AVX_TABLE
64 #include "X86GenInstrMapping.inc"
65 
// Binding strength for each infix-calculator operator; a larger value binds
// tighter. The table is indexed by InfixCalculatorTok, so the entry order
// below must stay exactly in sync with that enum.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};
90 
// Target assembly parser for X86 (AT&T and Intel syntax).
class X86AsmParser : public MCTargetAsmParser {
  // Per-instruction parsing context supplied by the generic parser.
  ParseInstructionInfo *InstInfo;
  // True when parsing in .code16gcc mode: the stream is 16-bit but
  // instructions are matched as 32-bit (see MatchInstruction below).
  bool Code16GCC;
  // Nonzero when a data-size prefix has been explicitly forced for the next
  // instruction.
  unsigned ForcedDataPrefix = 0;

  // Encoding prefix explicitly requested for the next instruction
  // (presumably from {rex}/{vex}/{evex}-style pseudo-prefixes — confirm in
  // the instruction-parsing code).
  enum OpcodePrefix {
    OpcodePrefix_Default,
    OpcodePrefix_REX,
    OpcodePrefix_REX2,
    OpcodePrefix_VEX,
    OpcodePrefix_VEX2,
    OpcodePrefix_VEX3,
    OpcodePrefix_EVEX,
  };

  OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;

  // Displacement width explicitly requested for the next memory operand.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use apx extended register?
  bool UseApxExtendedReg = false;
  // Is this instruction explicitly required not to update flags?
  bool ForcedNoFlag = false;
121 private:
consumeToken()122   SMLoc consumeToken() {
123     MCAsmParser &Parser = getParser();
124     SMLoc Result = Parser.getTok().getLoc();
125     Parser.Lex();
126     return Result;
127   }
128 
tokenIsStartOfStatement(AsmToken::TokenKind Token)129   bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
130     return Token == AsmToken::LCurly;
131   }
132 
getTargetStreamer()133   X86TargetStreamer &getTargetStreamer() {
134     assert(getParser().getStreamer().getTargetStreamer() &&
135            "do not have a target streamer");
136     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
137     return static_cast<X86TargetStreamer &>(TS);
138   }
139 
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,FeatureBitset & MissingFeatures,bool matchingInlineAsm,unsigned VariantID=0)140   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
141                             uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
142                             bool matchingInlineAsm, unsigned VariantID = 0) {
143     // In Code16GCC mode, match as 32-bit.
144     if (Code16GCC)
145       SwitchMode(X86::Is32Bit);
146     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
147                                        MissingFeatures, matchingInlineAsm,
148                                        VariantID);
149     if (Code16GCC)
150       SwitchMode(X86::Is16Bit);
151     return rv;
152   }
153 
  // Token kinds fed to the infix calculator. NOTE: the enumerator order must
  // match the OpPrecedence table above, which is indexed by these values.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  // Intel-syntax operators (LENGTH/SIZE/TYPE).
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  // MASM-syntax operators (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
192 
  // Evaluates Intel-syntax arithmetic expressions via the shunting-yard
  // technique: pushOperator() reorders operators from an infix stack onto a
  // postfix stack according to OpPrecedence, and execute() then evaluates the
  // postfix form. Operands are immediates or registers; registers are only
  // meaningful to the caller (address base/index handling) and must not reach
  // a binary arithmetic operator other than via the scale special-casing in
  // IntelExprStateMachine.
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    // NEG and NOT are the only unary operators handled here.
    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recently pushed operand value. If the top of the postfix
    // stack is not an operand, return -1, which is an invalid scale that
    // checkScale rejects later.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an operand (immediate value, or a register placeholder).
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    // Shunting-yard step: flush operators of greater-or-equal precedence from
    // the infix stack to the postfix stack, then push the new operator.
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Flush any remaining operators, then evaluate the postfix stack and
    // return the resulting immediate value (0 for an empty expression).
    // Comparison operators follow MASM semantics: -1 for true, 0 for false.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      SmallVector<ICToken, 16> OperandStack;
      for (const ICToken &Op : PostfixStack) {
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
              "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };
409 
  // States of the Intel-expression parser state machine. Most states are
  // named after the token just consumed (operator states expect an operand
  // next; INTEGER/REGISTER/RPAREN/RBRAC states expect an operator or the
  // end of the expression). IES_ERROR is sticky.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };
439 
  // State machine for parsing Intel-syntax address and immediate expressions.
  // It accumulates base/index registers with a scale, a displacement, and at
  // most one symbol reference, while feeding operators and operands into the
  // InfixCalculator.
  class IntelExprStateMachine {
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    // Address registers discovered so far. TmpReg holds the most recently
    // seen register until its role (base vs. index) is decided by the next
    // operator (see onPlus/onMinus/onStar handling).
    MCRegister BaseReg, IndexReg, TmpReg;
    unsigned Scale = 0;
    // Displacement accumulated via addImm(); combined with the calculator's
    // result in getImm().
    int64_t Imm = 0;
    // At most one symbol may appear in a memory operand (see setSymRef).
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    short BracCount = 0;
    bool MemExpr = false;
    bool BracketUsed = false;
    bool OffsetOperator = false;
    // Set when the expression follows an operand index in MS inline asm:
    // [OperandIdx][Intel Expression].
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    AsmTypeInfo CurType;
457 
    // Record the expression's (single) symbol reference. Returns true with
    // ErrMsg set if a symbol was already recorded.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }
467 
  public:
    IntelExprStateMachine() = default;

    // Accumulate a displacement term outside the infix calculator.
    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    MCRegister getBaseReg() const { return BaseReg; }
    MCRegister getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    // Final immediate: explicit displacement plus the evaluated expression.
    int64_t getImm() { return Imm + IC.execute(); }
    // States in which the expression may legally end.
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_RPAREN ||
             State == IES_INTEGER || State == IES_REGISTER ||
             State == IES_OFFSET;
    }

    // Is the Intel expression appended after an operand index:
    // [OperandIdx][Intel Expression]?
    // This is necessary so the back end can check whether it is an
    // independent Intel expression when parsing inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }

    // Emit the appropriate diagnostic for "both BaseReg and IndexReg already
    // occupied"; always returns true (error).
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]:
      // we cannot introduce an additional register in inline asm under the
      // PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }
514 
    // The onXxx handlers below each implement one state transition for a
    // binary operator. Such an operator is only legal after a complete
    // operand (integer, register, or closing parenthesis); from any other
    // state the machine falls into the sticky IES_ERROR state.
    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onEq() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_EQ;
        IC.pushOperator(IC_EQ);
        break;
      }
      PrevState = CurrState;
    }
    void onNE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_NE;
        IC.pushOperator(IC_NE);
        break;
      }
      PrevState = CurrState;
    }
    void onLT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LT;
        IC.pushOperator(IC_LT);
        break;
      }
      PrevState = CurrState;
    }
    void onLE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LE;
        IC.pushOperator(IC_LE);
        break;
      }
      PrevState = CurrState;
    }
    void onGT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GT;
        IC.pushOperator(IC_GT);
        break;
      }
      PrevState = CurrState;
    }
    void onGE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GE;
        IC.pushOperator(IC_GE);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    // Handle '+'. Besides pushing the operator, a '+' after a register
    // decides that register's role in the address: first one becomes the
    // base, a second becomes the index (implicit scale). Returns true on
    // error with ErrMsg set.
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle '-'. Depending on the previous state this is either the binary
    // minus or the unary negate operator; a negate applied to a scale
    // (register * -N) is rejected. Returns true on error with ErrMsg set.
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER  || CurrState == IES_RBRAC  ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle the unary '~' (NOT) operator; legal anywhere an operand could
    // start.
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    // Handle a register token. A register directly after '*' preceded by an
    // integer is the index register of a 'Scale * Register' pair; otherwise
    // it is held in TmpReg until an operator decides its role. Returns true
    // on error with ErrMsg set.
    bool onRegister(MCRegister Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle an identifier. Enum values (MS inline asm) and symbolic
    // constants are folded to integers; otherwise the symbol is recorded via
    // setSymRef and pushed as an immediate placeholder. Returns true on error
    // with ErrMsg set.
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    // Handle an integer literal. An integer directly after 'Register *' is a
    // scale (validated by checkScale); otherwise it is a plain immediate
    // operand. Returns true on error with ErrMsg set.
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
onStar()914     void onStar() {
915       PrevState = State;
916       switch (State) {
917       default:
918         State = IES_ERROR;
919         break;
920       case IES_INTEGER:
921       case IES_REGISTER:
922       case IES_RPAREN:
923         State = IES_MULTIPLY;
924         IC.pushOperator(IC_MULTIPLY);
925         break;
926       }
927     }
onDivide()928     void onDivide() {
929       PrevState = State;
930       switch (State) {
931       default:
932         State = IES_ERROR;
933         break;
934       case IES_INTEGER:
935       case IES_RPAREN:
936         State = IES_DIVIDE;
937         IC.pushOperator(IC_DIVIDE);
938         break;
939       }
940     }
onMod()941     void onMod() {
942       PrevState = State;
943       switch (State) {
944       default:
945         State = IES_ERROR;
946         break;
947       case IES_INTEGER:
948       case IES_RPAREN:
949         State = IES_MOD;
950         IC.pushOperator(IC_MOD);
951         break;
952       }
953     }
    /// State transition for '['. Only a single (non-nested) bracket pair is
    /// permitted per expression; returns true if one is already open.
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        // A '[' after a value (e.g. 'sym[8]') behaves like '+': the bracketed
        // part is added to what came before.
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      // A bracket marks this expression as a memory reference.
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    /// State transition for ']'. Closes the single open bracket and commits a
    /// trailing register to Base/Index; sets \p ErrMsg and returns true on a
    /// mismatched bracket or when both registers are already in use.
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    /// State transition for '('. Legal after any operator or at the start of
    /// a (sub)expression — i.e. anywhere a value could begin.
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    /// State transition for ')'. Legal only after a value; commits a trailing
    /// register to Base/Index the same way onRBrac does.
    bool onRParen(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        // In the case of a multiply, onRegister has already set IndexReg
        // directly, with appropriate scale.
        // Otherwise if we just saw a register it has only been stored in
        // TmpReg, so we need to store it into the state machine.
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        IC.pushOperator(IC_RPAREN);
        break;
      }
      PrevState = CurrState;
      return false;
    }
    /// State transition for the 'OFFSET sym' operator. Records the symbol and
    /// the operator's source location; since the value cannot be resolved yet,
    /// a 0 placeholder is pushed onto the operand stack.
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
onCast(AsmTypeInfo Info)1105     void onCast(AsmTypeInfo Info) {
1106       PrevState = State;
1107       switch (State) {
1108       default:
1109         State = IES_ERROR;
1110         break;
1111       case IES_LPAREN:
1112         setTypeInfo(Info);
1113         State = IES_CAST;
1114         break;
1115       }
1116     }
setTypeInfo(AsmTypeInfo Type)1117     void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1118   };
1119 
Error(SMLoc L,const Twine & Msg,SMRange Range=std::nullopt,bool MatchingInlineAsm=false)1120   bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1121              bool MatchingInlineAsm = false) {
1122     MCAsmParser &Parser = getParser();
1123     if (MatchingInlineAsm) {
1124       return false;
1125     }
1126     return Parser.Error(L, Msg, Range);
1127   }
1128 
1129   bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
1130                            SMLoc EndLoc);
1131   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1132                      bool RestoreOnFailure);
1133 
1134   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
1135   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
1136   bool IsSIReg(MCRegister Reg);
1137   MCRegister GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg);
1138   void
1139   AddDefaultSrcDestOperands(OperandVector &Operands,
1140                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1141                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
1142   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
1143                                OperandVector &FinalOperands);
1144   bool parseOperand(OperandVector &Operands, StringRef Name);
1145   bool parseATTOperand(OperandVector &Operands);
1146   bool parseIntelOperand(OperandVector &Operands, StringRef Name);
1147   bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
1148                                 InlineAsmIdentifierInfo &Info, SMLoc &End);
1149   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
1150   unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
1151   unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
1152   unsigned IdentifyMasmOperator(StringRef Name);
1153   bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
1154   bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
1155   bool parseCFlagsOp(OperandVector &Operands);
1156   bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1157                                bool &ParseError, SMLoc &End);
1158   bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
1159                               bool &ParseError, SMLoc &End);
1160   void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
1161                               SMLoc End);
1162   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
1163   bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
1164                                      InlineAsmIdentifierInfo &Info,
1165                                      bool IsUnevaluatedOperand, SMLoc &End,
1166                                      bool IsParsingOffsetOperator = false);
1167   void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1168                           IntelExprStateMachine &SM);
1169 
1170   bool ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc,
1171                        SMLoc EndLoc, OperandVector &Operands);
1172 
1173   X86::CondCode ParseConditionCode(StringRef CCode);
1174 
1175   bool ParseIntelMemoryOperandSize(unsigned &Size);
1176   bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
1177                                MCRegister BaseReg, MCRegister IndexReg,
1178                                unsigned Scale, bool NonAbsMem, SMLoc Start,
1179                                SMLoc End, unsigned Size, StringRef Identifier,
1180                                const InlineAsmIdentifierInfo &Info,
1181                                OperandVector &Operands);
1182 
1183   bool parseDirectiveArch();
1184   bool parseDirectiveNops(SMLoc L);
1185   bool parseDirectiveEven(SMLoc L);
1186   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1187 
1188   /// CodeView FPO data directives.
1189   bool parseDirectiveFPOProc(SMLoc L);
1190   bool parseDirectiveFPOSetFrame(SMLoc L);
1191   bool parseDirectiveFPOPushReg(SMLoc L);
1192   bool parseDirectiveFPOStackAlloc(SMLoc L);
1193   bool parseDirectiveFPOStackAlign(SMLoc L);
1194   bool parseDirectiveFPOEndPrologue(SMLoc L);
1195   bool parseDirectiveFPOEndProc(SMLoc L);
1196 
1197   /// SEH directives.
1198   bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1199   bool parseDirectiveSEHPushReg(SMLoc);
1200   bool parseDirectiveSEHSetFrame(SMLoc);
1201   bool parseDirectiveSEHSaveReg(SMLoc);
1202   bool parseDirectiveSEHSaveXMM(SMLoc);
1203   bool parseDirectiveSEHPushFrame(SMLoc);
1204 
1205   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1206 
1207   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1208   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1209 
1210   // Load Value Injection (LVI) Mitigations for machine code
1211   void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1212   void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1213   void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1214 
1215   /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1216   /// instrumentation around Inst.
1217   void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1218 
1219   bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1220                                OperandVector &Operands, MCStreamer &Out,
1221                                uint64_t &ErrorInfo,
1222                                bool MatchingInlineAsm) override;
1223 
1224   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1225                          MCStreamer &Out, bool MatchingInlineAsm);
1226 
1227   bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1228                            bool MatchingInlineAsm);
1229 
1230   bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1231                                   OperandVector &Operands, MCStreamer &Out,
1232                                   uint64_t &ErrorInfo, bool MatchingInlineAsm);
1233 
1234   bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1235                                     OperandVector &Operands, MCStreamer &Out,
1236                                     uint64_t &ErrorInfo,
1237                                     bool MatchingInlineAsm);
1238 
1239   bool omitRegisterFromClobberLists(MCRegister Reg) override;
1240 
1241   /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1242   /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1243   /// return false if no parsing errors occurred, true otherwise.
1244   bool HandleAVX512Operand(OperandVector &Operands);
1245 
1246   bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1247 
is64BitMode() const1248   bool is64BitMode() const {
1249     // FIXME: Can tablegen auto-generate this?
1250     return getSTI().hasFeature(X86::Is64Bit);
1251   }
is32BitMode() const1252   bool is32BitMode() const {
1253     // FIXME: Can tablegen auto-generate this?
1254     return getSTI().hasFeature(X86::Is32Bit);
1255   }
is16BitMode() const1256   bool is16BitMode() const {
1257     // FIXME: Can tablegen auto-generate this?
1258     return getSTI().hasFeature(X86::Is16Bit);
1259   }
  /// Switch the subtarget into exactly one of 16/32/64-bit mode by toggling,
  /// in one step, every mode feature bit that disagrees with the requested
  /// \p mode, then recompute the available-feature set.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) = bits that differ from the target mode; toggling
    // them clears the old mode bit(s) and sets the new one simultaneously.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // Exactly one mode bit must be set afterwards.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1270 
getPointerWidth()1271   unsigned getPointerWidth() {
1272     if (is16BitMode()) return 16;
1273     if (is32BitMode()) return 32;
1274     if (is64BitMode()) return 64;
1275     llvm_unreachable("invalid mode");
1276   }
1277 
isParsingIntelSyntax()1278   bool isParsingIntelSyntax() {
1279     return getParser().getAssemblerDialect();
1280   }
1281 
1282   /// @name Auto-generated Matcher Functions
1283   /// {
1284 
1285 #define GET_ASSEMBLER_HEADER
1286 #include "X86GenAsmMatcher.inc"
1287 
1288   /// }
1289 
1290 public:
1291   enum X86MatchResultTy {
1292     Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1293 #define GET_OPERAND_DIAGNOSTIC_TYPES
1294 #include "X86GenAsmMatcher.inc"
1295   };
1296 
X86AsmParser(const MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)1297   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
1298                const MCInstrInfo &mii, const MCTargetOptions &Options)
1299       : MCTargetAsmParser(Options, sti, mii),  InstInfo(nullptr),
1300         Code16GCC(false) {
1301 
1302     Parser.addAliasForDirective(".word", ".2byte");
1303 
1304     // Initialize the set of available features.
1305     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
1306   }
1307 
1308   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1309   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1310                                SMLoc &EndLoc) override;
1311 
1312   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1313 
1314   bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1315                         SMLoc NameLoc, OperandVector &Operands) override;
1316 
1317   bool ParseDirective(AsmToken DirectiveID) override;
1318 };
1319 } // end anonymous namespace
1320 
1321 #define GET_REGISTER_MATCHER
1322 #define GET_SUBTARGET_FEATURE_NAME
1323 #include "X86GenAsmMatcher.inc"
1324 
/// Validate a parsed base/index/scale combination against x86 addressing-mode
/// rules for the current mode. Returns true and sets \p ErrMsg on failure;
/// otherwise falls through to the scale check.
static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg,
                                            MCRegister IndexReg, unsigned Scale,
                                            bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.

  if (BaseReg &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // EIZ/RIZ are pseudo index registers accepted alongside GPRs and, for VSIB,
  // vector registers.
  if (IndexReg &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // RIP/EIP-relative forms take no index register, and (E|R)SP / (E|R)IP can
  // never serve as an index.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP || IndexReg == X86::ESP ||
      IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  if (!BaseReg &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  if (BaseReg && IndexReg) {
    // Base and index must agree in width (EIZ/RIZ count as 32/64-bit).
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      // 16-bit addressing only encodes BX/BP as base combined with SI/DI.
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}
1413 
/// Resolve \p RegName (with or without a leading '%') to an MCRegister,
/// enforcing mode restrictions and handling the db<N> -> dr<N> alias.
/// Returns true on error; in Intel syntax an unknown name fails silently so
/// the caller can treat it as an identifier.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (!RegNo)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = MCRegister();

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // Record that an APX extended register was referenced.
  if (X86II::isApxExtendedReg(RegNo))
    UseApxExtendedReg = true;

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (!RegNo && RegName.starts_with("db")) {
    if (RegName.size() == 3) {
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      // "db10".."db15".
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (!RegNo) {
    // In Intel syntax the name may still be a symbol; fail without diagnosing.
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}
1516 
/// Parse a register token (AT&T '%reg' or a bare name), including the
/// multi-token '%st(N)' form. When \p RestoreOnFailure is set, every consumed
/// token is un-lexed on failure so the caller can re-parse the input.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  AsmLexer &Lexer = getLexer();
  RegNo = MCRegister();

  // Tokens consumed so far, in order, so they can be pushed back on failure.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // Intel syntax: not necessarily an error, the token may be something else.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (!RegNo) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1613 
parseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)1614 bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
1615                                  SMLoc &EndLoc) {
1616   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
1617 }
1618 
tryParseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)1619 ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1620                                            SMLoc &EndLoc) {
1621   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1622   bool PendingErrors = getParser().hasPendingError();
1623   getParser().clearPendingErrors();
1624   if (PendingErrors)
1625     return ParseStatus::Failure;
1626   if (Result)
1627     return ParseStatus::NoMatch;
1628   return ParseStatus::Success;
1629 }
1630 
DefaultMemSIOperand(SMLoc Loc)1631 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1632   bool Parse32 = is32BitMode() || Code16GCC;
1633   MCRegister Basereg =
1634       is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1635   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1636   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1637                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1638                                Loc, Loc, 0);
1639 }
1640 
DefaultMemDIOperand(SMLoc Loc)1641 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1642   bool Parse32 = is32BitMode() || Code16GCC;
1643   MCRegister Basereg =
1644       is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1645   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1646   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1647                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1648                                Loc, Loc, 0);
1649 }
1650 
IsSIReg(MCRegister Reg)1651 bool X86AsmParser::IsSIReg(MCRegister Reg) {
1652   switch (Reg.id()) {
1653   default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1654   case X86::RSI:
1655   case X86::ESI:
1656   case X86::SI:
1657     return true;
1658   case X86::RDI:
1659   case X86::EDI:
1660   case X86::DI:
1661     return false;
1662   }
1663 }
1664 
GetSIDIForRegClass(unsigned RegClassID,bool IsSIReg)1665 MCRegister X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg) {
1666   switch (RegClassID) {
1667   default: llvm_unreachable("Unexpected register class");
1668   case X86::GR64RegClassID:
1669     return IsSIReg ? X86::RSI : X86::RDI;
1670   case X86::GR32RegClassID:
1671     return IsSIReg ? X86::ESI : X86::EDI;
1672   case X86::GR16RegClassID:
1673     return IsSIReg ? X86::SI : X86::DI;
1674   }
1675 }
1676 
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1677 void X86AsmParser::AddDefaultSrcDestOperands(
1678     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1679     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1680   if (isParsingIntelSyntax()) {
1681     Operands.push_back(std::move(Dst));
1682     Operands.push_back(std::move(Src));
1683   }
1684   else {
1685     Operands.push_back(std::move(Src));
1686     Operands.push_back(std::move(Dst));
1687   }
1688 }
1689 
// Verify that the operands the user wrote for a string instruction agree with
// the canonical SI/DI-based operands in FinalOperands, warn when a memory
// operand only contributed its size, and replace the parsed operands with the
// adjusted final ones. Returns true (with a diagnostic) only on a genuine
// index-register mismatch; a plain `return false` lets normal operand
// matching produce its own complaint.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        MCRegister OrigReg = OrigOp.Mem.BaseReg;
        MCRegister FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Canonicalize the base register to the SI/DI register of the class,
        // then warn if the user's base differed (it was only used for sizing).
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        // Propagate the user-written size and segment onto the final operand.
        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}
1771 
parseOperand(OperandVector & Operands,StringRef Name)1772 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1773   if (isParsingIntelSyntax())
1774     return parseIntelOperand(Operands, Name);
1775 
1776   return parseATTOperand(Operands);
1777 }
1778 
// Build the memory operand used when matching MS inline assembly, choosing
// the operand form based on what the frontend identifier lookup reported.
bool X86AsmParser::CreateMemForMSInlineAsm(
    MCRegister SegReg, const MCExpr *Disp, MCRegister BaseReg,
    MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End,
    unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info,
    OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol.  The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, which is then inaccessible via rip/eip.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
  // Default case: a (possibly RIP-relative) memory reference to the symbol.
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1824 
1825 // Some binary bitwise operators have a named synonymous
1826 // Query a candidate string for being such a named operator
1827 // and if so - invoke the appropriate handler
ParseIntelNamedOperator(StringRef Name,IntelExprStateMachine & SM,bool & ParseError,SMLoc & End)1828 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
1829                                            IntelExprStateMachine &SM,
1830                                            bool &ParseError, SMLoc &End) {
1831   // A named operator should be either lower or upper case, but not a mix...
1832   // except in MASM, which uses full case-insensitivity.
1833   if (Name != Name.lower() && Name != Name.upper() &&
1834       !getParser().isParsingMasm())
1835     return false;
1836   if (Name.equals_insensitive("not")) {
1837     SM.onNot();
1838   } else if (Name.equals_insensitive("or")) {
1839     SM.onOr();
1840   } else if (Name.equals_insensitive("shl")) {
1841     SM.onLShift();
1842   } else if (Name.equals_insensitive("shr")) {
1843     SM.onRShift();
1844   } else if (Name.equals_insensitive("xor")) {
1845     SM.onXor();
1846   } else if (Name.equals_insensitive("and")) {
1847     SM.onAnd();
1848   } else if (Name.equals_insensitive("mod")) {
1849     SM.onMod();
1850   } else if (Name.equals_insensitive("offset")) {
1851     SMLoc OffsetLoc = getTok().getLoc();
1852     const MCExpr *Val = nullptr;
1853     StringRef ID;
1854     InlineAsmIdentifierInfo Info;
1855     ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
1856     if (ParseError)
1857       return true;
1858     StringRef ErrMsg;
1859     ParseError =
1860         SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
1861     if (ParseError)
1862       return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
1863   } else {
1864     return false;
1865   }
1866   if (!Name.equals_insensitive("offset"))
1867     End = consumeToken();
1868   return true;
1869 }
ParseMasmNamedOperator(StringRef Name,IntelExprStateMachine & SM,bool & ParseError,SMLoc & End)1870 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1871                                           IntelExprStateMachine &SM,
1872                                           bool &ParseError, SMLoc &End) {
1873   if (Name.equals_insensitive("eq")) {
1874     SM.onEq();
1875   } else if (Name.equals_insensitive("ne")) {
1876     SM.onNE();
1877   } else if (Name.equals_insensitive("lt")) {
1878     SM.onLT();
1879   } else if (Name.equals_insensitive("le")) {
1880     SM.onLE();
1881   } else if (Name.equals_insensitive("gt")) {
1882     SM.onGT();
1883   } else if (Name.equals_insensitive("ge")) {
1884     SM.onGE();
1885   } else {
1886     return false;
1887   }
1888   End = consumeToken();
1889   return true;
1890 }
1891 
1892 // Check if current intel expression append after an operand.
1893 // Like: [Operand][Intel Expression]
tryParseOperandIdx(AsmToken::TokenKind PrevTK,IntelExprStateMachine & SM)1894 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1895                                       IntelExprStateMachine &SM) {
1896   if (PrevTK != AsmToken::RBrac)
1897     return;
1898 
1899   SM.setAppendAfterOperand();
1900 }
1901 
// Parse a full Intel-syntax expression, feeding the IntelExprStateMachine one
// token at a time until a token that cannot extend the expression is seen
// while the machine is in a valid end state. Returns true on error; End is
// updated to just past the last consumed token.
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  StringRef ErrMsg;

  AsmToken::TokenKind PrevTK = AsmToken::Error;

  if (getContext().getObjectFileInfo()->isPositionIndependent())
    SM.setPIC();

  bool Done = false;
  while (!Done) {
    // Get a fresh reference on each loop iteration in case the previous
    // iteration moved the token storage during UnLex().
    const AsmToken &Tok = Parser.getTok();

    // Most cases let the epilogue below consume the token; cases that do
    // their own lexing clear UpdateLocLex.
    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      // Any token we don't handle terminates the expression if the state
      // machine accepts the current state; otherwise it is an error.
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::Error:
      return Error(getLexer().getErrLoc(), getLexer().getErr());
      break; // NOTE(review): unreachable after the return above.
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dot:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      // MASM allows spaces around the dot operator (e.g., "var . x")
      Lex();
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dollar:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      [[fallthrough]];
    case AsmToken::String: {
      if (Parser.isParsingMasm()) {
        // MASM parsers handle strings in expressions as constants.
        SMLoc ValueLoc = Tok.getLoc();
        int64_t Res;
        const MCExpr *Val;
        if (Parser.parsePrimaryExpr(Val, End, nullptr))
          return true;
        UpdateLocLex = false;
        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
          return Error(ValueLoc, "expected absolute value");
        if (SM.onInteger(Res, ErrMsg))
          return Error(ValueLoc, ErrMsg);
        break;
      }
      [[fallthrough]];
    }
    case AsmToken::At:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
      if (Parser.isParsingMasm()) {
        // Re-lex an identifier containing a dot back into
        // <LHS> '.' <RHS> tokens so the dot-operator path can handle it.
        size_t DotOffset = Identifier.find_first_of('.');
        if (DotOffset != StringRef::npos) {
          consumeToken();
          StringRef LHS = Identifier.slice(0, DotOffset);
          StringRef Dot = Identifier.substr(DotOffset, 1);
          StringRef RHS = Identifier.substr(DotOffset + 1);
          if (!RHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
          }
          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
          if (!LHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
          }
          break;
        }
      }
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_insensitive("ptr")) {
          AsmTypeInfo Info;
          if (Parser.lookUpType(Identifier, Info))
            return Error(Tok.getLoc(), "unknown type");
          SM.onCast(Info);
          // Eat type and PTR.
          consumeToken();
          End = consumeToken();
          break;
        }
      }
      // Register, or (MASM only) <register>.<field>
      MCRegister Reg;
      if (Tok.is(AsmToken::Identifier)) {
        if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
          if (SM.onRegister(Reg, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (Parser.isParsingMasm()) {
          const std::pair<StringRef, StringRef> IDField =
              Tok.getString().split('.');
          const StringRef ID = IDField.first, Field = IDField.second;
          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
          if (!Field.empty() &&
              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
            // <register>.<field>: register plus constant field offset.
            if (SM.onRegister(Reg, ErrMsg))
              return Error(IdentLoc, ErrMsg);

            AsmFieldInfo Info;
            SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
            if (Parser.lookUpField(Field, Info))
              return Error(FieldStartLoc, "unknown offset");
            else if (SM.onPlus(ErrMsg))
              return Error(getTok().getLoc(), ErrMsg);
            else if (SM.onInteger(Info.Offset, ErrMsg))
              return Error(IdentLoc, ErrMsg);
            SM.setTypeInfo(Info.Type);

            End = consumeToken();
            break;
          }
        }
      }
      // Operator synonyms ("not", "or" etc.)
      bool ParseError = false;
      if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      if (Parser.isParsingMasm() &&
          ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
        if (ParseError)
          return true;
        break;
      }
      // Symbol reference, when parsing assembly content
      InlineAsmIdentifierInfo Info;
      AsmFieldInfo FieldInfo;
      const MCExpr *Val;
      if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
        // MS Dot Operator expression
        if (Identifier.contains('.') &&
            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
          if (ParseIntelDotOperator(SM, End))
            return true;
          break;
        }
      }
      if (isParsingMSInlineAsm()) {
        // MS InlineAsm operators (TYPE/LENGTH/SIZE)
        if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
          if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
            if (SM.onInteger(Val, ErrMsg))
              return Error(IdentLoc, ErrMsg);
          } else {
            return true;
          }
          break;
        }
        // MS InlineAsm identifier
        // Call parseIdentifier() to combine @ with the identifier behind it.
        if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
          return Error(IdentLoc, "expected identifier");
        if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
          return true;
        else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     true, ErrMsg))
          return Error(IdentLoc, ErrMsg);
        break;
      }
      if (Parser.isParsingMasm()) {
        if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
          int64_t Val;
          if (ParseMasmOperator(OpKind, Val))
            return true;
          if (SM.onInteger(Val, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
        if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
          // Field offset immediate; <TYPE>.<field specification>
          Lex(); // eat type
          bool EndDot = parseOptionalToken(AsmToken::Dot);
          while (EndDot || (getTok().is(AsmToken::Identifier) &&
                            getTok().getString().starts_with("."))) {
            getParser().parseIdentifier(Identifier);
            if (!EndDot)
              Identifier.consume_front(".");
            EndDot = Identifier.consume_back(".");
            if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
                                        FieldInfo)) {
              SMLoc IDEnd =
                  SMLoc::getFromPointer(Identifier.data() + Identifier.size());
              return Error(IdentLoc, "Unable to lookup field reference!",
                           SMRange(IdentLoc, IDEnd));
            }
            if (!EndDot)
              EndDot = parseOptionalToken(AsmToken::Dot);
          }
          if (SM.onInteger(FieldInfo.Offset, ErrMsg))
            return Error(IdentLoc, ErrMsg);
          break;
        }
      }
      // Plain assembly: fall back to a generic primary expression.
      if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
        return Error(Tok.getLoc(), "Unexpected identifier!");
      } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
                                     false, ErrMsg)) {
        return Error(IdentLoc, ErrMsg);
      }
      break;
    }
    case AsmToken::Integer: {
      // Look for 'b' or 'f' following an Integer as a directional label
      SMLoc Loc = getTok().getLoc();
      int64_t IntVal = getTok().getIntVal();
      End = consumeToken();
      UpdateLocLex = false;
      if (getLexer().getKind() == AsmToken::Identifier) {
        StringRef IDVal = getTok().getString();
        if (IDVal == "f" || IDVal == "b") {
          MCSymbol *Sym =
              getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
          auto Variant = X86::S_None;
          const MCExpr *Val =
              MCSymbolRefExpr::create(Sym, Variant, getContext());
          if (IDVal == "b" && Sym->isUndefined())
            return Error(Loc, "invalid reference to undefined symbol");
          StringRef Identifier = Sym->getName();
          InlineAsmIdentifierInfo Info;
          AsmTypeInfo Type;
          if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
                                  isParsingMSInlineAsm(), ErrMsg))
            return Error(Loc, ErrMsg);
          End = consumeToken();
        } else {
          if (SM.onInteger(IntVal, ErrMsg))
            return Error(Loc, ErrMsg);
        }
      } else {
        if (SM.onInteger(IntVal, ErrMsg))
          return Error(Loc, ErrMsg);
      }
      break;
    }
    case AsmToken::Plus:
      if (SM.onPlus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Minus:
      if (SM.onMinus(ErrMsg))
        return Error(getTok().getLoc(), ErrMsg);
      break;
    case AsmToken::Tilde:   SM.onNot(); break;
    case AsmToken::Star:    SM.onStar(); break;
    case AsmToken::Slash:   SM.onDivide(); break;
    case AsmToken::Percent: SM.onMod(); break;
    case AsmToken::Pipe:    SM.onOr(); break;
    case AsmToken::Caret:   SM.onXor(); break;
    case AsmToken::Amp:     SM.onAnd(); break;
    case AsmToken::LessLess:
                            SM.onLShift(); break;
    case AsmToken::GreaterGreater:
                            SM.onRShift(); break;
    case AsmToken::LBrac:
      if (SM.onLBrac())
        return Error(Tok.getLoc(), "unexpected bracket encountered");
      tryParseOperandIdx(PrevTK, SM);
      break;
    case AsmToken::RBrac:
      if (SM.onRBrac(ErrMsg)) {
        return Error(Tok.getLoc(), ErrMsg);
      }
      break;
    case AsmToken::LParen:  SM.onLParen(); break;
    case AsmToken::RParen:
      if (SM.onRParen(ErrMsg)) {
        return Error(Tok.getLoc(), ErrMsg);
      }
      break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}
2209 
// Record AsmRewrite entries that replace the raw Intel expression text in
// [Start, End) with its canonical form, based on what the state machine
// accumulated (symbol, base/index registers, scale, immediate).
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for a complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
2244 
// Inline assembly may use variable names with namespace alias qualifiers.
// Resolve \p Identifier through the Sema callback, advance the lexer past
// exactly the text the frontend claimed, and produce a symbol reference
// expression in \p Val (left null for enum values).
bool X86AsmParser::ParseIntelInlineAsmIdentifier(
    const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
    bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
  MCAsmParser &Parser = getParser();
  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
          "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
      SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                         Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name,
    // unless we are parsing the operand of an offset operator
    if (!IsParsingOffsetOperator)
      InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                          InternalName);
    else
      Identifier = InternalName;
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    // Enum values need no symbol; leave Val null.
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  auto Variant = X86::S_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}
2296 
// ParseRoundingModeOp - Parse an AVX-512 rounding mode operand: either
// "{rn|rd|ru|rz-sae}" (emitted as an immediate) or "{sae}" (emitted as a
// token operand).
bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  // Eat "{" and mark the current place.
  const SMLoc consumedToken = consumeToken();
  if (Tok.isNot(AsmToken::Identifier))
    return Error(Tok.getLoc(), "Expected an identifier after {");
  if (Tok.getIdentifier().starts_with("r")) {
    int rndMode = StringSwitch<int>(Tok.getIdentifier())
      .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
      .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
      .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
      .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
      .Default(-1);
    if (-1 == rndMode)
      return Error(Tok.getLoc(), "Invalid rounding mode.");
    Parser.Lex();  // Eat "r*" of r*-sae
    if (!getLexer().is(AsmToken::Minus))
      return Error(Tok.getLoc(), "Expected - at this point");
    Parser.Lex();  // Eat "-"
    Parser.Lex();  // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    SMLoc End = Tok.getEndLoc();
    Parser.Lex();  // Eat "}"
    // The rounding mode is carried as a constant immediate operand.
    const MCExpr *RndModeOp =
      MCConstantExpr::create(rndMode, Parser.getContext());
    Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
    return false;
  }
  if (Tok.getIdentifier() == "sae") {
    Parser.Lex();  // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    Parser.Lex();  // Eat "}"
    Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
    return false;
  }
  return Error(Tok.getLoc(), "unknown token in expression");
}
2338 
/// Parse conditional flags for CCMP/CTEST, e.g. {dfv=of,sf,zf,cf} right after
/// the mnemonic. The flags are encoded into a 4-bit immediate operand
/// (of=0x8, sf=0x4, zf=0x2, cf=0x1); an empty list {dfv=} encodes 0.
/// Returns true (with a diagnostic) on malformed input.
bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  AsmToken Tok = Parser.getTok();
  const SMLoc Start = Tok.getLoc();
  if (!Tok.is(AsmToken::LCurly))
    return Error(Tok.getLoc(), "Expected { at this point");
  Parser.Lex(); // Eat "{"
  Tok = Parser.getTok();
  if (Tok.getIdentifier().lower() != "dfv")
    return Error(Tok.getLoc(), "Expected dfv at this point");
  Parser.Lex(); // Eat "dfv"
  Tok = Parser.getTok();
  if (!Tok.is(AsmToken::Equal))
    return Error(Tok.getLoc(), "Expected = at this point");
  Parser.Lex(); // Eat "="

  Tok = Parser.getTok();
  SMLoc End;
  if (Tok.is(AsmToken::RCurly)) {
    // Empty flag list: encode as immediate 0.
    End = Tok.getEndLoc();
    Operands.push_back(X86Operand::CreateImm(
        MCConstantExpr::create(0, Parser.getContext()), Start, End));
    Parser.Lex(); // Eat "}"
    return false;
  }
  unsigned CFlags = 0;
  // At most four comma-separated flags, each allowed only once.
  for (unsigned I = 0; I < 4; ++I) {
    Tok = Parser.getTok();
    unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
                         .Case("of", 0x8)
                         .Case("sf", 0x4)
                         .Case("zf", 0x2)
                         .Case("cf", 0x1)
                         .Default(~0U);
    if (CFlag == ~0U)
      return Error(Tok.getLoc(), "Invalid conditional flags");

    if (CFlags & CFlag)
      return Error(Tok.getLoc(), "Duplicated conditional flag");
    CFlags |= CFlag;

    Parser.Lex(); // Eat one conditional flag
    Tok = Parser.getTok();
    if (Tok.is(AsmToken::RCurly)) {
      End = Tok.getEndLoc();
      Operands.push_back(X86Operand::CreateImm(
          MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
      Parser.Lex(); // Eat "}"
      return false;
    } else if (I == 3) {
      return Error(Tok.getLoc(), "Expected } at this point");
    } else if (Tok.isNot(AsmToken::Comma)) {
      return Error(Tok.getLoc(), "Expected } or , at this point");
    }
    Parser.Lex(); // Eat ","
  }
  llvm_unreachable("Unexpected control flow");
}
2399 
/// Parse the '.' operator (e.g. "[ebx].4" or a field reference such as
/// "var.field"), accumulating the resulting offset into the state machine's
/// immediate displacement.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
                                         SMLoc &End) {
  const AsmToken &Tok = getTok();
  AsmFieldInfo Info;

  // Drop the optional '.'.
  StringRef DotDispStr = Tok.getString();
  DotDispStr.consume_front(".");
  bool TrailingDot = false;

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    if (DotDispStr.getAsInteger(10, DotDisp))
      return Error(Tok.getLoc(), "Unexpected offset");
    Info.Offset = DotDisp.getZExtValue();
  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
             Tok.is(AsmToken::Identifier)) {
    // Try, in order: the operand's own type, the symbol's type, a global
    // field lookup, and finally the frontend callback (MS inline asm).
    TrailingDot = DotDispStr.consume_back(".");
    const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    const StringRef Base = BaseMember.first, Member = BaseMember.second;
    if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
        getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
        getParser().lookUpField(DotDispStr, Info) &&
        (!SemaCallback ||
         SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
  } else {
    return Error(Tok.getLoc(), "Unexpected token type!");
  }

  // Eat the DotExpression and update End
  End = SMLoc::getFromPointer(DotDispStr.data());
  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
  while (Tok.getLoc().getPointer() < DotExprEndLoc)
    Lex();
  // A trailing dot was stripped above; push it back for the next parse step.
  if (TrailingDot)
    getLexer().UnLex(AsmToken(AsmToken::Dot, "."));
  SM.addImm(Info.Offset);
  SM.setTypeInfo(Info.Type);
  return false;
}
2443 
2444 /// Parse the 'offset' operator.
2445 /// This operator is used to specify the location of a given operand
ParseIntelOffsetOperator(const MCExpr * & Val,StringRef & ID,InlineAsmIdentifierInfo & Info,SMLoc & End)2446 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2447                                             InlineAsmIdentifierInfo &Info,
2448                                             SMLoc &End) {
2449   // Eat offset, mark start of identifier.
2450   SMLoc Start = Lex().getLoc();
2451   ID = getTok().getString();
2452   if (!isParsingMSInlineAsm()) {
2453     if ((getTok().isNot(AsmToken::Identifier) &&
2454          getTok().isNot(AsmToken::String)) ||
2455         getParser().parsePrimaryExpr(Val, End, nullptr))
2456       return Error(Start, "unexpected token!");
2457   } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2458     return Error(Start, "unable to lookup expression");
2459   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2460     return Error(Start, "offset operator cannot yet handle constants");
2461   }
2462   return false;
2463 }
2464 
2465 // Query a candidate string for being an Intel assembly operator
2466 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyIntelInlineAsmOperator(StringRef Name)2467 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2468   return StringSwitch<unsigned>(Name)
2469     .Cases("TYPE","type",IOK_TYPE)
2470     .Cases("SIZE","size",IOK_SIZE)
2471     .Cases("LENGTH","length",IOK_LENGTH)
2472     .Default(IOK_INVALID);
2473 }
2474 
2475 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
2476 /// returns the number of elements in an array.  It returns the value 1 for
2477 /// non-array variables.  The SIZE operator returns the size of a C or C++
2478 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
2479 /// TYPE operator returns the size of a C or C++ type or variable. If the
2480 /// variable is an array, TYPE returns the size of a single element.
ParseIntelInlineAsmOperator(unsigned OpKind)2481 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2482   MCAsmParser &Parser = getParser();
2483   const AsmToken &Tok = Parser.getTok();
2484   Parser.Lex(); // Eat operator.
2485 
2486   const MCExpr *Val = nullptr;
2487   InlineAsmIdentifierInfo Info;
2488   SMLoc Start = Tok.getLoc(), End;
2489   StringRef Identifier = Tok.getString();
2490   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2491                                     /*IsUnevaluatedOperand=*/true, End))
2492     return 0;
2493 
2494   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2495     Error(Start, "unable to lookup expression");
2496     return 0;
2497   }
2498 
2499   unsigned CVal = 0;
2500   switch(OpKind) {
2501   default: llvm_unreachable("Unexpected operand kind!");
2502   case IOK_LENGTH: CVal = Info.Var.Length; break;
2503   case IOK_SIZE: CVal = Info.Var.Size; break;
2504   case IOK_TYPE: CVal = Info.Var.Type; break;
2505   }
2506 
2507   return CVal;
2508 }
2509 
2510 // Query a candidate string for being an Intel assembly operator
2511 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyMasmOperator(StringRef Name)2512 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2513   return StringSwitch<unsigned>(Name.lower())
2514       .Case("type", MOK_TYPE)
2515       .Cases("size", "sizeof", MOK_SIZEOF)
2516       .Cases("length", "lengthof", MOK_LENGTHOF)
2517       .Default(MOK_INVALID);
2518 }
2519 
2520 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators.  The LENGTHOF operator
2521 /// returns the number of elements in an array.  It returns the value 1 for
2522 /// non-array variables.  The SIZEOF operator returns the size of a type or
2523 /// variable in bytes.  A variable's size is the product of its LENGTH and TYPE.
2524 /// The TYPE operator returns the size of a variable. If the variable is an
2525 /// array, TYPE returns the size of a single element.
ParseMasmOperator(unsigned OpKind,int64_t & Val)2526 bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
2527   MCAsmParser &Parser = getParser();
2528   SMLoc OpLoc = Parser.getTok().getLoc();
2529   Parser.Lex(); // Eat operator.
2530 
2531   Val = 0;
2532   if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
2533     // Check for SIZEOF(<type>) and TYPE(<type>).
2534     bool InParens = Parser.getTok().is(AsmToken::LParen);
2535     const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
2536     AsmTypeInfo Type;
2537     if (IDTok.is(AsmToken::Identifier) &&
2538         !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
2539       Val = Type.Size;
2540 
2541       // Eat tokens.
2542       if (InParens)
2543         parseToken(AsmToken::LParen);
2544       parseToken(AsmToken::Identifier);
2545       if (InParens)
2546         parseToken(AsmToken::RParen);
2547     }
2548   }
2549 
2550   if (!Val) {
2551     IntelExprStateMachine SM;
2552     SMLoc End, Start = Parser.getTok().getLoc();
2553     if (ParseIntelExpression(SM, End))
2554       return true;
2555 
2556     switch (OpKind) {
2557     default:
2558       llvm_unreachable("Unexpected operand kind!");
2559     case MOK_SIZEOF:
2560       Val = SM.getSize();
2561       break;
2562     case MOK_LENGTHOF:
2563       Val = SM.getLength();
2564       break;
2565     case MOK_TYPE:
2566       Val = SM.getElementSize();
2567       break;
2568     }
2569 
2570     if (!Val)
2571       return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
2572   }
2573 
2574   return false;
2575 }
2576 
ParseIntelMemoryOperandSize(unsigned & Size)2577 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2578   Size = StringSwitch<unsigned>(getTok().getString())
2579     .Cases("BYTE", "byte", 8)
2580     .Cases("WORD", "word", 16)
2581     .Cases("DWORD", "dword", 32)
2582     .Cases("FLOAT", "float", 32)
2583     .Cases("LONG", "long", 32)
2584     .Cases("FWORD", "fword", 48)
2585     .Cases("DOUBLE", "double", 64)
2586     .Cases("QWORD", "qword", 64)
2587     .Cases("MMWORD","mmword", 64)
2588     .Cases("XWORD", "xword", 80)
2589     .Cases("TBYTE", "tbyte", 80)
2590     .Cases("XMMWORD", "xmmword", 128)
2591     .Cases("YMMWORD", "ymmword", 256)
2592     .Cases("ZMMWORD", "zmmword", 512)
2593     .Default(0);
2594   if (Size) {
2595     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2596     if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2597       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2598     Lex(); // Eat ptr.
2599   }
2600   return false;
2601 }
2602 
/// Parse a single operand in Intel syntax: a rounding-mode clause, a
/// register, an immediate expression, or a memory reference (optionally
/// preceded by a "<size> PTR" directive and/or a segment override).
/// \p Name is the instruction mnemonic, used only to special-case the
/// unconditional branches "jmp" and "call". Returns true on error.
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override.
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override; check that it is a valid segment register.
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location.
    Start = Lex().getLoc();
  }

  // Immediates and Memory: parse the rest through the Intel expression
  // state machine, which accumulates base/index/scale/displacement.
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Fold the symbolic and constant parts of the expression into a single
  // displacement expression.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override.
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  MCRegister BaseReg = SM.getBaseReg();
  MCRegister IndexReg = SM.getIndexReg();
  // RIP cannot be combined with an index register; drop it as base here.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = MCRegister();
  unsigned Scale = SM.getScale();
  // Without an explicit PTR directive, derive the operand width (in bits)
  // from the expression's element size (in bytes).
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // (E/R)SP may not be an index register; if no explicit scale was given and
  // it landed in the index slot, swap it with the base.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
                                   IsUnconditionalBranch && is64BitMode(),
                                   Start, End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  MCRegister DefaultBaseReg;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    if (is64BitMode() &&
        ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        // "jmp qword ptr X" is an indirect branch through memory, never a
        // direct branch target.
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  // Emit the memory operand: the first form carries segment/base/index
  // registers, the second is a bare displacement.
  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}
2775 
/// Parse a single operand in AT&T syntax: '$' immediates, '{...}' rounding
/// modes, plain registers, and memory references (with optional segment
/// override). Returns true on error.
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supercede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        // An expression that evaluated to a register is also rejected here.
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This is a memory operand or a register. We have some parsing
    // complications as a '(' may be part of an immediate expression or the
    // addressing mode block. This is complicated by the fact that an
    // assembler-level variable may refer either to a register or an
    // immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    MCRegister Reg;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // The expression was a register. Reset Expr (so ParseMemOperand
        // below sees no displacement) and copy the value into Reg.
        Expr = nullptr;
        Reg = RE->getReg();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Registers that are not segment prefixes are returned immediately;
        // a following ':' means Reg is a segment override for what follows.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}
2843 
2844 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2845 // otherwise the EFLAGS Condition Code enumerator.
ParseConditionCode(StringRef CC)2846 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2847   return StringSwitch<X86::CondCode>(CC)
2848       .Case("o", X86::COND_O)          // Overflow
2849       .Case("no", X86::COND_NO)        // No Overflow
2850       .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
2851       .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2852       .Cases("e", "z", X86::COND_E)    // Equal/Zero
2853       .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2854       .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2855       .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
2856       .Case("s", X86::COND_S)          // Sign
2857       .Case("ns", X86::COND_NS)        // No Sign
2858       .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
2859       .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2860       .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
2861       .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2862       .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2863       .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
2864       .Default(X86::COND_INVALID);
2865 }
2866 
2867 // true on failure, false otherwise
2868 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)2869 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2870                           const SMLoc &StartLoc) {
2871   MCAsmParser &Parser = getParser();
2872   // Assuming we are just pass the '{' mark, quering the next token
2873   // Searched for {z}, but none was found. Return false, as no parsing error was
2874   // encountered
2875   if (!(getLexer().is(AsmToken::Identifier) &&
2876         (getLexer().getTok().getIdentifier() == "z")))
2877     return false;
2878   Parser.Lex(); // Eat z
2879   // Query and eat the '}' mark
2880   if (!getLexer().is(AsmToken::RCurly))
2881     return Error(getLexer().getLoc(), "Expected } at this point");
2882   Parser.Lex(); // Eat '}'
2883   // Assign Z with the {z} mark operand
2884   Z = X86Operand::CreateToken("{z}", StartLoc);
2885   return false;
2886 }
2887 
// Parse trailing AVX-512 operand decorations: a memory-broadcast suffix
// ({1to<NUM>}), an op-mask register ({%k<NUM>}), and/or a zeroing marker
// ({z}), pushing the recognized pieces onto Operands.
// Returns true on failure, false otherwise.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}: a broadcast starts with the
    // integer 1.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat the leading "1" of e.g. 1to8.
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // "1to8" lexes as the integer "1" followed by the identifier "to8";
      // reassemble the full spelling and recognize only reasonable suffixes.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}; the last has no meaning on
      // its own, but GCC accepts it.
      // At this point the '{' has already been consumed.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing the alleged '{z}' mark yielded
      // no errors (Z may still be null if no {z} was present).
      // Query for the need of further parsing for a {%k<NUM>} mark.
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 denotes "no masking" and is not encodable as a write mask.
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                        "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Either a parsing error occurred, or the expected {z} mark was
          // not found - report an error.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
2976 
2977 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
2978 /// has already been parsed if present. disp may be provided as well.
ParseMemOperand(MCRegister SegReg,const MCExpr * Disp,SMLoc StartLoc,SMLoc EndLoc,OperandVector & Operands)2979 bool X86AsmParser::ParseMemOperand(MCRegister SegReg, const MCExpr *Disp,
2980                                    SMLoc StartLoc, SMLoc EndLoc,
2981                                    OperandVector &Operands) {
2982   MCAsmParser &Parser = getParser();
2983   SMLoc Loc;
2984   // Based on the initial passed values, we may be in any of these cases, we are
2985   // in one of these cases (with current position (*)):
2986 
2987   //   1. seg : * disp  (base-index-scale-expr)
2988   //   2. seg : *(disp) (base-index-scale-expr)
2989   //   3. seg :       *(base-index-scale-expr)
2990   //   4.        disp  *(base-index-scale-expr)
2991   //   5.      *(disp)  (base-index-scale-expr)
2992   //   6.             *(base-index-scale-expr)
2993   //   7.  disp *
2994   //   8. *(disp)
2995 
2996   // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2997   // checking if the first object after the parenthesis is a register (or an
2998   // identifier referring to a register) and parse the displacement or default
2999   // to 0 as appropriate.
3000   auto isAtMemOperand = [this]() {
3001     if (this->getLexer().isNot(AsmToken::LParen))
3002       return false;
3003     AsmToken Buf[2];
3004     StringRef Id;
3005     auto TokCount = this->getLexer().peekTokens(Buf, true);
3006     if (TokCount == 0)
3007       return false;
3008     switch (Buf[0].getKind()) {
3009     case AsmToken::Percent:
3010     case AsmToken::Comma:
3011       return true;
3012     // These lower cases are doing a peekIdentifier.
3013     case AsmToken::At:
3014     case AsmToken::Dollar:
3015       if ((TokCount > 1) &&
3016           (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
3017           (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
3018         Id = StringRef(Buf[0].getLoc().getPointer(),
3019                        Buf[1].getIdentifier().size() + 1);
3020       break;
3021     case AsmToken::Identifier:
3022     case AsmToken::String:
3023       Id = Buf[0].getIdentifier();
3024       break;
3025     default:
3026       return false;
3027     }
3028     // We have an ID. Check if it is bound to a register.
3029     if (!Id.empty()) {
3030       MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
3031       if (Sym->isVariable()) {
3032         auto V = Sym->getVariableValue();
3033         return isa<X86MCExpr>(V);
3034       }
3035     }
3036     return false;
3037   };
3038 
3039   if (!Disp) {
3040     // Parse immediate if we're not at a mem operand yet.
3041     if (!isAtMemOperand()) {
3042       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
3043         return true;
3044       assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
3045     } else {
3046       // Disp is implicitly zero if we haven't parsed it yet.
3047       Disp = MCConstantExpr::create(0, Parser.getContext());
3048     }
3049   }
3050 
3051   // We are now either at the end of the operand or at the '(' at the start of a
3052   // base-index-scale-expr.
3053 
3054   if (!parseOptionalToken(AsmToken::LParen)) {
3055     if (!SegReg)
3056       Operands.push_back(
3057           X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3058     else
3059       Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3060                                                0, 0, 1, StartLoc, EndLoc));
3061     return false;
3062   }
3063 
3064   // If we reached here, then eat the '(' and Process
3065   // the rest of the memory operand.
3066   MCRegister BaseReg, IndexReg;
3067   unsigned Scale = 1;
3068   SMLoc BaseLoc = getLexer().getLoc();
3069   const MCExpr *E;
3070   StringRef ErrMsg;
3071 
3072   // Parse BaseReg if one is provided.
3073   if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
3074     if (Parser.parseExpression(E, EndLoc) ||
3075         check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
3076       return true;
3077 
3078     // Check the register.
3079     BaseReg = cast<X86MCExpr>(E)->getReg();
3080     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
3081       return Error(BaseLoc, "eiz and riz can only be used as index registers",
3082                    SMRange(BaseLoc, EndLoc));
3083   }
3084 
3085   if (parseOptionalToken(AsmToken::Comma)) {
3086     // Following the comma we should have either an index register, or a scale
3087     // value. We don't support the later form, but we want to parse it
3088     // correctly.
3089     //
3090     // Even though it would be completely consistent to support syntax like
3091     // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
3092     if (getLexer().isNot(AsmToken::RParen)) {
3093       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
3094         return true;
3095 
3096       if (!isa<X86MCExpr>(E)) {
3097         // We've parsed an unexpected Scale Value instead of an index
3098         // register. Interpret it as an absolute.
3099         int64_t ScaleVal;
3100         if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
3101           return Error(Loc, "expected absolute expression");
3102         if (ScaleVal != 1)
3103           Warning(Loc, "scale factor without index register is ignored");
3104         Scale = 1;
3105       } else { // IndexReg Found.
3106         IndexReg = cast<X86MCExpr>(E)->getReg();
3107 
3108         if (BaseReg == X86::RIP)
3109           return Error(Loc,
3110                        "%rip as base register can not have an index register");
3111         if (IndexReg == X86::RIP)
3112           return Error(Loc, "%rip is not allowed as an index register");
3113 
3114         if (parseOptionalToken(AsmToken::Comma)) {
3115           // Parse the scale amount:
3116           //  ::= ',' [scale-expression]
3117 
3118           // A scale amount without an index is ignored.
3119           if (getLexer().isNot(AsmToken::RParen)) {
3120             int64_t ScaleVal;
3121             if (Parser.parseTokenLoc(Loc) ||
3122                 Parser.parseAbsoluteExpression(ScaleVal))
3123               return Error(Loc, "expected scale expression");
3124             Scale = (unsigned)ScaleVal;
3125             // Validate the scale amount.
3126             if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
3127                 Scale != 1)
3128               return Error(Loc, "scale factor in 16-bit address must be 1");
3129             if (checkScale(Scale, ErrMsg))
3130               return Error(Loc, ErrMsg);
3131           }
3132         }
3133       }
3134     }
3135   }
3136 
3137   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
3138   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
3139     return true;
3140 
3141   // This is to support otherwise illegal operand (%dx) found in various
3142   // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
3143   // be supported. Mark such DX variants separately fix only in special cases.
3144   if (BaseReg == X86::DX && !IndexReg && Scale == 1 && !SegReg &&
3145       isa<MCConstantExpr>(Disp) &&
3146       cast<MCConstantExpr>(Disp)->getValue() == 0) {
3147     Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
3148     return false;
3149   }
3150 
3151   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
3152                                       ErrMsg))
3153     return Error(BaseLoc, ErrMsg);
3154 
3155   // If the displacement is a constant, check overflows. For 64-bit addressing,
3156   // gas requires isInt<32> and otherwise reports an error. For others, gas
3157   // reports a warning and allows a wider range. E.g. gas allows
3158   // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
3159   // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
3160   if (BaseReg || IndexReg) {
3161     if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
3162       auto Imm = CE->getValue();
3163       bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
3164                   X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
3165       bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
3166       if (Is64) {
3167         if (!isInt<32>(Imm))
3168           return Error(BaseLoc, "displacement " + Twine(Imm) +
3169                                     " is not within [-2147483648, 2147483647]");
3170       } else if (!Is16) {
3171         if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3172           Warning(BaseLoc, "displacement " + Twine(Imm) +
3173                                " shortened to 32-bit signed " +
3174                                Twine(static_cast<int32_t>(Imm)));
3175         }
3176       } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
3177         Warning(BaseLoc, "displacement " + Twine(Imm) +
3178                              " shortened to 16-bit signed " +
3179                              Twine(static_cast<int16_t>(Imm)));
3180       }
3181     }
3182   }
3183 
3184   if (SegReg || BaseReg || IndexReg)
3185     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
3186                                              BaseReg, IndexReg, Scale, StartLoc,
3187                                              EndLoc));
3188   else
3189     Operands.push_back(
3190         X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
3191   return false;
3192 }
3193 
3194 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)3195 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3196   MCAsmParser &Parser = getParser();
3197   // See if this is a register first.
3198   if (getTok().is(AsmToken::Percent) ||
3199       (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3200        MatchRegisterName(Parser.getTok().getString()))) {
3201     SMLoc StartLoc = Parser.getTok().getLoc();
3202     MCRegister RegNo;
3203     if (parseRegister(RegNo, StartLoc, EndLoc))
3204       return true;
3205     Res = X86MCExpr::create(RegNo, Parser.getContext());
3206     return false;
3207   }
3208   return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3209 }
3210 
// Parse a single instruction statement: any pseudo/legacy prefixes, the
// mnemonic itself (possibly rewritten into an internal "patched" name), and
// the operand list. On success, Operands holds the mnemonic token followed by
// the parsed operands and the statement terminator has been consumed; returns
// true (after emitting a diagnostic) on any parse error.
bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset any forced encoding state left over from the previous instruction.
  ForcedOpcodePrefix = OpcodePrefix_Default;
  ForcedDispEncoding = DispEncoding_Default;
  UseApxExtendedReg = false;
  ForcedNoFlag = false;

  // Parse pseudo prefixes such as "{vex}", "{evex}", "{disp32}", "{nf}".
  while (true) {
    if (Name == "{") {
      // Curly-brace pseudo prefix: "{" identifier "}".
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "rex")
        ForcedOpcodePrefix = OpcodePrefix_REX;
      else if (Prefix == "rex2")
        ForcedOpcodePrefix = OpcodePrefix_REX2;
      else if (Prefix == "vex")
        ForcedOpcodePrefix = OpcodePrefix_VEX;
      else if (Prefix == "vex2")
        ForcedOpcodePrefix = OpcodePrefix_VEX2;
      else if (Prefix == "vex3")
        ForcedOpcodePrefix = OpcodePrefix_VEX3;
      else if (Prefix == "evex")
        ForcedOpcodePrefix = OpcodePrefix_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else if (Prefix == "nf")
        ForcedNoFlag = true;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        // Another pseudo prefix follows; loop around to parse it as well.
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes (bare "vex"/"evex" words, no braces).
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedOpcodePrefix = OpcodePrefix_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedOpcodePrefix = OpcodePrefix_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedOpcodePrefix = OpcodePrefix_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedOpcodePrefix = OpcodePrefix_EVEX;

      if (ForcedOpcodePrefix != OpcodePrefix_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the ".d32"/".d8" suffix syntax for overriding displacement size
  // as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.starts_with("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne. The explicit exclusions keep
  // real mnemonics ending in 'b' (setb/setnb/setzub/setzunb) intact.
  if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
      PatchedName != "setzub" && PatchedName != "setzunb" &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Immediate comparison predicate extracted from the mnemonic, if any;
  // ~0U means "none".
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
      (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
       PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
       PatchedName.ends_with("bf16") || PatchedName.ends_with("ps") ||
       PatchedName.ends_with("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    // Condition code starts after "cmp" (3 chars) or "vcmp" (4 chars).
    unsigned CCIdx = IsVCMP ? 4 : 3;
    // Suffix length: 5 for the "pbf16" spelling, 2 for ss/sd/sh/ph/ps/pd.
    unsigned suffixLength = PatchedName.ends_with("bf16") ? 5 : 2;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - suffixLength))
      .Case("eq",       0x00)
      .Case("eq_oq",    0x00)
      .Case("lt",       0x01)
      .Case("lt_os",    0x01)
      .Case("le",       0x02)
      .Case("le_os",    0x02)
      .Case("unord",    0x03)
      .Case("unord_q",  0x03)
      .Case("neq",      0x04)
      .Case("neq_uq",   0x04)
      .Case("nlt",      0x05)
      .Case("nlt_us",   0x05)
      .Case("nle",      0x06)
      .Case("nle_us",   0x06)
      .Case("ord",      0x07)
      .Case("ord_q",    0x07)
      /* AVX only from here */
      .Case("eq_uq",    0x08)
      .Case("nge",      0x09)
      .Case("nge_us",   0x09)
      .Case("ngt",      0x0A)
      .Case("ngt_us",   0x0A)
      .Case("false",    0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq",   0x0C)
      .Case("ge",       0x0D)
      .Case("ge_os",    0x0D)
      .Case("gt",       0x0E)
      .Case("gt_os",    0x0E)
      .Case("true",     0x0F)
      .Case("true_uq",  0x0F)
      .Case("eq_os",    0x10)
      .Case("lt_oq",    0x11)
      .Case("le_oq",    0x12)
      .Case("unord_s",  0x13)
      .Case("neq_us",   0x14)
      .Case("nlt_uq",   0x15)
      .Case("nle_uq",   0x16)
      .Case("ord_s",    0x17)
      .Case("eq_us",    0x18)
      .Case("nge_uq",   0x19)
      .Case("ngt_uq",   0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os",   0x1C)
      .Case("ge_oq",    0x1D)
      .Case("gt_oq",    0x1E)
      .Case("true_us",  0x1F)
      .Default(~0U);
    // Non-VEX cmp only supports predicates 0-7 and has no half-precision
    // ('h'-suffixed) forms, so those combinations are rejected here.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.ends_with("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.ends_with("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.ends_with("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.ends_with("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.ends_with("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.ends_with("ph"))
        PatchedName = "vcmpph";
      else if (PatchedName.ends_with("bf16"))
        PatchedName = "vcmpbf16";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefix integrity checking; for example, the following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  // Prefixes that are folded into the instruction's IP_HAS_* flag word
  // rather than emitted as separate token operands.
  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have a real instruction with the given prefix;
      //  let's use the prefix as the instruction.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if it's not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.starts_with(";") || Name.starts_with("\n") ||
           Name.starts_with("#") || Name.starts_with("\t") ||
           Name.starts_with("/")) {
      // FIXME: The mnemonic won't match correctly if it's not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  // The (possibly rewritten) mnemonic becomes the first operand.
  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // In AT&T syntax the predicate immediate comes first.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Parse conditional flags after mnemonic.
  if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
      parseCFlagsOp(Operands))
    return true;

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
     }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  // In Intel syntax the predicate immediate comes last.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    // No .Default needed: IsFp guarantees one of the four cases matches.
    const char *Repl = StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.starts_with("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.starts_with("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.starts_with("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.starts_with("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.starts_with("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // AT&T "movsd" with no operands is the string move, not the SSE2
    // instruction; rename it so it matches the right table entry.
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  // Any lock/rep/notrack prefixes become a trailing prefix operand.
  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}
3778 
convertSSEToAVX(MCInst & Inst)3779 static bool convertSSEToAVX(MCInst &Inst) {
3780   ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
3781   unsigned Opcode = Inst.getOpcode();
3782   const auto I = llvm::lower_bound(Table, Opcode);
3783   if (I == Table.end() || I->OldOpc != Opcode)
3784     return false;
3785 
3786   Inst.setOpcode(I->NewOpc);
3787   // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3788   // operand compare to SSE variant, which is added below
3789   if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
3790       X86::isPBLENDVB(Opcode))
3791     Inst.addOperand(Inst.getOperand(2));
3792 
3793   return true;
3794 }
3795 
// Perform target-specific post-processing on a matched instruction.
// Returns true if the MCInst was rewritten (forcing re-selection of the
// encoding), false if it was left unchanged.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  // Optionally upgrade legacy SSE encodings to their AVX equivalents.
  if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
    return true;

  // Shrink VEX3-encoded instructions to VEX2 unless {vex3} was requested.
  if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  // Handle the `{evex} cmp/test` aliases: swap in the conditional opcode and
  // append two immediates (0 and 10 — presumably the default dfv and
  // condition-code encodings; confirm against the CCMP/CTEST definitions).
  auto replaceWithCCMPCTEST = [&](unsigned Opcode) -> bool {
    if (ForcedOpcodePrefix == OpcodePrefix_EVEX) {
      // Drop the EVEX flag: the pseudo prefix has been consumed by this
      // rewrite.
      Inst.setFlags(~(X86::IP_USE_EVEX)&Inst.getFlags());
      Inst.setOpcode(Opcode);
      Inst.addOperand(MCOperand::createImm(0));
      Inst.addOperand(MCOperand::createImm(10));
      return true;
    }
    return false;
  };

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  // `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
  // `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return replaceWithCCMPCTEST(X86::TO);
    FROM_TO(CMP64rr, CCMP64rr)
    FROM_TO(CMP64mi32, CCMP64mi32)
    FROM_TO(CMP64mi8, CCMP64mi8)
    FROM_TO(CMP64mr, CCMP64mr)
    FROM_TO(CMP64ri32, CCMP64ri32)
    FROM_TO(CMP64ri8, CCMP64ri8)
    FROM_TO(CMP64rm, CCMP64rm)

    FROM_TO(CMP32rr, CCMP32rr)
    FROM_TO(CMP32mi, CCMP32mi)
    FROM_TO(CMP32mi8, CCMP32mi8)
    FROM_TO(CMP32mr, CCMP32mr)
    FROM_TO(CMP32ri, CCMP32ri)
    FROM_TO(CMP32ri8, CCMP32ri8)
    FROM_TO(CMP32rm, CCMP32rm)

    FROM_TO(CMP16rr, CCMP16rr)
    FROM_TO(CMP16mi, CCMP16mi)
    FROM_TO(CMP16mi8, CCMP16mi8)
    FROM_TO(CMP16mr, CCMP16mr)
    FROM_TO(CMP16ri, CCMP16ri)
    FROM_TO(CMP16ri8, CCMP16ri8)
    FROM_TO(CMP16rm, CCMP16rm)

    FROM_TO(CMP8rr, CCMP8rr)
    FROM_TO(CMP8mi, CCMP8mi)
    FROM_TO(CMP8mr, CCMP8mr)
    FROM_TO(CMP8ri, CCMP8ri)
    FROM_TO(CMP8rm, CCMP8rm)

    FROM_TO(TEST64rr, CTEST64rr)
    FROM_TO(TEST64mi32, CTEST64mi32)
    FROM_TO(TEST64mr, CTEST64mr)
    FROM_TO(TEST64ri32, CTEST64ri32)

    FROM_TO(TEST32rr, CTEST32rr)
    FROM_TO(TEST32mi, CTEST32mi)
    FROM_TO(TEST32mr, CTEST32mr)
    FROM_TO(TEST32ri, CTEST32ri)

    FROM_TO(TEST16rr, CTEST16rr)
    FROM_TO(TEST16mi, CTEST16mi)
    FROM_TO(TEST16mr, CTEST16mr)
    FROM_TO(TEST16ri, CTEST16ri)

    FROM_TO(TEST8rr, CTEST8rr)
    FROM_TO(TEST8mi, CTEST8mi)
    FROM_TO(TEST8mr, CTEST8mr)
    FROM_TO(TEST8ri, CTEST8ri)
#undef FROM_TO
  }
}
3906 
// Perform target-specific semantic checks on an already-matched instruction.
// Most violations are reported via Warning() (so the return value depends on
// warning settings); the AMX-tile and AH/REX clashes are hard errors.
// Returns true to abort emission, false to continue.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  // Complex FMA forms: the destination (operand 0) must not alias any source
  // register; sources begin at operand 2.
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    MCRegister Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    MCRegister Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //    VFMULCPHZrr   Dest, Src1, Src2
    //    VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //    VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // These instructions implicitly read a group of four consecutive
    // registers; the written Src2 register selects the group (encoding
    // rounded down to a multiple of 4). Warn when Src2 is not the first
    // register of its group, since the other three are used silently.
    MCRegister Src2 =
        Inst.getOperand(Inst.getNumOperands() - X86::AddrNumOperands - 1)
            .getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      // RegName.take_front(3) keeps the "zmm"-style prefix so the message can
      // name the first/last register of the implicit group.
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                     RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                     RegName.take_front(3) + Twine(GroupEnd) +
                     "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    // Gathers: EVEX forms require destination != index register. VEX forms
    // additionally carry an explicit mask operand (operand 1), and all three
    // of destination, mask and index must be pairwise distinct. Note the
    // memory operand starts at a different index in the two encodings.
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) ||
             isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) ||
             isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) {
    // AMX tile compute instructions: all three tmm operands must differ.
    // This is a hard error, not a warning.
    MCRegister SrcDest = Inst.getOperand(0).getReg();
    MCRegister Src1 = Inst.getOperand(2).getReg();
    MCRegister Src2 = Inst.getOperand(3).getReg();
    if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2)
      return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct");
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCRegister HReg;
    // REX is forced either by a REX.W opcode (TSFlags) or by the use of
    // SPL/BPL/SIL/DIL or an extended (r8-r15 etc.) register below.
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      MCRegister Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  // PREFETCHIT0/1 only accept a RIP-relative memory operand; warn otherwise.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}
4017 
// Report an instruction that is LVI-relevant but cannot be mitigated
// automatically: a warning at the instruction's location, followed by a
// location-less note pointing at Intel's LVI guidance.
void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
  Warning(Loc, "Instruction may be vulnerable to LVI and "
               "requires manual mitigation");
  // The note uses an empty SMLoc; it supplements the warning above.
  Note(SMLoc(), "See https://software.intel.com/"
                "security-software-guidance/insights/"
                "deep-dive-load-value-injection#specialinstructions"
                " for more information");
}
4026 
/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Any mitigation or warning is emitted directly through \p Out; the caller
/// (emitInstruction) still emits \p Inst itself afterwards.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Ahead of the RET, emit a no-op "shl $0, (%sp/%esp/%rsp)" followed by an
    // LFENCE. The base register is chosen to match the current mode.
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    MCRegister Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    // NOTE(review): the opcode is SHL64mi even when Basereg is ESP/SP --
    // presumably the encoder handles the narrower address sizes; verify.
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  // Memory-indirect jumps/calls can't be decomposed here; warn instead.
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
4073 
/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Any fence or warning is emitted directly through \p Out; nothing is
/// returned to the caller.
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}
4122 
emitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)4123 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4124                                    MCStreamer &Out) {
4125   if (LVIInlineAsmHardening &&
4126       getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4127     applyLVICFIMitigation(Inst, Out);
4128 
4129   Out.emitInstruction(Inst, getSTI());
4130 
4131   if (LVIInlineAsmHardening &&
4132       getSTI().hasFeature(X86::FeatureLVILoadHardening))
4133     applyLVILoadHardeningMitigation(Inst, Out);
4134 }
4135 
getPrefixes(OperandVector & Operands)4136 static unsigned getPrefixes(OperandVector &Operands) {
4137   unsigned Result = 0;
4138   X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4139   if (Prefix.isPrefix()) {
4140     Result = Prefix.getPrefix();
4141     Operands.pop_back();
4142   }
4143   return Result;
4144 }
4145 
matchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4146 bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4147                                            OperandVector &Operands,
4148                                            MCStreamer &Out, uint64_t &ErrorInfo,
4149                                            bool MatchingInlineAsm) {
4150   assert(!Operands.empty() && "Unexpect empty operand list!");
4151   assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4152 
4153   // First, handle aliases that expand to multiple instructions.
4154   MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4155                     Out, MatchingInlineAsm);
4156   unsigned Prefixes = getPrefixes(Operands);
4157 
4158   MCInst Inst;
4159 
4160   // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4161   // the encoder and printer.
4162   if (ForcedOpcodePrefix == OpcodePrefix_REX)
4163     Prefixes |= X86::IP_USE_REX;
4164   else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
4165     Prefixes |= X86::IP_USE_REX2;
4166   else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
4167     Prefixes |= X86::IP_USE_VEX;
4168   else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
4169     Prefixes |= X86::IP_USE_VEX2;
4170   else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
4171     Prefixes |= X86::IP_USE_VEX3;
4172   else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
4173     Prefixes |= X86::IP_USE_EVEX;
4174 
4175   // Set encoded flags for {disp8} and {disp32}.
4176   if (ForcedDispEncoding == DispEncoding_Disp8)
4177     Prefixes |= X86::IP_USE_DISP8;
4178   else if (ForcedDispEncoding == DispEncoding_Disp32)
4179     Prefixes |= X86::IP_USE_DISP32;
4180 
4181   if (Prefixes)
4182     Inst.setFlags(Prefixes);
4183 
4184   return isParsingIntelSyntax()
4185              ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
4186                                             ErrorInfo, MatchingInlineAsm)
4187              : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
4188                                           ErrorInfo, MatchingInlineAsm);
4189 }
4190 
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)4191 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4192                                      OperandVector &Operands, MCStreamer &Out,
4193                                      bool MatchingInlineAsm) {
4194   // FIXME: This should be replaced with a real .td file alias mechanism.
4195   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4196   // call.
4197   const char *Repl = StringSwitch<const char *>(Op.getToken())
4198                          .Case("finit", "fninit")
4199                          .Case("fsave", "fnsave")
4200                          .Case("fstcw", "fnstcw")
4201                          .Case("fstcww", "fnstcw")
4202                          .Case("fstenv", "fnstenv")
4203                          .Case("fstsw", "fnstsw")
4204                          .Case("fstsww", "fnstsw")
4205                          .Case("fclex", "fnclex")
4206                          .Default(nullptr);
4207   if (Repl) {
4208     MCInst Inst;
4209     Inst.setOpcode(X86::WAIT);
4210     Inst.setLoc(IDLoc);
4211     if (!MatchingInlineAsm)
4212       emitInstruction(Inst, Operands, Out);
4213     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4214   }
4215 }
4216 
ErrorMissingFeature(SMLoc IDLoc,const FeatureBitset & MissingFeatures,bool MatchingInlineAsm)4217 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4218                                        const FeatureBitset &MissingFeatures,
4219                                        bool MatchingInlineAsm) {
4220   assert(MissingFeatures.any() && "Unknown missing feature!");
4221   SmallString<126> Msg;
4222   raw_svector_ostream OS(Msg);
4223   OS << "instruction requires:";
4224   for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4225     if (MissingFeatures[i])
4226       OS << ' ' << getSubtargetFeatureName(i);
4227   }
4228   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4229 }
4230 
// Post-match filter: reject tablegen matches that contradict an explicit
// {rex}/{rex2}/{vex}/{vex2}/{vex3}/{evex}/{nf} pseudo-prefix the user wrote,
// or that use APX extended registers with an incompatible encoding.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);
  uint64_t TSFlags = MCID.TSFlags;

  // APX extended registers are only usable by instructions whose encoding can
  // carry them (per canUseApxExtendedReg).
  if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
    return Match_Unsupported;
  // {nf} must agree with the instruction's EVEX_NF flag; CFCMOVcc is exempt
  // from this check.
  if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
    return Match_Unsupported;

  switch (ForcedOpcodePrefix) {
  case OpcodePrefix_Default:
    break;
  case OpcodePrefix_REX:
  case OpcodePrefix_REX2:
    // {rex}/{rex2} apply only to legacy encodings (no VEX/EVEX/XOP bits set).
    if (TSFlags & X86II::EncodingMask)
      return Match_Unsupported;
    break;
  case OpcodePrefix_VEX:
  case OpcodePrefix_VEX2:
  case OpcodePrefix_VEX3:
    if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
      return Match_Unsupported;
    break;
  case OpcodePrefix_EVEX:
    // In 64-bit mode, {evex} CMP/TEST may deliberately match non-EVEX forms;
    // this parser later rewrites them to CCMP/CTEST (see processInstruction).
    if (is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
        !X86::isCMP(Opc) && !X86::isTEST(Opc))
      return Match_Unsupported;
    if (!is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX)
      return Match_Unsupported;
    break;
  }

  // Instructions flagged ExplicitVEXPrefix only match when the user forced
  // one of the {vex} variants.
  if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix &&
      (ForcedOpcodePrefix != OpcodePrefix_VEX &&
       ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
       ForcedOpcodePrefix != OpcodePrefix_VEX3))
    return Match_Unsupported;

  return Match_Success;
}
4272 
// Match an AT&T-syntax instruction and emit it. Tries a direct match first;
// on failure, retries the mnemonic with each size suffix appended ('b/w/l/q'
// for integer ops, 's/l/t' for FP stack ops) and reports the most specific
// diagnostic it can derive from the four suffixed attempts.
// Returns true on error.
bool X86AsmParser::matchAndEmitATTInstruction(
    SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
    MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  SMRange EmptyRange = std::nullopt;
  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    // Restore 16-bit mode and clear the one-shot data32 request.
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    // ErrorInfo indexes the offending operand; fall back to the instruction
    // location if that operand has no location.
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // Fall through to the suffix-retry logic below.
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token with room for one extra
  // suffix character, patched per attempt below.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes.  { 8, 16, 32, 64 }    { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Try each suffix in turn, forcing the memory operand size to the size the
  // suffix implies when a vector register is involved.
  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}
4498 
matchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,MCInst & Inst,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4499 bool X86AsmParser::matchAndEmitIntelInstruction(
4500     SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4501     MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4502   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4503   SMRange EmptyRange = std::nullopt;
4504   // Find one unsized memory operand, if present.
4505   X86Operand *UnsizedMemOp = nullptr;
4506   for (const auto &Op : Operands) {
4507     X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4508     if (X86Op->isMemUnsized()) {
4509       UnsizedMemOp = X86Op;
4510       // Have we found an unqualified memory operand,
4511       // break. IA allows only one memory operand.
4512       break;
4513     }
4514   }
4515 
4516   // Allow some instructions to have implicitly pointer-sized operands.  This is
4517   // compatible with gas.
4518   StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4519   if (UnsizedMemOp) {
4520     static const char *const PtrSizedInstrs[] = {"call", "jmp", "push", "pop"};
4521     for (const char *Instr : PtrSizedInstrs) {
4522       if (Mnemonic == Instr) {
4523         UnsizedMemOp->Mem.Size = getPointerWidth();
4524         break;
4525       }
4526     }
4527   }
4528 
4529   SmallVector<unsigned, 8> Match;
4530   FeatureBitset ErrorInfoMissingFeatures;
4531   FeatureBitset MissingFeatures;
4532   StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4533 
4534   // If unsized push has immediate operand we should default the default pointer
4535   // size for the size.
4536   if (Mnemonic == "push" && Operands.size() == 2) {
4537     auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4538     if (X86Op->isImm()) {
4539       // If it's not a constant fall through and let remainder take care of it.
4540       const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4541       unsigned Size = getPointerWidth();
4542       if (CE &&
4543           (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4544         SmallString<16> Tmp;
4545         Tmp += Base;
4546         Tmp += (is64BitMode())
4547                    ? "q"
4548                    : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4549         Op.setTokenValue(Tmp);
4550         // Do match in ATT mode to allow explicit suffix usage.
4551         Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4552                                          MissingFeatures, MatchingInlineAsm,
4553                                          false /*isParsingIntelSyntax()*/));
4554         Op.setTokenValue(Base);
4555       }
4556     }
4557   }
4558 
4559   // If an unsized memory operand is present, try to match with each memory
4560   // operand size.  In Intel assembly, the size is not part of the instruction
4561   // mnemonic.
4562   if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4563     static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4564     for (unsigned Size : MopSizes) {
4565       UnsizedMemOp->Mem.Size = Size;
4566       uint64_t ErrorInfoIgnore;
4567       unsigned LastOpcode = Inst.getOpcode();
4568       unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4569                                     MissingFeatures, MatchingInlineAsm,
4570                                     isParsingIntelSyntax());
4571       if (Match.empty() || LastOpcode != Inst.getOpcode())
4572         Match.push_back(M);
4573 
4574       // If this returned as a missing feature failure, remember that.
4575       if (Match.back() == Match_MissingFeature)
4576         ErrorInfoMissingFeatures = MissingFeatures;
4577     }
4578 
4579     // Restore the size of the unsized memory operand if we modified it.
4580     UnsizedMemOp->Mem.Size = 0;
4581   }
4582 
4583   // If we haven't matched anything yet, this is not a basic integer or FPU
4584   // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
4585   // matching with the unsized operand.
4586   if (Match.empty()) {
4587     Match.push_back(MatchInstruction(
4588         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4589         isParsingIntelSyntax()));
4590     // If this returned as a missing feature failure, remember that.
4591     if (Match.back() == Match_MissingFeature)
4592       ErrorInfoMissingFeatures = MissingFeatures;
4593   }
4594 
4595   // Restore the size of the unsized memory operand if we modified it.
4596   if (UnsizedMemOp)
4597     UnsizedMemOp->Mem.Size = 0;
4598 
4599   // If it's a bad mnemonic, all results will be the same.
4600   if (Match.back() == Match_MnemonicFail) {
4601     return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4602                  Op.getLocRange(), MatchingInlineAsm);
4603   }
4604 
4605   unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4606 
4607   // If matching was ambiguous and we had size information from the frontend,
4608   // try again with that. This handles cases like "movxz eax, m8/m16".
4609   if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4610       UnsizedMemOp->getMemFrontendSize()) {
4611     UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4612     unsigned M = MatchInstruction(
4613         Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4614         isParsingIntelSyntax());
4615     if (M == Match_Success)
4616       NumSuccessfulMatches = 1;
4617 
4618     // Add a rewrite that encodes the size information we used from the
4619     // frontend.
4620     InstInfo->AsmRewrites->emplace_back(
4621         AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4622         /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4623   }
4624 
4625   // If exactly one matched, then we treat that as a successful match (and the
4626   // instruction will already have been filled in correctly, since the failing
4627   // matches won't have modified it).
4628   if (NumSuccessfulMatches == 1) {
4629     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4630       return true;
4631     // Some instructions need post-processing to, for example, tweak which
4632     // encoding is selected. Loop on it while changes happen so the individual
4633     // transformations can chain off each other.
4634     if (!MatchingInlineAsm)
4635       while (processInstruction(Inst, Operands))
4636         ;
4637     Inst.setLoc(IDLoc);
4638     if (!MatchingInlineAsm)
4639       emitInstruction(Inst, Operands, Out);
4640     Opcode = Inst.getOpcode();
4641     return false;
4642   } else if (NumSuccessfulMatches > 1) {
4643     assert(UnsizedMemOp &&
4644            "multiple matches only possible with unsized memory operands");
4645     return Error(UnsizedMemOp->getStartLoc(),
4646                  "ambiguous operand size for instruction '" + Mnemonic + "\'",
4647                  UnsizedMemOp->getLocRange());
4648   }
4649 
4650   // If one instruction matched as unsupported, report this as unsupported.
4651   if (llvm::count(Match, Match_Unsupported) == 1) {
4652     return Error(IDLoc, "unsupported instruction", EmptyRange,
4653                  MatchingInlineAsm);
4654   }
4655 
4656   // If one instruction matched with a missing feature, report this as a
4657   // missing feature.
4658   if (llvm::count(Match, Match_MissingFeature) == 1) {
4659     ErrorInfo = Match_MissingFeature;
4660     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4661                                MatchingInlineAsm);
4662   }
4663 
4664   // If one instruction matched with an invalid operand, report this as an
4665   // operand failure.
4666   if (llvm::count(Match, Match_InvalidOperand) == 1) {
4667     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4668                  MatchingInlineAsm);
4669   }
4670 
4671   if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4672     SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4673     if (ErrorLoc == SMLoc())
4674       ErrorLoc = IDLoc;
4675     return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4676                  EmptyRange, MatchingInlineAsm);
4677   }
4678 
4679   // If all of these were an outright failure, report it in a useless way.
4680   return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4681                MatchingInlineAsm);
4682 }
4683 
omitRegisterFromClobberLists(MCRegister Reg)4684 bool X86AsmParser::omitRegisterFromClobberLists(MCRegister Reg) {
4685   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg);
4686 }
4687 
ParseDirective(AsmToken DirectiveID)4688 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4689   MCAsmParser &Parser = getParser();
4690   StringRef IDVal = DirectiveID.getIdentifier();
4691   if (IDVal.starts_with(".arch"))
4692     return parseDirectiveArch();
4693   if (IDVal.starts_with(".code"))
4694     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4695   else if (IDVal.starts_with(".att_syntax")) {
4696     if (getLexer().isNot(AsmToken::EndOfStatement)) {
4697       if (Parser.getTok().getString() == "prefix")
4698         Parser.Lex();
4699       else if (Parser.getTok().getString() == "noprefix")
4700         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4701                                            "supported: registers must have a "
4702                                            "'%' prefix in .att_syntax");
4703     }
4704     getParser().setAssemblerDialect(0);
4705     return false;
4706   } else if (IDVal.starts_with(".intel_syntax")) {
4707     getParser().setAssemblerDialect(1);
4708     if (getLexer().isNot(AsmToken::EndOfStatement)) {
4709       if (Parser.getTok().getString() == "noprefix")
4710         Parser.Lex();
4711       else if (Parser.getTok().getString() == "prefix")
4712         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4713                                            "supported: registers must not have "
4714                                            "a '%' prefix in .intel_syntax");
4715     }
4716     return false;
4717   } else if (IDVal == ".nops")
4718     return parseDirectiveNops(DirectiveID.getLoc());
4719   else if (IDVal == ".even")
4720     return parseDirectiveEven(DirectiveID.getLoc());
4721   else if (IDVal == ".cv_fpo_proc")
4722     return parseDirectiveFPOProc(DirectiveID.getLoc());
4723   else if (IDVal == ".cv_fpo_setframe")
4724     return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4725   else if (IDVal == ".cv_fpo_pushreg")
4726     return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4727   else if (IDVal == ".cv_fpo_stackalloc")
4728     return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4729   else if (IDVal == ".cv_fpo_stackalign")
4730     return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4731   else if (IDVal == ".cv_fpo_endprologue")
4732     return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4733   else if (IDVal == ".cv_fpo_endproc")
4734     return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4735   else if (IDVal == ".seh_pushreg" ||
4736            (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4737     return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4738   else if (IDVal == ".seh_setframe" ||
4739            (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4740     return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4741   else if (IDVal == ".seh_savereg" ||
4742            (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4743     return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4744   else if (IDVal == ".seh_savexmm" ||
4745            (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4746     return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4747   else if (IDVal == ".seh_pushframe" ||
4748            (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4749     return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4750 
4751   return true;
4752 }
4753 
parseDirectiveArch()4754 bool X86AsmParser::parseDirectiveArch() {
4755   // Ignore .arch for now.
4756   getParser().parseStringToEndOfStatement();
4757   return false;
4758 }
4759 
4760 /// parseDirectiveNops
4761 ///  ::= .nops size[, control]
parseDirectiveNops(SMLoc L)4762 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4763   int64_t NumBytes = 0, Control = 0;
4764   SMLoc NumBytesLoc, ControlLoc;
4765   const MCSubtargetInfo& STI = getSTI();
4766   NumBytesLoc = getTok().getLoc();
4767   if (getParser().checkForValidSection() ||
4768       getParser().parseAbsoluteExpression(NumBytes))
4769     return true;
4770 
4771   if (parseOptionalToken(AsmToken::Comma)) {
4772     ControlLoc = getTok().getLoc();
4773     if (getParser().parseAbsoluteExpression(Control))
4774       return true;
4775   }
4776   if (getParser().parseEOL())
4777     return true;
4778 
4779   if (NumBytes <= 0) {
4780     Error(NumBytesLoc, "'.nops' directive with non-positive size");
4781     return false;
4782   }
4783 
4784   if (Control < 0) {
4785     Error(ControlLoc, "'.nops' directive with negative NOP size");
4786     return false;
4787   }
4788 
4789   /// Emit nops
4790   getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4791 
4792   return false;
4793 }
4794 
4795 /// parseDirectiveEven
4796 ///  ::= .even
parseDirectiveEven(SMLoc L)4797 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4798   if (parseEOL())
4799     return false;
4800 
4801   const MCSection *Section = getStreamer().getCurrentSectionOnly();
4802   if (!Section) {
4803     getStreamer().initSections(false, getSTI());
4804     Section = getStreamer().getCurrentSectionOnly();
4805   }
4806   if (Section->useCodeAlign())
4807     getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4808   else
4809     getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4810   return false;
4811 }
4812 
4813 /// ParseDirectiveCode
4814 ///  ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)4815 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4816   MCAsmParser &Parser = getParser();
4817   Code16GCC = false;
4818   if (IDVal == ".code16") {
4819     Parser.Lex();
4820     if (!is16BitMode()) {
4821       SwitchMode(X86::Is16Bit);
4822       getTargetStreamer().emitCode16();
4823     }
4824   } else if (IDVal == ".code16gcc") {
4825     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4826     Parser.Lex();
4827     Code16GCC = true;
4828     if (!is16BitMode()) {
4829       SwitchMode(X86::Is16Bit);
4830       getTargetStreamer().emitCode16();
4831     }
4832   } else if (IDVal == ".code32") {
4833     Parser.Lex();
4834     if (!is32BitMode()) {
4835       SwitchMode(X86::Is32Bit);
4836       getTargetStreamer().emitCode32();
4837     }
4838   } else if (IDVal == ".code64") {
4839     Parser.Lex();
4840     if (!is64BitMode()) {
4841       SwitchMode(X86::Is64Bit);
4842       getTargetStreamer().emitCode64();
4843     }
4844   } else {
4845     Error(L, "unknown directive " + IDVal);
4846     return false;
4847   }
4848 
4849   return false;
4850 }
4851 
4852 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)4853 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4854   MCAsmParser &Parser = getParser();
4855   StringRef ProcName;
4856   int64_t ParamsSize;
4857   if (Parser.parseIdentifier(ProcName))
4858     return Parser.TokError("expected symbol name");
4859   if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4860     return true;
4861   if (!isUIntN(32, ParamsSize))
4862     return Parser.TokError("parameters size out of range");
4863   if (parseEOL())
4864     return true;
4865   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4866   return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4867 }
4868 
4869 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)4870 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4871   MCRegister Reg;
4872   SMLoc DummyLoc;
4873   if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4874     return true;
4875   return getTargetStreamer().emitFPOSetFrame(Reg, L);
4876 }
4877 
4878 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)4879 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4880   MCRegister Reg;
4881   SMLoc DummyLoc;
4882   if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4883     return true;
4884   return getTargetStreamer().emitFPOPushReg(Reg, L);
4885 }
4886 
4887 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)4888 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4889   MCAsmParser &Parser = getParser();
4890   int64_t Offset;
4891   if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4892     return true;
4893   return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4894 }
4895 
4896 // .cv_fpo_stackalign 8
parseDirectiveFPOStackAlign(SMLoc L)4897 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4898   MCAsmParser &Parser = getParser();
4899   int64_t Offset;
4900   if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4901     return true;
4902   return getTargetStreamer().emitFPOStackAlign(Offset, L);
4903 }
4904 
4905 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)4906 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4907   MCAsmParser &Parser = getParser();
4908   if (Parser.parseEOL())
4909     return true;
4910   return getTargetStreamer().emitFPOEndPrologue(L);
4911 }
4912 
4913 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)4914 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4915   MCAsmParser &Parser = getParser();
4916   if (Parser.parseEOL())
4917     return true;
4918   return getTargetStreamer().emitFPOEndProc(L);
4919 }
4920 
parseSEHRegisterNumber(unsigned RegClassID,MCRegister & RegNo)4921 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4922                                           MCRegister &RegNo) {
4923   SMLoc startLoc = getLexer().getLoc();
4924   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4925 
4926   // Try parsing the argument as a register first.
4927   if (getLexer().getTok().isNot(AsmToken::Integer)) {
4928     SMLoc endLoc;
4929     if (parseRegister(RegNo, startLoc, endLoc))
4930       return true;
4931 
4932     if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4933       return Error(startLoc,
4934                    "register is not supported for use with this directive");
4935     }
4936   } else {
4937     // Otherwise, an integer number matching the encoding of the desired
4938     // register may appear.
4939     int64_t EncodedReg;
4940     if (getParser().parseAbsoluteExpression(EncodedReg))
4941       return true;
4942 
4943     // The SEH register number is the same as the encoding register number. Map
4944     // from the encoding back to the LLVM register number.
4945     RegNo = MCRegister();
4946     for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4947       if (MRI->getEncodingValue(Reg) == EncodedReg) {
4948         RegNo = Reg;
4949         break;
4950       }
4951     }
4952     if (!RegNo) {
4953       return Error(startLoc,
4954                    "incorrect register number for use with this directive");
4955     }
4956   }
4957 
4958   return false;
4959 }
4960 
parseDirectiveSEHPushReg(SMLoc Loc)4961 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4962   MCRegister Reg;
4963   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4964     return true;
4965 
4966   if (getLexer().isNot(AsmToken::EndOfStatement))
4967     return TokError("expected end of directive");
4968 
4969   getParser().Lex();
4970   getStreamer().emitWinCFIPushReg(Reg, Loc);
4971   return false;
4972 }
4973 
parseDirectiveSEHSetFrame(SMLoc Loc)4974 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4975   MCRegister Reg;
4976   int64_t Off;
4977   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4978     return true;
4979   if (getLexer().isNot(AsmToken::Comma))
4980     return TokError("you must specify a stack pointer offset");
4981 
4982   getParser().Lex();
4983   if (getParser().parseAbsoluteExpression(Off))
4984     return true;
4985 
4986   if (getLexer().isNot(AsmToken::EndOfStatement))
4987     return TokError("expected end of directive");
4988 
4989   getParser().Lex();
4990   getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4991   return false;
4992 }
4993 
parseDirectiveSEHSaveReg(SMLoc Loc)4994 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4995   MCRegister Reg;
4996   int64_t Off;
4997   if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4998     return true;
4999   if (getLexer().isNot(AsmToken::Comma))
5000     return TokError("you must specify an offset on the stack");
5001 
5002   getParser().Lex();
5003   if (getParser().parseAbsoluteExpression(Off))
5004     return true;
5005 
5006   if (getLexer().isNot(AsmToken::EndOfStatement))
5007     return TokError("expected end of directive");
5008 
5009   getParser().Lex();
5010   getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
5011   return false;
5012 }
5013 
parseDirectiveSEHSaveXMM(SMLoc Loc)5014 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
5015   MCRegister Reg;
5016   int64_t Off;
5017   if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
5018     return true;
5019   if (getLexer().isNot(AsmToken::Comma))
5020     return TokError("you must specify an offset on the stack");
5021 
5022   getParser().Lex();
5023   if (getParser().parseAbsoluteExpression(Off))
5024     return true;
5025 
5026   if (getLexer().isNot(AsmToken::EndOfStatement))
5027     return TokError("expected end of directive");
5028 
5029   getParser().Lex();
5030   getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
5031   return false;
5032 }
5033 
parseDirectiveSEHPushFrame(SMLoc Loc)5034 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
5035   bool Code = false;
5036   StringRef CodeID;
5037   if (getLexer().is(AsmToken::At)) {
5038     SMLoc startLoc = getLexer().getLoc();
5039     getParser().Lex();
5040     if (!getParser().parseIdentifier(CodeID)) {
5041       if (CodeID != "code")
5042         return Error(startLoc, "expected @code");
5043       Code = true;
5044     }
5045   }
5046 
5047   if (getLexer().isNot(AsmToken::EndOfStatement))
5048     return TokError("expected end of directive");
5049 
5050   getParser().Lex();
5051   getStreamer().emitWinCFIPushFrame(Code, Loc);
5052   return false;
5053 }
5054 
5055 // Force static initialization.
LLVMInitializeX86AsmParser()5056 extern "C" LLVM_C_ABI void LLVMInitializeX86AsmParser() {
5057   RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
5058   RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
5059 }
5060 
5061 #define GET_MATCHER_IMPLEMENTATION
5062 #include "X86GenAsmMatcher.inc"
5063