1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86IntelInstPrinter.h"
12 #include "MCTargetDesc/X86MCAsmInfo.h"
13 #include "MCTargetDesc/X86MCExpr.h"
14 #include "MCTargetDesc/X86MCTargetDesc.h"
15 #include "MCTargetDesc/X86TargetStreamer.h"
16 #include "TargetInfo/X86TargetInfo.h"
17 #include "X86Operand.h"
18 #include "llvm-c/Visibility.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCParser/AsmLexer.h"
29 #include "llvm/MC/MCParser/MCAsmParser.h"
30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
31 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
32 #include "llvm/MC/MCRegisterInfo.h"
33 #include "llvm/MC/MCSection.h"
34 #include "llvm/MC/MCStreamer.h"
35 #include "llvm/MC/MCSubtargetInfo.h"
36 #include "llvm/MC/MCSymbol.h"
37 #include "llvm/MC/TargetRegistry.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/Compiler.h"
40 #include "llvm/Support/SourceMgr.h"
41 #include "llvm/Support/raw_ostream.h"
42 #include <algorithm>
43 #include <memory>
44
45 using namespace llvm;
46
// Command-line flag enabling hardening of inline-assembly code against Load
// Value Injection (LVI). Experimental, hence hidden from --help output.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);
51
checkScale(unsigned Scale,StringRef & ErrMsg)52 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
53 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
54 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
55 return true;
56 }
57 return false;
58 }
59
60 namespace {
61
62 // Including the generated SSE2AVX compression tables.
63 #define GET_X86_SSE2AVX_TABLE
64 #include "X86GenInstrMapping.inc"
65
// Binding strength for each InfixCalculatorTok; a larger value binds tighter.
// NOTE: this array is indexed directly by InfixCalculatorTok, so the entry
// order below must stay exactly in sync with that enum's declaration order.
static const char OpPrecedence[] = {
    0, // IC_OR
    1, // IC_XOR
    2, // IC_AND
    4, // IC_LSHIFT
    4, // IC_RSHIFT
    5, // IC_PLUS
    5, // IC_MINUS
    6, // IC_MULTIPLY
    6, // IC_DIVIDE
    6, // IC_MOD
    7, // IC_NOT
    8, // IC_NEG
    9, // IC_RPAREN
    10, // IC_LPAREN
    0, // IC_IMM
    0, // IC_REGISTER
    3, // IC_EQ
    3, // IC_NE
    3, // IC_LT
    3, // IC_LE
    3, // IC_GT
    3 // IC_GE
};
90
class X86AsmParser : public MCTargetAsmParser {
  // Side-channel info for the instruction currently being parsed (may be
  // null outside of parseInstruction).
  ParseInstructionInfo *InstInfo;
  // True when assembling under ".code16gcc": 16-bit mode, but instructions
  // are matched as 32-bit (see MatchInstruction below).
  bool Code16GCC;
  // Non-zero when a data-size prefix has been forced for this instruction.
  unsigned ForcedDataPrefix = 0;

  // Encoding prefix explicitly requested for the current instruction
  // (e.g. via {vex}, {vex3}, {evex}, {rex2} pseudo-prefixes).
  enum OpcodePrefix {
    OpcodePrefix_Default,
    OpcodePrefix_REX,
    OpcodePrefix_REX2,
    OpcodePrefix_VEX,
    OpcodePrefix_VEX2,
    OpcodePrefix_VEX3,
    OpcodePrefix_EVEX,
  };

  OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default;

  // Displacement width explicitly requested for the current instruction
  // (e.g. via {disp8}/{disp32} pseudo-prefixes).
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use apx extended register?
  bool UseApxExtendedReg = false;
  // Is this instruction explicitly required not to update flags?
  bool ForcedNoFlag = false;
120
121 private:
consumeToken()122 SMLoc consumeToken() {
123 MCAsmParser &Parser = getParser();
124 SMLoc Result = Parser.getTok().getLoc();
125 Parser.Lex();
126 return Result;
127 }
128
tokenIsStartOfStatement(AsmToken::TokenKind Token)129 bool tokenIsStartOfStatement(AsmToken::TokenKind Token) override {
130 return Token == AsmToken::LCurly;
131 }
132
getTargetStreamer()133 X86TargetStreamer &getTargetStreamer() {
134 assert(getParser().getStreamer().getTargetStreamer() &&
135 "do not have a target streamer");
136 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
137 return static_cast<X86TargetStreamer &>(TS);
138 }
139
  // Run the generated instruction matcher on Operands, filling in Inst on
  // success and returning the matcher's result code. Under Code16GCC the
  // subtarget is temporarily flipped to 32-bit so that the instruction is
  // matched as 32-bit code, then restored to 16-bit afterwards.
  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }
153
  // Tokens consumed by InfixCalculator. NOTE: declaration order must stay in
  // sync with the OpPrecedence table above, which is indexed by these values.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  // Intel-syntax operators (LENGTH/SIZE/TYPE) recognized when parsing
  // MS inline assembly.
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  // MASM operators (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
192
  // Evaluates Intel-syntax arithmetic expressions. Operators and operands
  // arrive in infix order; pushOperator() performs a shunting-yard-style
  // conversion onto PostfixStack, and execute() evaluates the resulting
  // postfix sequence to a single int64_t.
  class InfixCalculator {
    // A postfix token: the kind plus its value (meaningful for IC_IMM).
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    // Pending operators awaiting higher-precedence resolution.
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    // Output sequence in postfix (RPN) order.
    SmallVector<ICToken, 4> PostfixStack;

    // NEG and NOT are the only unary operators handled here.
    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    // Pop the most recently pushed operand value. A non-operand token on top
    // yields -1, which a later checkScale() call rejects.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an immediate or register operand straight to the postfix output.
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    // Discard the top pending operator (used when a Scale*Reg pair is folded).
    void popOperator() { InfixOperatorStack.pop_back(); }
    // Push an operator, first flushing any pending operators of greater or
    // equal precedence to the postfix output (classic shunting-yard step).
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    // Flush remaining operators and evaluate the postfix sequence.
    // Comparison operators yield -1 for true and 0 for false.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      // Standard postfix evaluation: operands push, operators pop their
      // arguments and push the folded result.
      SmallVector<ICToken, 16> OperandStack;
      for (const ICToken &Op : PostfixStack) {
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };
409
  // States of the Intel-expression parser below; each (other than
  // IES_INIT/IES_ERROR) names the kind of token most recently consumed.
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };
439
  // State machine driving Intel-syntax expression parsing (e.g. the body of
  // a memory operand). The parser calls one on*() handler per token; each
  // handler validates the token against the current State (moving to
  // IES_ERROR for ill-formed sequences) and feeds operands/operators to the
  // embedded InfixCalculator. Along the way the machine records the memory
  // operand components it discovers: base register, index register, scale,
  // symbol and displacement.
  class IntelExprStateMachine {
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    // TmpReg holds the register most recently seen; it is committed to
    // BaseReg/IndexReg once the following token disambiguates its role.
    MCRegister BaseReg, IndexReg, TmpReg;
    unsigned Scale = 0;
    // Extra displacement accumulated via addImm(), outside the calculator.
    int64_t Imm = 0;
    // At most one symbol reference is allowed (see setSymRef).
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    // Bracket nesting depth; only a single level is accepted (see onLBrac).
    short BracCount = 0;
    bool MemExpr = false;
    bool BracketUsed = false;
    bool OffsetOperator = false;
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    AsmTypeInfo CurType;

    // Record the expression's symbol reference; fails if one is already set.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }

  public:
    IntelExprStateMachine() = default;

    // Accumulate an additional displacement outside the infix calculator.
    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    MCRegister getBaseReg() const { return BaseReg; }
    MCRegister getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    // Final displacement: accumulated Imm plus the evaluated expression.
    int64_t getImm() { return Imm + IC.execute(); }
    // States in which the expression is allowed to terminate.
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_RPAREN ||
             State == IES_INTEGER || State == IES_REGISTER ||
             State == IES_OFFSET;
    }

    // Is the intel expression appended after an operand index.
    // [OperandIdx][Intel Expression]
    // This is necessary for checking if it is an independent
    // intel expression at back end when parse inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }

    // Diagnose that both BaseReg and IndexReg are already occupied; always
    // returns true so callers can 'return regsUseUpError(ErrMsg);'.
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]
      // cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }

    // Binary bitwise/shift/comparison operators share one shape: they are
    // only legal after a value (integer, register or ')'), and simply push
    // the matching calculator operator.
    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onEq() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_EQ;
        IC.pushOperator(IC_EQ);
        break;
      }
      PrevState = CurrState;
    }
    void onNE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_NE;
        IC.pushOperator(IC_NE);
        break;
      }
      PrevState = CurrState;
    }
    void onLT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LT;
        IC.pushOperator(IC_LT);
        break;
      }
      PrevState = CurrState;
    }
    void onLE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LE;
        IC.pushOperator(IC_LE);
        break;
      }
      PrevState = CurrState;
    }
    void onGT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GT;
        IC.pushOperator(IC_GT);
        break;
      }
      PrevState = CurrState;
    }
    void onGE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GE;
        IC.pushOperator(IC_GE);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    // '+' additionally commits a just-seen register (TmpReg) into
    // BaseReg or IndexReg, since the '+' proves it was a plain term.
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // '-' is either binary subtraction (after a value) or unary negation
    // (after an operator or at expression start); a negation applied to a
    // scale is rejected.
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Unary bitwise NOT; legal wherever a new (sub)expression may begin.
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    // A register token. After '*' with a preceding integer it resolves the
    // 'Scale * Register' form; otherwise its role stays undecided in TmpReg.
    bool onRegister(MCRegister Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // An identifier (symbol) token. Enum values and symbolic constants are
    // folded to integers; anything else becomes the expression's symbol.
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    // An integer token. After 'Register *' it is consumed as the scale of
    // the index register; otherwise it is a plain immediate operand.
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    // '['. A leading value becomes an implicit '+' (disp[base] style); only
    // one bracket level is accepted (returns true on nesting).
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    // ']'. Closes the bracket and commits a trailing register into
    // BaseReg/IndexReg, mirroring onPlus.
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    bool onRParen(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        // In the case of a multiply, onRegister has already set IndexReg
        // directly, with appropriate scale.
        // Otherwise if we just saw a register it has only been stored in
        // TmpReg, so we need to store it into the state machine.
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        IC.pushOperator(IC_RPAREN);
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // The OFFSET operator applied to a symbol; records the symbol and its
    // location but pushes 0, since the value is resolved later.
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
    // A type cast, e.g. 'DWORD PTR'; only legal right after '('.
    void onCast(AsmTypeInfo Info) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_LPAREN:
        setTypeInfo(Info);
        State = IES_CAST;
        break;
      }
    }
    void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
  };
1119
Error(SMLoc L,const Twine & Msg,SMRange Range=std::nullopt,bool MatchingInlineAsm=false)1120 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1121 bool MatchingInlineAsm = false) {
1122 MCAsmParser &Parser = getParser();
1123 if (MatchingInlineAsm) {
1124 return false;
1125 }
1126 return Parser.Error(L, Msg, Range);
1127 }
1128
  // --- Register parsing ---
  bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  // --- Implicit string-instruction operands (SI/DI based) ---
  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(MCRegister Reg);
  MCRegister GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);

  // --- Operand parsing (AT&T and Intel syntaxes) ---
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool parseCFlagsOp(OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(MCRegister SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
1176 bool CreateMemForMSInlineAsm(MCRegister SegReg, const MCExpr *Disp,
1177 MCRegister BaseReg, MCRegister IndexReg,
1178 unsigned Scale, bool NonAbsMem, SMLoc Start,
1179 SMLoc End, unsigned Size, StringRef Identifier,
1180 const InlineAsmIdentifierInfo &Info,
1181 OperandVector &Operands);
1182
1183 bool parseDirectiveArch();
1184 bool parseDirectiveNops(SMLoc L);
1185 bool parseDirectiveEven(SMLoc L);
1186 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
1187
1188 /// CodeView FPO data directives.
1189 bool parseDirectiveFPOProc(SMLoc L);
1190 bool parseDirectiveFPOSetFrame(SMLoc L);
1191 bool parseDirectiveFPOPushReg(SMLoc L);
1192 bool parseDirectiveFPOStackAlloc(SMLoc L);
1193 bool parseDirectiveFPOStackAlign(SMLoc L);
1194 bool parseDirectiveFPOEndPrologue(SMLoc L);
1195 bool parseDirectiveFPOEndProc(SMLoc L);
1196
1197 /// SEH directives.
1198 bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
1199 bool parseDirectiveSEHPushReg(SMLoc);
1200 bool parseDirectiveSEHSetFrame(SMLoc);
1201 bool parseDirectiveSEHSaveReg(SMLoc);
1202 bool parseDirectiveSEHSaveXMM(SMLoc);
1203 bool parseDirectiveSEHPushFrame(SMLoc);
1204
1205 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1206
1207 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
1208 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
1209
1210 // Load Value Injection (LVI) Mitigations for machine code
1211 void emitWarningForSpecialLVIInstruction(SMLoc Loc);
1212 void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
1213 void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1214
1215 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1216 /// instrumentation around Inst.
1217 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1218
1219 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1220 OperandVector &Operands, MCStreamer &Out,
1221 uint64_t &ErrorInfo,
1222 bool MatchingInlineAsm) override;
1223
1224 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1225 MCStreamer &Out, bool MatchingInlineAsm);
1226
1227 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1228 bool MatchingInlineAsm);
1229
1230 bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1231 OperandVector &Operands, MCStreamer &Out,
1232 uint64_t &ErrorInfo, bool MatchingInlineAsm);
1233
1234 bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst,
1235 OperandVector &Operands, MCStreamer &Out,
1236 uint64_t &ErrorInfo,
1237 bool MatchingInlineAsm);
1238
1239 bool omitRegisterFromClobberLists(MCRegister Reg) override;
1240
1241 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1242 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1243 /// return false if no parsing errors occurred, true otherwise.
1244 bool HandleAVX512Operand(OperandVector &Operands);
1245
1246 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1247
  // True when the subtarget is currently in 64-bit code mode.
  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  // True when the subtarget is currently in 32-bit code mode.
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  // True when the subtarget is currently in 16-bit code mode.
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  // Switch the subtarget into exactly one of the 16/32/64-bit code modes.
  // \param mode one of X86::Is16Bit / X86::Is32Bit / X86::Is64Bit.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) is the set of mode bits that must change: toggling
    // it clears the previous mode bit and sets the requested one in a single
    // ToggleFeature call.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // After the switch exactly the requested mode bit must be set.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1270
  // Pointer width in bits implied by the current code mode (16/32/64).
  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }
1277
  // Assembler dialect 0 is AT&T; any nonzero dialect is treated as Intel
  // (the int-to-bool conversion is intentional).
  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }
1281
1282 /// @name Auto-generated Matcher Functions
1283 /// {
1284
1285 #define GET_ASSEMBLER_HEADER
1286 #include "X86GenAsmMatcher.inc"
1287
1288 /// }
1289
1290 public:
1291 enum X86MatchResultTy {
1292 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1293 #define GET_OPERAND_DIAGNOSTIC_TYPES
1294 #include "X86GenAsmMatcher.inc"
1295 };
1296
  // Construct the parser: alias ".word" to ".2byte" (x86 .word is 16 bits)
  // and compute the initially-available feature set from the subtarget.
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1307
1308 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1309 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1310 SMLoc &EndLoc) override;
1311
1312 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1313
1314 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1315 SMLoc NameLoc, OperandVector &Operands) override;
1316
1317 bool ParseDirective(AsmToken DirectiveID) override;
1318 };
1319 } // end anonymous namespace
1320
1321 #define GET_REGISTER_MATCHER
1322 #define GET_SUBTARGET_FEATURE_NAME
1323 #include "X86GenAsmMatcher.inc"
1324
// Validate a memory operand's base register, index register and scale,
// setting ErrMsg and returning true on any violation:
//  - base must be (R|E)IP or a GR16/GR32/GR64 register;
//  - index must be the EIZ/RIZ pseudo register, a GR16/GR32/GR64 register,
//    or (for VSIB addressing) a 128/256/512-bit vector register;
//  - base and index widths must agree; 16-bit addressing permits only the
//    BX/BP base with SI/DI index combinations, and never in 64-bit mode;
//  - RIP/EIP-relative forms take no index and require 64-bit mode.
// The scale value itself is finally validated by checkScale().
static bool CheckBaseRegAndIndexRegAndScale(MCRegister BaseReg,
                                            MCRegister IndexReg, unsigned Scale,
                                            bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.

  if (BaseReg &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  if (IndexReg &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // IP-relative addressing never combines with an index, and the instruction
  // pointer / stack pointer can never themselves be an index.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP || IndexReg == X86::ESP ||
      IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  // 16-bit addressing has no index-only encoding.
  if (!BaseReg &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  if (BaseReg && IndexReg) {
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}
1413
MatchRegisterByName(MCRegister & RegNo,StringRef RegName,SMLoc StartLoc,SMLoc EndLoc)1414 bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1415 SMLoc StartLoc, SMLoc EndLoc) {
1416 // If we encounter a %, ignore it. This code handles registers with and
1417 // without the prefix, unprefixed registers can occur in cfi directives.
1418 RegName.consume_front("%");
1419
1420 RegNo = MatchRegisterName(RegName);
1421
1422 // If the match failed, try the register name as lowercase.
1423 if (!RegNo)
1424 RegNo = MatchRegisterName(RegName.lower());
1425
1426 // The "flags" and "mxcsr" registers cannot be referenced directly.
1427 // Treat it as an identifier instead.
1428 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1429 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1430 RegNo = MCRegister();
1431
1432 if (!is64BitMode()) {
1433 // FIXME: This should be done using Requires<Not64BitMode> and
1434 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1435 // checked.
1436 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1437 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1438 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1439 X86II::isX86_64ExtendedReg(RegNo)) {
1440 return Error(StartLoc,
1441 "register %" + RegName + " is only available in 64-bit mode",
1442 SMRange(StartLoc, EndLoc));
1443 }
1444 }
1445
1446 if (X86II::isApxExtendedReg(RegNo))
1447 UseApxExtendedReg = true;
1448
1449 // If this is "db[0-15]", match it as an alias
1450 // for dr[0-15].
1451 if (!RegNo && RegName.starts_with("db")) {
1452 if (RegName.size() == 3) {
1453 switch (RegName[2]) {
1454 case '0':
1455 RegNo = X86::DR0;
1456 break;
1457 case '1':
1458 RegNo = X86::DR1;
1459 break;
1460 case '2':
1461 RegNo = X86::DR2;
1462 break;
1463 case '3':
1464 RegNo = X86::DR3;
1465 break;
1466 case '4':
1467 RegNo = X86::DR4;
1468 break;
1469 case '5':
1470 RegNo = X86::DR5;
1471 break;
1472 case '6':
1473 RegNo = X86::DR6;
1474 break;
1475 case '7':
1476 RegNo = X86::DR7;
1477 break;
1478 case '8':
1479 RegNo = X86::DR8;
1480 break;
1481 case '9':
1482 RegNo = X86::DR9;
1483 break;
1484 }
1485 } else if (RegName.size() == 4 && RegName[2] == '1') {
1486 switch (RegName[3]) {
1487 case '0':
1488 RegNo = X86::DR10;
1489 break;
1490 case '1':
1491 RegNo = X86::DR11;
1492 break;
1493 case '2':
1494 RegNo = X86::DR12;
1495 break;
1496 case '3':
1497 RegNo = X86::DR13;
1498 break;
1499 case '4':
1500 RegNo = X86::DR14;
1501 break;
1502 case '5':
1503 RegNo = X86::DR15;
1504 break;
1505 }
1506 }
1507 }
1508
1509 if (!RegNo) {
1510 if (isParsingIntelSyntax())
1511 return true;
1512 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1513 }
1514 return false;
1515 }
1516
// Parse a register reference from the token stream, returning true on
// failure.  In AT&T syntax an optional '%' prefix is consumed; "%st" is
// additionally parsed in its multi-token "%st(N)" form.  When
// RestoreOnFailure is set, every token consumed before a failure is unlexed
// again so callers can try a different interpretation of the input.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  AsmLexer &Lexer = getLexer();
  RegNo = MCRegister();

  // Tokens consumed so far; OnFailure pushes them back in reverse order so
  // the lexer is left exactly where it started.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // Intel syntax: fail quietly so the token can be reparsed differently.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    // Map the stack index onto the corresponding ST register.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (!RegNo) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1613
// MCTargetAsmParser entry point: parse a register, emitting diagnostics and
// consuming tokens even on failure (no lexer restore).
bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
1618
tryParseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)1619 ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1620 SMLoc &EndLoc) {
1621 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1622 bool PendingErrors = getParser().hasPendingError();
1623 getParser().clearPendingErrors();
1624 if (PendingErrors)
1625 return ParseStatus::Failure;
1626 if (Result)
1627 return ParseStatus::NoMatch;
1628 return ParseStatus::Success;
1629 }
1630
DefaultMemSIOperand(SMLoc Loc)1631 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1632 bool Parse32 = is32BitMode() || Code16GCC;
1633 MCRegister Basereg =
1634 is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1635 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1636 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1637 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1638 Loc, Loc, 0);
1639 }
1640
DefaultMemDIOperand(SMLoc Loc)1641 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1642 bool Parse32 = is32BitMode() || Code16GCC;
1643 MCRegister Basereg =
1644 is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1645 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1646 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1647 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1648 Loc, Loc, 0);
1649 }
1650
IsSIReg(MCRegister Reg)1651 bool X86AsmParser::IsSIReg(MCRegister Reg) {
1652 switch (Reg.id()) {
1653 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1654 case X86::RSI:
1655 case X86::ESI:
1656 case X86::SI:
1657 return true;
1658 case X86::RDI:
1659 case X86::EDI:
1660 case X86::DI:
1661 return false;
1662 }
1663 }
1664
GetSIDIForRegClass(unsigned RegClassID,bool IsSIReg)1665 MCRegister X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, bool IsSIReg) {
1666 switch (RegClassID) {
1667 default: llvm_unreachable("Unexpected register class");
1668 case X86::GR64RegClassID:
1669 return IsSIReg ? X86::RSI : X86::RDI;
1670 case X86::GR32RegClassID:
1671 return IsSIReg ? X86::ESI : X86::EDI;
1672 case X86::GR16RegClassID:
1673 return IsSIReg ? X86::SI : X86::DI;
1674 }
1675 }
1676
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1677 void X86AsmParser::AddDefaultSrcDestOperands(
1678 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1679 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1680 if (isParsingIntelSyntax()) {
1681 Operands.push_back(std::move(Dst));
1682 Operands.push_back(std::move(Src));
1683 }
1684 else {
1685 Operands.push_back(std::move(Src));
1686 Operands.push_back(std::move(Dst));
1687 }
1688 }
1689
// Reconcile the operands the user wrote for a string instruction
// (OrigOperands; index 0 holds the mnemonic) with the canonical default
// operands (FinalOperands).  Written memory operands only determine size and
// segment; the canonical (R|E)SI/(R|E)DI base is substituted and a warning
// emitted when they differ.  Returns true only on a hard mismatch error
// (e.g. differing index-register widths across operands).
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Warnings collected here are only emitted after every operand has been
    // adjusted successfully (see below).
    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        MCRegister OrigReg = OrigOp.Mem.BaseReg;
        MCRegister FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Substitute the canonical SI/DI register of the detected width.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}
1771
parseOperand(OperandVector & Operands,StringRef Name)1772 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1773 if (isParsingIntelSyntax())
1774 return parseIntelOperand(Operands, Name);
1775
1776 return parseATTOperand(Operands);
1777 }
1778
// Build the X86Operand for a memory reference originating from MS inline
// asm, attaching the frontend's identifier info (declaration pointer and
// frontend size) so the inline-asm rewriter can patch the operand later.
// Always returns false (success).
bool X86AsmParser::CreateMemForMSInlineAsm(
    MCRegister SegReg, const MCExpr *Disp, MCRegister BaseReg,
    MCRegister IndexReg, unsigned Scale, bool NonAbsMem, SMLoc Start, SMLoc End,
    unsigned Size, StringRef Identifier, const InlineAsmIdentifierInfo &Info,
    OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is common for MS InlineAsm to reference a global variable together
  // with one or two registers in a memory expression; such a form cannot be
  // addressed rip/eip-relative.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
  // Default case: symbol (possibly plus registers), addressed RIP-relative
  // when no explicit base is present.
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1824
1825 // Some binary bitwise operators have a named synonymous
1826 // Query a candidate string for being such a named operator
1827 // and if so - invoke the appropriate handler
// \param Name       candidate operator token (not yet consumed).
// \param SM         state machine receiving the operator action.
// \param ParseError set when the operator was recognized but its operand
//                   failed to parse (only possible for 'offset').
// \param End        updated to the end of the consumed operator token.
// \returns true when Name was handled as a named operator.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name != Name.lower() && Name != Name.upper() &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    // ParseIntelOffsetOperator consumes the 'offset' token and its operand
    // itself, so no trailing consumeToken() happens for this path (below).
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // Eat the operator token for every operator except 'offset', whose tokens
  // were already consumed above.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
ParseMasmNamedOperator(StringRef Name,IntelExprStateMachine & SM,bool & ParseError,SMLoc & End)1870 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1871 IntelExprStateMachine &SM,
1872 bool &ParseError, SMLoc &End) {
1873 if (Name.equals_insensitive("eq")) {
1874 SM.onEq();
1875 } else if (Name.equals_insensitive("ne")) {
1876 SM.onNE();
1877 } else if (Name.equals_insensitive("lt")) {
1878 SM.onLT();
1879 } else if (Name.equals_insensitive("le")) {
1880 SM.onLE();
1881 } else if (Name.equals_insensitive("gt")) {
1882 SM.onGT();
1883 } else if (Name.equals_insensitive("ge")) {
1884 SM.onGE();
1885 } else {
1886 return false;
1887 }
1888 End = consumeToken();
1889 return true;
1890 }
1891
1892 // Check if current intel expression append after an operand.
1893 // Like: [Operand][Intel Expression]
tryParseOperandIdx(AsmToken::TokenKind PrevTK,IntelExprStateMachine & SM)1894 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1895 IntelExprStateMachine &SM) {
1896 if (PrevTK != AsmToken::RBrac)
1897 return;
1898
1899 SM.setAppendAfterOperand();
1900 }
1901
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1902 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1903 MCAsmParser &Parser = getParser();
1904 StringRef ErrMsg;
1905
1906 AsmToken::TokenKind PrevTK = AsmToken::Error;
1907
1908 if (getContext().getObjectFileInfo()->isPositionIndependent())
1909 SM.setPIC();
1910
1911 bool Done = false;
1912 while (!Done) {
1913 // Get a fresh reference on each loop iteration in case the previous
1914 // iteration moved the token storage during UnLex().
1915 const AsmToken &Tok = Parser.getTok();
1916
1917 bool UpdateLocLex = true;
1918 AsmToken::TokenKind TK = getLexer().getKind();
1919
1920 switch (TK) {
1921 default:
1922 if ((Done = SM.isValidEndState()))
1923 break;
1924 return Error(Tok.getLoc(), "unknown token in expression");
1925 case AsmToken::Error:
1926 return Error(getLexer().getErrLoc(), getLexer().getErr());
1927 break;
1928 case AsmToken::Real:
1929 // DotOperator: [ebx].0
1930 UpdateLocLex = false;
1931 if (ParseIntelDotOperator(SM, End))
1932 return true;
1933 break;
1934 case AsmToken::Dot:
1935 if (!Parser.isParsingMasm()) {
1936 if ((Done = SM.isValidEndState()))
1937 break;
1938 return Error(Tok.getLoc(), "unknown token in expression");
1939 }
1940 // MASM allows spaces around the dot operator (e.g., "var . x")
1941 Lex();
1942 UpdateLocLex = false;
1943 if (ParseIntelDotOperator(SM, End))
1944 return true;
1945 break;
1946 case AsmToken::Dollar:
1947 if (!Parser.isParsingMasm()) {
1948 if ((Done = SM.isValidEndState()))
1949 break;
1950 return Error(Tok.getLoc(), "unknown token in expression");
1951 }
1952 [[fallthrough]];
1953 case AsmToken::String: {
1954 if (Parser.isParsingMasm()) {
1955 // MASM parsers handle strings in expressions as constants.
1956 SMLoc ValueLoc = Tok.getLoc();
1957 int64_t Res;
1958 const MCExpr *Val;
1959 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1960 return true;
1961 UpdateLocLex = false;
1962 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1963 return Error(ValueLoc, "expected absolute value");
1964 if (SM.onInteger(Res, ErrMsg))
1965 return Error(ValueLoc, ErrMsg);
1966 break;
1967 }
1968 [[fallthrough]];
1969 }
1970 case AsmToken::At:
1971 case AsmToken::Identifier: {
1972 SMLoc IdentLoc = Tok.getLoc();
1973 StringRef Identifier = Tok.getString();
1974 UpdateLocLex = false;
1975 if (Parser.isParsingMasm()) {
1976 size_t DotOffset = Identifier.find_first_of('.');
1977 if (DotOffset != StringRef::npos) {
1978 consumeToken();
1979 StringRef LHS = Identifier.slice(0, DotOffset);
1980 StringRef Dot = Identifier.substr(DotOffset, 1);
1981 StringRef RHS = Identifier.substr(DotOffset + 1);
1982 if (!RHS.empty()) {
1983 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1984 }
1985 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1986 if (!LHS.empty()) {
1987 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1988 }
1989 break;
1990 }
1991 }
1992 // (MASM only) <TYPE> PTR operator
1993 if (Parser.isParsingMasm()) {
1994 const AsmToken &NextTok = getLexer().peekTok();
1995 if (NextTok.is(AsmToken::Identifier) &&
1996 NextTok.getIdentifier().equals_insensitive("ptr")) {
1997 AsmTypeInfo Info;
1998 if (Parser.lookUpType(Identifier, Info))
1999 return Error(Tok.getLoc(), "unknown type");
2000 SM.onCast(Info);
2001 // Eat type and PTR.
2002 consumeToken();
2003 End = consumeToken();
2004 break;
2005 }
2006 }
2007 // Register, or (MASM only) <register>.<field>
2008 MCRegister Reg;
2009 if (Tok.is(AsmToken::Identifier)) {
2010 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
2011 if (SM.onRegister(Reg, ErrMsg))
2012 return Error(IdentLoc, ErrMsg);
2013 break;
2014 }
2015 if (Parser.isParsingMasm()) {
2016 const std::pair<StringRef, StringRef> IDField =
2017 Tok.getString().split('.');
2018 const StringRef ID = IDField.first, Field = IDField.second;
2019 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
2020 if (!Field.empty() &&
2021 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
2022 if (SM.onRegister(Reg, ErrMsg))
2023 return Error(IdentLoc, ErrMsg);
2024
2025 AsmFieldInfo Info;
2026 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2027 if (Parser.lookUpField(Field, Info))
2028 return Error(FieldStartLoc, "unknown offset");
2029 else if (SM.onPlus(ErrMsg))
2030 return Error(getTok().getLoc(), ErrMsg);
2031 else if (SM.onInteger(Info.Offset, ErrMsg))
2032 return Error(IdentLoc, ErrMsg);
2033 SM.setTypeInfo(Info.Type);
2034
2035 End = consumeToken();
2036 break;
2037 }
2038 }
2039 }
2040 // Operator synonymous ("not", "or" etc.)
2041 bool ParseError = false;
2042 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2043 if (ParseError)
2044 return true;
2045 break;
2046 }
2047 if (Parser.isParsingMasm() &&
2048 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2049 if (ParseError)
2050 return true;
2051 break;
2052 }
2053 // Symbol reference, when parsing assembly content
2054 InlineAsmIdentifierInfo Info;
2055 AsmFieldInfo FieldInfo;
2056 const MCExpr *Val;
2057 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2058 // MS Dot Operator expression
2059 if (Identifier.contains('.') &&
2060 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2061 if (ParseIntelDotOperator(SM, End))
2062 return true;
2063 break;
2064 }
2065 }
2066 if (isParsingMSInlineAsm()) {
2067 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2068 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2069 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2070 if (SM.onInteger(Val, ErrMsg))
2071 return Error(IdentLoc, ErrMsg);
2072 } else {
2073 return true;
2074 }
2075 break;
2076 }
2077 // MS InlineAsm identifier
2078 // Call parseIdentifier() to combine @ with the identifier behind it.
2079 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2080 return Error(IdentLoc, "expected identifier");
2081 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2082 return true;
2083 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2084 true, ErrMsg))
2085 return Error(IdentLoc, ErrMsg);
2086 break;
2087 }
2088 if (Parser.isParsingMasm()) {
2089 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2090 int64_t Val;
2091 if (ParseMasmOperator(OpKind, Val))
2092 return true;
2093 if (SM.onInteger(Val, ErrMsg))
2094 return Error(IdentLoc, ErrMsg);
2095 break;
2096 }
2097 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2098 // Field offset immediate; <TYPE>.<field specification>
2099 Lex(); // eat type
2100 bool EndDot = parseOptionalToken(AsmToken::Dot);
2101 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2102 getTok().getString().starts_with("."))) {
2103 getParser().parseIdentifier(Identifier);
2104 if (!EndDot)
2105 Identifier.consume_front(".");
2106 EndDot = Identifier.consume_back(".");
2107 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2108 FieldInfo)) {
2109 SMLoc IDEnd =
2110 SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2111 return Error(IdentLoc, "Unable to lookup field reference!",
2112 SMRange(IdentLoc, IDEnd));
2113 }
2114 if (!EndDot)
2115 EndDot = parseOptionalToken(AsmToken::Dot);
2116 }
2117 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2118 return Error(IdentLoc, ErrMsg);
2119 break;
2120 }
2121 }
2122 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2123 return Error(Tok.getLoc(), "Unexpected identifier!");
2124 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2125 false, ErrMsg)) {
2126 return Error(IdentLoc, ErrMsg);
2127 }
2128 break;
2129 }
2130 case AsmToken::Integer: {
2131 // Look for 'b' or 'f' following an Integer as a directional label
2132 SMLoc Loc = getTok().getLoc();
2133 int64_t IntVal = getTok().getIntVal();
2134 End = consumeToken();
2135 UpdateLocLex = false;
2136 if (getLexer().getKind() == AsmToken::Identifier) {
2137 StringRef IDVal = getTok().getString();
2138 if (IDVal == "f" || IDVal == "b") {
2139 MCSymbol *Sym =
2140 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2141 auto Variant = X86::S_None;
2142 const MCExpr *Val =
2143 MCSymbolRefExpr::create(Sym, Variant, getContext());
2144 if (IDVal == "b" && Sym->isUndefined())
2145 return Error(Loc, "invalid reference to undefined symbol");
2146 StringRef Identifier = Sym->getName();
2147 InlineAsmIdentifierInfo Info;
2148 AsmTypeInfo Type;
2149 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2150 isParsingMSInlineAsm(), ErrMsg))
2151 return Error(Loc, ErrMsg);
2152 End = consumeToken();
2153 } else {
2154 if (SM.onInteger(IntVal, ErrMsg))
2155 return Error(Loc, ErrMsg);
2156 }
2157 } else {
2158 if (SM.onInteger(IntVal, ErrMsg))
2159 return Error(Loc, ErrMsg);
2160 }
2161 break;
2162 }
2163 case AsmToken::Plus:
2164 if (SM.onPlus(ErrMsg))
2165 return Error(getTok().getLoc(), ErrMsg);
2166 break;
2167 case AsmToken::Minus:
2168 if (SM.onMinus(ErrMsg))
2169 return Error(getTok().getLoc(), ErrMsg);
2170 break;
2171 case AsmToken::Tilde: SM.onNot(); break;
2172 case AsmToken::Star: SM.onStar(); break;
2173 case AsmToken::Slash: SM.onDivide(); break;
2174 case AsmToken::Percent: SM.onMod(); break;
2175 case AsmToken::Pipe: SM.onOr(); break;
2176 case AsmToken::Caret: SM.onXor(); break;
2177 case AsmToken::Amp: SM.onAnd(); break;
2178 case AsmToken::LessLess:
2179 SM.onLShift(); break;
2180 case AsmToken::GreaterGreater:
2181 SM.onRShift(); break;
2182 case AsmToken::LBrac:
2183 if (SM.onLBrac())
2184 return Error(Tok.getLoc(), "unexpected bracket encountered");
2185 tryParseOperandIdx(PrevTK, SM);
2186 break;
2187 case AsmToken::RBrac:
2188 if (SM.onRBrac(ErrMsg)) {
2189 return Error(Tok.getLoc(), ErrMsg);
2190 }
2191 break;
2192 case AsmToken::LParen: SM.onLParen(); break;
2193 case AsmToken::RParen:
2194 if (SM.onRParen(ErrMsg)) {
2195 return Error(Tok.getLoc(), ErrMsg);
2196 }
2197 break;
2198 }
2199 if (SM.hadError())
2200 return Error(Tok.getLoc(), "unknown token in expression");
2201
2202 if (!Done && UpdateLocLex)
2203 End = consumeToken();
2204
2205 PrevTK = TK;
2206 }
2207 return false;
2208 }
2209
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  // Record AsmRewrites for the Intel expression spanning [Start, End) so the
  // MS inline-asm machinery can later re-emit it from the parsed components
  // held in SM (base/index registers, scale, symbol, immediate).
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    // Drop any source text that precedes the symbol name itself.
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // The remaining rewrite region begins just past the symbol name.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol than there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
2244
2245 // Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(
    const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
    bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
  MCAsmParser &Parser = getParser();
  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // Hand the rest of the line to the frontend callback; it trims LineBuf to
  // the portion it recognized as the identifier and fills in Info.
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
         "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
        SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                           Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name,
    // unless we are parsing the operand of an offset operator
    if (!IsParsingOffsetOperator)
      InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                          InternalName);
    else
      Identifier = InternalName;
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    // Enum value: leave Val as nullptr and report success.
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  auto Variant = X86::S_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}
2296
2297 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  // Eat "{" and mark the current place.
  const SMLoc consumedToken = consumeToken();
  if (Tok.isNot(AsmToken::Identifier))
    return Error(Tok.getLoc(), "Expected an identifier after {");
  // Embedded rounding control: {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}.
  if (Tok.getIdentifier().starts_with("r")) {
    int rndMode = StringSwitch<int>(Tok.getIdentifier())
                      .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
                      .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
                      .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
                      .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
                      .Default(-1);
    if (-1 == rndMode)
      return Error(Tok.getLoc(), "Invalid rounding mode.");
    Parser.Lex(); // Eat "r*" of r*-sae
    if (!getLexer().is(AsmToken::Minus))
      return Error(Tok.getLoc(), "Expected - at this point");
    Parser.Lex(); // Eat "-"
    // NOTE(review): the token after '-' is consumed without verifying that it
    // actually spells "sae".
    Parser.Lex(); // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    SMLoc End = Tok.getEndLoc();
    Parser.Lex(); // Eat "}"
    // The rounding mode becomes an immediate operand on the instruction.
    const MCExpr *RndModeOp =
        MCConstantExpr::create(rndMode, Parser.getContext());
    Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
    return false;
  }
  // {sae} (suppress-all-exceptions) is kept as a literal token operand.
  if (Tok.getIdentifier() == "sae") {
    Parser.Lex(); // Eat the sae
    if (!getLexer().is(AsmToken::RCurly))
      return Error(Tok.getLoc(), "Expected } at this point");
    Parser.Lex(); // Eat "}"
    Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
    return false;
  }
  return Error(Tok.getLoc(), "unknown token in expression");
}
2338
2339 /// Parse condtional flags for CCMP/CTEST, e.g {dfv=of,sf,zf,cf} right after
2340 /// mnemonic.
bool X86AsmParser::parseCFlagsOp(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  AsmToken Tok = Parser.getTok();
  const SMLoc Start = Tok.getLoc();
  // The operand must open with '{'.
  if (!Tok.is(AsmToken::LCurly))
    return Error(Tok.getLoc(), "Expected { at this point");
  Parser.Lex(); // Eat "{"
  Tok = Parser.getTok();
  // The 'dfv' keyword is matched case-insensitively.
  if (Tok.getIdentifier().lower() != "dfv")
    return Error(Tok.getLoc(), "Expected dfv at this point");
  Parser.Lex(); // Eat "dfv"
  Tok = Parser.getTok();
  if (!Tok.is(AsmToken::Equal))
    return Error(Tok.getLoc(), "Expected = at this point");
  Parser.Lex(); // Eat "="

  Tok = Parser.getTok();
  SMLoc End;
  // '{dfv=}' with no flags is accepted and encodes an all-zero mask.
  if (Tok.is(AsmToken::RCurly)) {
    End = Tok.getEndLoc();
    Operands.push_back(X86Operand::CreateImm(
        MCConstantExpr::create(0, Parser.getContext()), Start, End));
    Parser.Lex(); // Eat "}"
    return false;
  }
  // Accumulate up to four distinct flags (of/sf/zf/cf) into a 4-bit mask.
  unsigned CFlags = 0;
  for (unsigned I = 0; I < 4; ++I) {
    Tok = Parser.getTok();
    unsigned CFlag = StringSwitch<unsigned>(Tok.getIdentifier().lower())
                         .Case("of", 0x8)
                         .Case("sf", 0x4)
                         .Case("zf", 0x2)
                         .Case("cf", 0x1)
                         .Default(~0U);
    if (CFlag == ~0U)
      return Error(Tok.getLoc(), "Invalid conditional flags");

    // Each flag may appear at most once.
    if (CFlags & CFlag)
      return Error(Tok.getLoc(), "Duplicated conditional flag");
    CFlags |= CFlag;

    Parser.Lex(); // Eat one conditional flag
    Tok = Parser.getTok();
    // '}' ends the list after any flag; otherwise a ',' must separate flags,
    // and after the fourth flag only '}' is legal.
    if (Tok.is(AsmToken::RCurly)) {
      End = Tok.getEndLoc();
      Operands.push_back(X86Operand::CreateImm(
          MCConstantExpr::create(CFlags, Parser.getContext()), Start, End));
      Parser.Lex(); // Eat "}"
      return false;
    } else if (I == 3) {
      return Error(Tok.getLoc(), "Expected } at this point");
    } else if (Tok.isNot(AsmToken::Comma)) {
      return Error(Tok.getLoc(), "Expected } or , at this point");
    }
    Parser.Lex(); // Eat ","
  }
  llvm_unreachable("Unexpected control flow");
}
2399
2400 /// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
                                         SMLoc &End) {
  const AsmToken &Tok = getTok();
  AsmFieldInfo Info;

  // Drop the optional '.'.
  StringRef DotDispStr = Tok.getString();
  DotDispStr.consume_front(".");
  bool TrailingDot = false;

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    if (DotDispStr.getAsInteger(10, DotDisp))
      return Error(Tok.getLoc(), "Unexpected offset");
    Info.Offset = DotDisp.getZExtValue();
  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
             Tok.is(AsmToken::Identifier)) {
    // A trailing dot is remembered and pushed back to the lexer afterwards.
    TrailingDot = DotDispStr.consume_back(".");
    const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    const StringRef Base = BaseMember.first, Member = BaseMember.second;
    // Resolve the field, trying in order: a field of the expression's current
    // type, a field of the symbol's type, a plain field lookup, and finally
    // the frontend's inline-asm field lookup (when a callback is available).
    // All lookups return true on failure, so the whole chain failing means
    // the reference could not be resolved.
    if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
        getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
        getParser().lookUpField(DotDispStr, Info) &&
        (!SemaCallback ||
         SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
  } else {
    return Error(Tok.getLoc(), "Unexpected token type!");
  }

  // Eat the DotExpression and update End
  End = SMLoc::getFromPointer(DotDispStr.data());
  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
  while (Tok.getLoc().getPointer() < DotExprEndLoc)
    Lex();
  if (TrailingDot)
    getLexer().UnLex(AsmToken(AsmToken::Dot, "."));
  // Fold the field offset into the expression and record its type.
  SM.addImm(Info.Offset);
  SM.setTypeInfo(Info.Type);
  return false;
}
2443
2444 /// Parse the 'offset' operator.
2445 /// This operator is used to specify the location of a given operand
ParseIntelOffsetOperator(const MCExpr * & Val,StringRef & ID,InlineAsmIdentifierInfo & Info,SMLoc & End)2446 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2447 InlineAsmIdentifierInfo &Info,
2448 SMLoc &End) {
2449 // Eat offset, mark start of identifier.
2450 SMLoc Start = Lex().getLoc();
2451 ID = getTok().getString();
2452 if (!isParsingMSInlineAsm()) {
2453 if ((getTok().isNot(AsmToken::Identifier) &&
2454 getTok().isNot(AsmToken::String)) ||
2455 getParser().parsePrimaryExpr(Val, End, nullptr))
2456 return Error(Start, "unexpected token!");
2457 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2458 return Error(Start, "unable to lookup expression");
2459 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2460 return Error(Start, "offset operator cannot yet handle constants");
2461 }
2462 return false;
2463 }
2464
2465 // Query a candidate string for being an Intel assembly operator
2466 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyIntelInlineAsmOperator(StringRef Name)2467 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2468 return StringSwitch<unsigned>(Name)
2469 .Cases("TYPE","type",IOK_TYPE)
2470 .Cases("SIZE","size",IOK_SIZE)
2471 .Cases("LENGTH","length",IOK_LENGTH)
2472 .Default(IOK_INVALID);
2473 }
2474
2475 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2476 /// returns the number of elements in an array. It returns the value 1 for
2477 /// non-array variables. The SIZE operator returns the size of a C or C++
2478 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2479 /// TYPE operator returns the size of a C or C++ type or variable. If the
2480 /// variable is an array, TYPE returns the size of a single element.
unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  Parser.Lex(); // Eat operator.

  // Resolve the operand identifier through the frontend, without evaluating
  // it as an expression.
  const MCExpr *Val = nullptr;
  InlineAsmIdentifierInfo Info;
  SMLoc Start = Tok.getLoc(), End;
  StringRef Identifier = Tok.getString();
  if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
                                    /*IsUnevaluatedOperand=*/true, End))
    return 0;

  // These operators apply only to variables known to the frontend.
  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    Error(Start, "unable to lookup expression");
    return 0;
  }

  // Pick the queried property; 0 signals failure to the caller.
  unsigned CVal = 0;
  switch(OpKind) {
  default: llvm_unreachable("Unexpected operand kind!");
  case IOK_LENGTH: CVal = Info.Var.Length; break;
  case IOK_SIZE: CVal = Info.Var.Size; break;
  case IOK_TYPE: CVal = Info.Var.Type; break;
  }

  return CVal;
}
2509
2510 // Query a candidate string for being an Intel assembly operator
2511 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyMasmOperator(StringRef Name)2512 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2513 return StringSwitch<unsigned>(Name.lower())
2514 .Case("type", MOK_TYPE)
2515 .Cases("size", "sizeof", MOK_SIZEOF)
2516 .Cases("length", "lengthof", MOK_LENGTHOF)
2517 .Default(MOK_INVALID);
2518 }
2519
2520 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2521 /// returns the number of elements in an array. It returns the value 1 for
2522 /// non-array variables. The SIZEOF operator returns the size of a type or
2523 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2524 /// The TYPE operator returns the size of a variable. If the variable is an
2525 /// array, TYPE returns the size of a single element.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  // Not a bare type name: evaluate the operand as a full Intel expression and
  // query the state machine for the requested property.
  if (!Val) {
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    // A zero result here means the expression's type could not be determined.
    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}
2576
ParseIntelMemoryOperandSize(unsigned & Size)2577 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2578 Size = StringSwitch<unsigned>(getTok().getString())
2579 .Cases("BYTE", "byte", 8)
2580 .Cases("WORD", "word", 16)
2581 .Cases("DWORD", "dword", 32)
2582 .Cases("FLOAT", "float", 32)
2583 .Cases("LONG", "long", 32)
2584 .Cases("FWORD", "fword", 48)
2585 .Cases("DOUBLE", "double", 64)
2586 .Cases("QWORD", "qword", 64)
2587 .Cases("MMWORD","mmword", 64)
2588 .Cases("XWORD", "xword", 80)
2589 .Cases("TBYTE", "tbyte", 80)
2590 .Cases("XMMWORD", "xmmword", 128)
2591 .Cases("YMMWORD", "ymmword", 256)
2592 .Cases("ZMMWORD", "zmmword", 512)
2593 .Default(0);
2594 if (Size) {
2595 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2596 if (!(Tok.getString() == "PTR" || Tok.getString() == "ptr"))
2597 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2598 Lex(); // Eat ptr.
2599 }
2600 return false;
2601 }
2602
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  // Parse one Intel-syntax operand: a rounding-mode suffix, a bare register,
  // or a (possibly segment-overridden) immediate/memory expression.
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  // Nonzero Size means an explicit '<size> ptr' prefix was consumed.
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  // For MS inline asm, record rewrites so the expression can be re-emitted.
  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Fold the symbolic displacement and the accumulated immediate into a
  // single displacement expression.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    // Pure immediate operand.
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  MCRegister BaseReg = SM.getBaseReg();
  MCRegister IndexReg = SM.getIndexReg();
  // An index register alongside RIP: drop RIP from the base slot.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = MCRegister();
  unsigned Scale = SM.getScale();
  // Without an explicit 'ptr' size, infer the access size from the element
  // size of the parsed expression (bytes -> bits via << 3).
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // ESP/RSP cannot be an index; if no scale was written, swap it into the
  // base slot (unless the base is already ESP/RSP).
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
                                   IsUnconditionalBranch && is64BitMode(),
                                   Start, End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  MCRegister DefaultBaseReg;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    if (is64BitMode() &&
        ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) {
      DefaultBaseReg = X86::RIP;
    }
    // Unconditional branches to sized or symbolic memory operands are
    // indirect; mark them so and (in 64-bit mode) make them RIP-relative.
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  // Emit a register-form memory operand when any base/index/segment/default
  // base is present; otherwise a displacement-only memory operand.
  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}
2775
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  // Parse one AT&T-syntax operand: '$'-immediate, rounding-mode suffix, or a
  // register/memory operand (with optional segment override).
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supercede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    // '{' introduces a rounding-mode / sae operand.
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This a memory operand or a register. We have some parsing complications
    // as a '(' may be part of an immediate expression or the addressing mode
    // block. This is complicated by the fact that an assembler-level variable
    // may refer either to a register or an immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    MCRegister Reg;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getReg();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return register that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}
2843
2844 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2845 // otherwise the EFLAGS Condition Code enumerator.
ParseConditionCode(StringRef CC)2846 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2847 return StringSwitch<X86::CondCode>(CC)
2848 .Case("o", X86::COND_O) // Overflow
2849 .Case("no", X86::COND_NO) // No Overflow
2850 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2851 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2852 .Cases("e", "z", X86::COND_E) // Equal/Zero
2853 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2854 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2855 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2856 .Case("s", X86::COND_S) // Sign
2857 .Case("ns", X86::COND_NS) // No Sign
2858 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2859 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2860 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2861 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2862 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2863 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2864 .Default(X86::COND_INVALID);
2865 }
2866
2867 // true on failure, false otherwise
2868 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)2869 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2870 const SMLoc &StartLoc) {
2871 MCAsmParser &Parser = getParser();
2872 // Assuming we are just pass the '{' mark, quering the next token
2873 // Searched for {z}, but none was found. Return false, as no parsing error was
2874 // encountered
2875 if (!(getLexer().is(AsmToken::Identifier) &&
2876 (getLexer().getTok().getIdentifier() == "z")))
2877 return false;
2878 Parser.Lex(); // Eat z
2879 // Query and eat the '}' mark
2880 if (!getLexer().is(AsmToken::RCurly))
2881 return Error(getLexer().getLoc(), "Expected } at this point");
2882 Parser.Lex(); // Eat '}'
2883 // Assign Z with the {z} mark operand
2884 Z = X86Operand::CreateToken("{z}", StartLoc);
2885 return false;
2886 }
2887
// true on failure, false otherwise
// Parses the optional AVX512 decorations that may follow an operand: a memory
// broadcast specifier ({1to<NUM>}), an op-mask register ({%k<NUM>}) and/or a
// zeroing mark ({z}).  If the next token is not '{' this is a no-op.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}: a broadcast starts with an
    // integer token, an op-mask (or {z}) does not.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // The lexer split "1to8" into "1" and "to8"; glue the pieces back
      // together and recognize only reasonable suffixes.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex(); // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z};
      // the last one has no meaning, but GCC accepts it.
      // At this point the '{' has already been consumed.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the alleged '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark.
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        // If {z} was parsed, eat the '{' opening the {%k<NUM>} mark;
        // otherwise reuse the location of the already-consumed '{'.
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected.
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 cannot encode a write mask (its encoding means "no mask").
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // A {%k<NUM>} mark was found; a trailing {z} may still follow.
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Either a parsing error occurred, or the expected {z} mark was
          // missing - report an error.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
2976
/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
/// Returns true on failure. On success, appends the parsed memory operand
/// (or a plain DX register for the out/in (%dx) special case) to \p Operands.
bool X86AsmParser::ParseMemOperand(MCRegister SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial values passed in, we are in one of these cases
  // (with the current position marked (*)):

  // 1. seg : * disp (base-index-scale-expr)
  // 2. seg : *(disp) (base-index-scale-expr)
  // 3. seg : *(base-index-scale-expr)
  // 4. disp *(base-index-scale-expr)
  // 5. *(disp) (base-index-scale-expr)
  // 6. *(base-index-scale-expr)
  // 7. disp *
  // 8. *(disp)

  // If we do not have a displacement yet, check if we're in cases 4 or 6 by
  // checking if the first object after the parenthesis is a register (or an
  // identifier referring to a register) and parse the displacement or default
  // to 0 as appropriate.
  auto isAtMemOperand = [this]() {
    if (this->getLexer().isNot(AsmToken::LParen))
      return false;
    AsmToken Buf[2];
    StringRef Id;
    // Peek past the '(' without consuming anything (true = skip whitespace).
    auto TokCount = this->getLexer().peekTokens(Buf, true);
    if (TokCount == 0)
      return false;
    switch (Buf[0].getKind()) {
    case AsmToken::Percent:
    case AsmToken::Comma:
      return true;
    // These lower cases are doing a peekIdentifier: a '@' or '$' immediately
    // followed by an identifier/string forms a single identifier token.
    case AsmToken::At:
    case AsmToken::Dollar:
      if ((TokCount > 1) &&
          (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
          (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
        Id = StringRef(Buf[0].getLoc().getPointer(),
                       Buf[1].getIdentifier().size() + 1);
      break;
    case AsmToken::Identifier:
    case AsmToken::String:
      Id = Buf[0].getIdentifier();
      break;
    default:
      return false;
    }
    // We have an ID. Check if it is bound to a register (i.e. an assembler
    // variable assigned from a register, which appears as an X86MCExpr).
    if (!Id.empty()) {
      MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
      if (Sym->isVariable()) {
        auto V = Sym->getVariableValue();
        return isa<X86MCExpr>(V);
      }
    }
    return false;
  };

  if (!Disp) {
    // Parse immediate if we're not at a mem operand yet.
    if (!isAtMemOperand()) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
        return true;
      assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
    } else {
      // Disp is implicitly zero if we haven't parsed it yet.
      Disp = MCConstantExpr::create(0, Parser.getContext());
    }
  }

  // We are now either at the end of the operand or at the '(' at the start of a
  // base-index-scale-expr.

  if (!parseOptionalToken(AsmToken::LParen)) {
    // No base-index-scale part: emit a displacement-only memory operand
    // (with the segment override if one was parsed).
    if (!SegReg)
      Operands.push_back(
          X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
    else
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                               0, 0, 1, StartLoc, EndLoc));
    return false;
  }

  // If we reached here, then eat the '(' and Process
  // the rest of the memory operand.
  MCRegister BaseReg, IndexReg;
  unsigned Scale = 1;
  SMLoc BaseLoc = getLexer().getLoc();
  const MCExpr *E;
  StringRef ErrMsg;

  // Parse BaseReg if one is provided.
  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
    if (Parser.parseExpression(E, EndLoc) ||
        check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
      return true;

    // Check the register.
    BaseReg = cast<X86MCExpr>(E)->getReg();
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
      return Error(BaseLoc, "eiz and riz can only be used as index registers",
                   SMRange(BaseLoc, EndLoc));
  }

  if (parseOptionalToken(AsmToken::Comma)) {
    // Following the comma we should have either an index register, or a scale
    // value. We don't support the latter form, but we want to parse it
    // correctly.
    //
    // Even though it would be completely consistent to support syntax like
    // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().isNot(AsmToken::RParen)) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
        return true;

      if (!isa<X86MCExpr>(E)) {
        // We've parsed an unexpected Scale Value instead of an index
        // register. Interpret it as an absolute.
        int64_t ScaleVal;
        if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
          return Error(Loc, "expected absolute expression");
        if (ScaleVal != 1)
          Warning(Loc, "scale factor without index register is ignored");
        Scale = 1;
      } else { // IndexReg Found.
        IndexReg = cast<X86MCExpr>(E)->getReg();

        // %rip-relative addressing admits neither a base-with-index nor an
        // index register.
        if (BaseReg == X86::RIP)
          return Error(Loc,
                       "%rip as base register can not have an index register");
        if (IndexReg == X86::RIP)
          return Error(Loc, "%rip is not allowed as an index register");

        if (parseOptionalToken(AsmToken::Comma)) {
          // Parse the scale amount:
          //  ::= ',' [scale-expression]

          // A scale amount without an index is ignored.
          if (getLexer().isNot(AsmToken::RParen)) {
            int64_t ScaleVal;
            if (Parser.parseTokenLoc(Loc) ||
                Parser.parseAbsoluteExpression(ScaleVal))
              return Error(Loc, "expected scale expression");
            Scale = (unsigned)ScaleVal;
            // Validate the scale amount.
            if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
                Scale != 1)
              return Error(Loc, "scale factor in 16-bit address must be 1");
            if (checkScale(Scale, ErrMsg))
              return Error(Loc, ErrMsg);
          }
        }
      }
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
    return true;

  // This is to support the otherwise illegal operand (%dx) found in various
  // unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must
  // be supported. Emit such DX variants as a distinct operand kind so only
  // the special-cased instructions accept them.
  if (BaseReg == X86::DX && !IndexReg && Scale == 1 && !SegReg &&
      isa<MCConstantExpr>(Disp) &&
      cast<MCConstantExpr>(Disp)->getValue() == 0) {
    Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
    return false;
  }

  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(BaseLoc, ErrMsg);

  // If the displacement is a constant, check overflows. For 64-bit addressing,
  // gas requires isInt<32> and otherwise reports an error. For others, gas
  // reports a warning and allows a wider range. E.g. gas allows
  // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
  // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
  if (BaseReg || IndexReg) {
    if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
      auto Imm = CE->getValue();
      bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
                  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
      bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
      if (Is64) {
        if (!isInt<32>(Imm))
          return Error(BaseLoc, "displacement " + Twine(Imm) +
                                    " is not within [-2147483648, 2147483647]");
      } else if (!Is16) {
        if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
          Warning(BaseLoc, "displacement " + Twine(Imm) +
                               " shortened to 32-bit signed " +
                               Twine(static_cast<int32_t>(Imm)));
        }
      } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
        Warning(BaseLoc, "displacement " + Twine(Imm) +
                             " shortened to 16-bit signed " +
                             Twine(static_cast<int16_t>(Imm)));
      }
    }
  }

  if (SegReg || BaseReg || IndexReg)
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                             BaseReg, IndexReg, Scale, StartLoc,
                                             EndLoc));
  else
    Operands.push_back(
        X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
  return false;
}
3193
3194 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)3195 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3196 MCAsmParser &Parser = getParser();
3197 // See if this is a register first.
3198 if (getTok().is(AsmToken::Percent) ||
3199 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3200 MatchRegisterName(Parser.getTok().getString()))) {
3201 SMLoc StartLoc = Parser.getTok().getLoc();
3202 MCRegister RegNo;
3203 if (parseRegister(RegNo, StartLoc, EndLoc))
3204 return true;
3205 Res = X86MCExpr::create(RegNo, Parser.getContext());
3206 return false;
3207 }
3208 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3209 }
3210
parseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)3211 bool X86AsmParser::parseInstruction(ParseInstructionInfo &Info, StringRef Name,
3212 SMLoc NameLoc, OperandVector &Operands) {
3213 MCAsmParser &Parser = getParser();
3214 InstInfo = &Info;
3215
3216 // Reset the forced VEX encoding.
3217 ForcedOpcodePrefix = OpcodePrefix_Default;
3218 ForcedDispEncoding = DispEncoding_Default;
3219 UseApxExtendedReg = false;
3220 ForcedNoFlag = false;
3221
3222 // Parse pseudo prefixes.
3223 while (true) {
3224 if (Name == "{") {
3225 if (getLexer().isNot(AsmToken::Identifier))
3226 return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
3227 std::string Prefix = Parser.getTok().getString().lower();
3228 Parser.Lex(); // Eat identifier.
3229 if (getLexer().isNot(AsmToken::RCurly))
3230 return Error(Parser.getTok().getLoc(), "Expected '}'");
3231 Parser.Lex(); // Eat curly.
3232
3233 if (Prefix == "rex")
3234 ForcedOpcodePrefix = OpcodePrefix_REX;
3235 else if (Prefix == "rex2")
3236 ForcedOpcodePrefix = OpcodePrefix_REX2;
3237 else if (Prefix == "vex")
3238 ForcedOpcodePrefix = OpcodePrefix_VEX;
3239 else if (Prefix == "vex2")
3240 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3241 else if (Prefix == "vex3")
3242 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3243 else if (Prefix == "evex")
3244 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3245 else if (Prefix == "disp8")
3246 ForcedDispEncoding = DispEncoding_Disp8;
3247 else if (Prefix == "disp32")
3248 ForcedDispEncoding = DispEncoding_Disp32;
3249 else if (Prefix == "nf")
3250 ForcedNoFlag = true;
3251 else
3252 return Error(NameLoc, "unknown prefix");
3253
3254 NameLoc = Parser.getTok().getLoc();
3255 if (getLexer().is(AsmToken::LCurly)) {
3256 Parser.Lex();
3257 Name = "{";
3258 } else {
3259 if (getLexer().isNot(AsmToken::Identifier))
3260 return Error(Parser.getTok().getLoc(), "Expected identifier");
3261 // FIXME: The mnemonic won't match correctly if its not in lower case.
3262 Name = Parser.getTok().getString();
3263 Parser.Lex();
3264 }
3265 continue;
3266 }
3267 // Parse MASM style pseudo prefixes.
3268 if (isParsingMSInlineAsm()) {
3269 if (Name.equals_insensitive("vex"))
3270 ForcedOpcodePrefix = OpcodePrefix_VEX;
3271 else if (Name.equals_insensitive("vex2"))
3272 ForcedOpcodePrefix = OpcodePrefix_VEX2;
3273 else if (Name.equals_insensitive("vex3"))
3274 ForcedOpcodePrefix = OpcodePrefix_VEX3;
3275 else if (Name.equals_insensitive("evex"))
3276 ForcedOpcodePrefix = OpcodePrefix_EVEX;
3277
3278 if (ForcedOpcodePrefix != OpcodePrefix_Default) {
3279 if (getLexer().isNot(AsmToken::Identifier))
3280 return Error(Parser.getTok().getLoc(), "Expected identifier");
3281 // FIXME: The mnemonic won't match correctly if its not in lower case.
3282 Name = Parser.getTok().getString();
3283 NameLoc = Parser.getTok().getLoc();
3284 Parser.Lex();
3285 }
3286 }
3287 break;
3288 }
3289
3290 // Support the suffix syntax for overriding displacement size as well.
3291 if (Name.consume_back(".d32")) {
3292 ForcedDispEncoding = DispEncoding_Disp32;
3293 } else if (Name.consume_back(".d8")) {
3294 ForcedDispEncoding = DispEncoding_Disp8;
3295 }
3296
3297 StringRef PatchedName = Name;
3298
3299 // Hack to skip "short" following Jcc.
3300 if (isParsingIntelSyntax() &&
3301 (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
3302 PatchedName == "jcxz" || PatchedName == "jecxz" ||
3303 (PatchedName.starts_with("j") &&
3304 ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
3305 StringRef NextTok = Parser.getTok().getString();
3306 if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
3307 : NextTok == "short") {
3308 SMLoc NameEndLoc =
3309 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
3310 // Eat the short keyword.
3311 Parser.Lex();
3312 // MS and GAS ignore the short keyword; they both determine the jmp type
3313 // based on the distance of the label. (NASM does emit different code with
3314 // and without "short," though.)
3315 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
3316 NextTok.size() + 1);
3317 }
3318 }
3319
3320 // FIXME: Hack to recognize setneb as setne.
3321 if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
3322 PatchedName != "setzub" && PatchedName != "setzunb" &&
3323 PatchedName != "setb" && PatchedName != "setnb")
3324 PatchedName = PatchedName.substr(0, Name.size()-1);
3325
3326 unsigned ComparisonPredicate = ~0U;
3327
3328 // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
3329 if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
3330 (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
3331 PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
3332 PatchedName.ends_with("bf16") || PatchedName.ends_with("ps") ||
3333 PatchedName.ends_with("pd"))) {
3334 bool IsVCMP = PatchedName[0] == 'v';
3335 unsigned CCIdx = IsVCMP ? 4 : 3;
3336 unsigned suffixLength = PatchedName.ends_with("bf16") ? 5 : 2;
3337 unsigned CC = StringSwitch<unsigned>(
3338 PatchedName.slice(CCIdx, PatchedName.size() - suffixLength))
3339 .Case("eq", 0x00)
3340 .Case("eq_oq", 0x00)
3341 .Case("lt", 0x01)
3342 .Case("lt_os", 0x01)
3343 .Case("le", 0x02)
3344 .Case("le_os", 0x02)
3345 .Case("unord", 0x03)
3346 .Case("unord_q", 0x03)
3347 .Case("neq", 0x04)
3348 .Case("neq_uq", 0x04)
3349 .Case("nlt", 0x05)
3350 .Case("nlt_us", 0x05)
3351 .Case("nle", 0x06)
3352 .Case("nle_us", 0x06)
3353 .Case("ord", 0x07)
3354 .Case("ord_q", 0x07)
3355 /* AVX only from here */
3356 .Case("eq_uq", 0x08)
3357 .Case("nge", 0x09)
3358 .Case("nge_us", 0x09)
3359 .Case("ngt", 0x0A)
3360 .Case("ngt_us", 0x0A)
3361 .Case("false", 0x0B)
3362 .Case("false_oq", 0x0B)
3363 .Case("neq_oq", 0x0C)
3364 .Case("ge", 0x0D)
3365 .Case("ge_os", 0x0D)
3366 .Case("gt", 0x0E)
3367 .Case("gt_os", 0x0E)
3368 .Case("true", 0x0F)
3369 .Case("true_uq", 0x0F)
3370 .Case("eq_os", 0x10)
3371 .Case("lt_oq", 0x11)
3372 .Case("le_oq", 0x12)
3373 .Case("unord_s", 0x13)
3374 .Case("neq_us", 0x14)
3375 .Case("nlt_uq", 0x15)
3376 .Case("nle_uq", 0x16)
3377 .Case("ord_s", 0x17)
3378 .Case("eq_us", 0x18)
3379 .Case("nge_uq", 0x19)
3380 .Case("ngt_uq", 0x1A)
3381 .Case("false_os", 0x1B)
3382 .Case("neq_os", 0x1C)
3383 .Case("ge_oq", 0x1D)
3384 .Case("gt_oq", 0x1E)
3385 .Case("true_us", 0x1F)
3386 .Default(~0U);
3387 if (CC != ~0U && (IsVCMP || CC < 8) &&
3388 (IsVCMP || PatchedName.back() != 'h')) {
3389 if (PatchedName.ends_with("ss"))
3390 PatchedName = IsVCMP ? "vcmpss" : "cmpss";
3391 else if (PatchedName.ends_with("sd"))
3392 PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
3393 else if (PatchedName.ends_with("ps"))
3394 PatchedName = IsVCMP ? "vcmpps" : "cmpps";
3395 else if (PatchedName.ends_with("pd"))
3396 PatchedName = IsVCMP ? "vcmppd" : "cmppd";
3397 else if (PatchedName.ends_with("sh"))
3398 PatchedName = "vcmpsh";
3399 else if (PatchedName.ends_with("ph"))
3400 PatchedName = "vcmpph";
3401 else if (PatchedName.ends_with("bf16"))
3402 PatchedName = "vcmpbf16";
3403 else
3404 llvm_unreachable("Unexpected suffix!");
3405
3406 ComparisonPredicate = CC;
3407 }
3408 }
3409
3410 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3411 if (PatchedName.starts_with("vpcmp") &&
3412 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3413 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3414 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3415 unsigned CC = StringSwitch<unsigned>(
3416 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3417 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
3418 .Case("lt", 0x1)
3419 .Case("le", 0x2)
3420 //.Case("false", 0x3) // Not a documented alias.
3421 .Case("neq", 0x4)
3422 .Case("nlt", 0x5)
3423 .Case("nle", 0x6)
3424 //.Case("true", 0x7) // Not a documented alias.
3425 .Default(~0U);
3426 if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
3427 switch (PatchedName.back()) {
3428 default: llvm_unreachable("Unexpected character!");
3429 case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
3430 case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
3431 case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
3432 case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
3433 }
3434 // Set up the immediate to push into the operands later.
3435 ComparisonPredicate = CC;
3436 }
3437 }
3438
3439 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
3440 if (PatchedName.starts_with("vpcom") &&
3441 (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
3442 PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
3443 unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
3444 unsigned CC = StringSwitch<unsigned>(
3445 PatchedName.slice(5, PatchedName.size() - SuffixSize))
3446 .Case("lt", 0x0)
3447 .Case("le", 0x1)
3448 .Case("gt", 0x2)
3449 .Case("ge", 0x3)
3450 .Case("eq", 0x4)
3451 .Case("neq", 0x5)
3452 .Case("false", 0x6)
3453 .Case("true", 0x7)
3454 .Default(~0U);
3455 if (CC != ~0U) {
3456 switch (PatchedName.back()) {
3457 default: llvm_unreachable("Unexpected character!");
3458 case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
3459 case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
3460 case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
3461 case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
3462 }
3463 // Set up the immediate to push into the operands later.
3464 ComparisonPredicate = CC;
3465 }
3466 }
3467
3468 // Determine whether this is an instruction prefix.
3469 // FIXME:
3470 // Enhance prefixes integrity robustness. for example, following forms
3471 // are currently tolerated:
3472 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
3473 // lock addq %rax, %rbx ; Destination operand must be of memory type
3474 // xacquire <insn> ; xacquire must be accompanied by 'lock'
3475 bool IsPrefix =
3476 StringSwitch<bool>(Name)
3477 .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
3478 .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
3479 .Cases("xacquire", "xrelease", true)
3480 .Cases("acquire", "release", isParsingIntelSyntax())
3481 .Default(false);
3482
3483 auto isLockRepeatNtPrefix = [](StringRef N) {
3484 return StringSwitch<bool>(N)
3485 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
3486 .Default(false);
3487 };
3488
3489 bool CurlyAsEndOfStatement = false;
3490
3491 unsigned Flags = X86::IP_NO_PREFIX;
3492 while (isLockRepeatNtPrefix(Name.lower())) {
3493 unsigned Prefix =
3494 StringSwitch<unsigned>(Name)
3495 .Cases("lock", "lock", X86::IP_HAS_LOCK)
3496 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
3497 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
3498 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
3499 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
3500 Flags |= Prefix;
3501 if (getLexer().is(AsmToken::EndOfStatement)) {
3502 // We don't have real instr with the given prefix
3503 // let's use the prefix as the instr.
3504 // TODO: there could be several prefixes one after another
3505 Flags = X86::IP_NO_PREFIX;
3506 break;
3507 }
3508 // FIXME: The mnemonic won't match correctly if its not in lower case.
3509 Name = Parser.getTok().getString();
3510 Parser.Lex(); // eat the prefix
3511 // Hack: we could have something like "rep # some comment" or
3512 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
3513 while (Name.starts_with(";") || Name.starts_with("\n") ||
3514 Name.starts_with("#") || Name.starts_with("\t") ||
3515 Name.starts_with("/")) {
3516 // FIXME: The mnemonic won't match correctly if its not in lower case.
3517 Name = Parser.getTok().getString();
3518 Parser.Lex(); // go to next prefix or instr
3519 }
3520 }
3521
3522 if (Flags)
3523 PatchedName = Name;
3524
3525 // Hacks to handle 'data16' and 'data32'
3526 if (PatchedName == "data16" && is16BitMode()) {
3527 return Error(NameLoc, "redundant data16 prefix");
3528 }
3529 if (PatchedName == "data32") {
3530 if (is32BitMode())
3531 return Error(NameLoc, "redundant data32 prefix");
3532 if (is64BitMode())
3533 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
3534 // Hack to 'data16' for the table lookup.
3535 PatchedName = "data16";
3536
3537 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3538 StringRef Next = Parser.getTok().getString();
3539 getLexer().Lex();
3540 // data32 effectively changes the instruction suffix.
3541 // TODO Generalize.
3542 if (Next == "callw")
3543 Next = "calll";
3544 if (Next == "ljmpw")
3545 Next = "ljmpl";
3546
3547 Name = Next;
3548 PatchedName = Name;
3549 ForcedDataPrefix = X86::Is32Bit;
3550 IsPrefix = false;
3551 }
3552 }
3553
3554 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
3555
3556 // Push the immediate if we extracted one from the mnemonic.
3557 if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
3558 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3559 getParser().getContext());
3560 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3561 }
3562
3563 // Parse condtional flags after mnemonic.
3564 if ((Name.starts_with("ccmp") || Name.starts_with("ctest")) &&
3565 parseCFlagsOp(Operands))
3566 return true;
3567
3568 // This does the actual operand parsing. Don't parse any more if we have a
3569 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
3570 // just want to parse the "lock" as the first instruction and the "incl" as
3571 // the next one.
3572 if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
3573 // Parse '*' modifier.
3574 if (getLexer().is(AsmToken::Star))
3575 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
3576
3577 // Read the operands.
3578 while (true) {
3579 if (parseOperand(Operands, Name))
3580 return true;
3581 if (HandleAVX512Operand(Operands))
3582 return true;
3583
3584 // check for comma and eat it
3585 if (getLexer().is(AsmToken::Comma))
3586 Parser.Lex();
3587 else
3588 break;
3589 }
3590
3591 // In MS inline asm curly braces mark the beginning/end of a block,
3592 // therefore they should be interepreted as end of statement
3593 CurlyAsEndOfStatement =
3594 isParsingIntelSyntax() && isParsingMSInlineAsm() &&
3595 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
3596 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
3597 return TokError("unexpected token in argument list");
3598 }
3599
3600 // Push the immediate if we extracted one from the mnemonic.
3601 if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
3602 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
3603 getParser().getContext());
3604 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
3605 }
3606
3607 // Consume the EndOfStatement or the prefix separator Slash
3608 if (getLexer().is(AsmToken::EndOfStatement) ||
3609 (IsPrefix && getLexer().is(AsmToken::Slash)))
3610 Parser.Lex();
3611 else if (CurlyAsEndOfStatement)
3612 // Add an actual EndOfStatement before the curly brace
3613 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
3614 getLexer().getTok().getLoc(), 0);
3615
3616 // This is for gas compatibility and cannot be done in td.
3617 // Adding "p" for some floating point with no argument.
3618 // For example: fsub --> fsubp
3619 bool IsFp =
3620 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
3621 if (IsFp && Operands.size() == 1) {
3622 const char *Repl = StringSwitch<const char *>(Name)
3623 .Case("fsub", "fsubp")
3624 .Case("fdiv", "fdivp")
3625 .Case("fsubr", "fsubrp")
3626 .Case("fdivr", "fdivrp");
3627 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
3628 }
3629
3630 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
3631 (Operands.size() == 3)) {
3632 X86Operand &Op1 = (X86Operand &)*Operands[1];
3633 X86Operand &Op2 = (X86Operand &)*Operands[2];
3634 SMLoc Loc = Op1.getEndLoc();
3635 // Moving a 32 or 16 bit value into a segment register has the same
3636 // behavior. Modify such instructions to always take shorter form.
3637 if (Op1.isReg() && Op2.isReg() &&
3638 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
3639 Op2.getReg()) &&
3640 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
3641 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
3642 // Change instruction name to match new instruction.
3643 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
3644 Name = is16BitMode() ? "movw" : "movl";
3645 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
3646 }
3647 // Select the correct equivalent 16-/32-bit source register.
3648 MCRegister Reg =
3649 getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
3650 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
3651 }
3652 }
3653
3654 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
3655 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
3656 // documented form in various unofficial manuals, so a lot of code uses it.
3657 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
3658 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
3659 Operands.size() == 3) {
3660 X86Operand &Op = (X86Operand &)*Operands.back();
3661 if (Op.isDXReg())
3662 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3663 Op.getEndLoc());
3664 }
3665 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
3666 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
3667 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
3668 Operands.size() == 3) {
3669 X86Operand &Op = (X86Operand &)*Operands[1];
3670 if (Op.isDXReg())
3671 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
3672 Op.getEndLoc());
3673 }
3674
3675 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
3676 bool HadVerifyError = false;
3677
3678 // Append default arguments to "ins[bwld]"
3679 if (Name.starts_with("ins") &&
3680 (Operands.size() == 1 || Operands.size() == 3) &&
3681 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
3682 Name == "ins")) {
3683
3684 AddDefaultSrcDestOperands(TmpOperands,
3685 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
3686 DefaultMemDIOperand(NameLoc));
3687 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3688 }
3689
3690 // Append default arguments to "outs[bwld]"
3691 if (Name.starts_with("outs") &&
3692 (Operands.size() == 1 || Operands.size() == 3) &&
3693 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
3694 Name == "outsd" || Name == "outs")) {
3695 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3696 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
3697 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3698 }
3699
3700 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
3701 // values of $SIREG according to the mode. It would be nice if this
3702 // could be achieved with InstAlias in the tables.
3703 if (Name.starts_with("lods") &&
3704 (Operands.size() == 1 || Operands.size() == 2) &&
3705 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
3706 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
3707 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
3708 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3709 }
3710
3711 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
3712 // values of $DIREG according to the mode. It would be nice if this
3713 // could be achieved with InstAlias in the tables.
3714 if (Name.starts_with("stos") &&
3715 (Operands.size() == 1 || Operands.size() == 2) &&
3716 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
3717 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
3718 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3719 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3720 }
3721
3722 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
3723 // values of $DIREG according to the mode. It would be nice if this
3724 // could be achieved with InstAlias in the tables.
3725 if (Name.starts_with("scas") &&
3726 (Operands.size() == 1 || Operands.size() == 2) &&
3727 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
3728 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
3729 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
3730 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3731 }
3732
3733 // Add default SI and DI operands to "cmps[bwlq]".
3734 if (Name.starts_with("cmps") &&
3735 (Operands.size() == 1 || Operands.size() == 3) &&
3736 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
3737 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
3738 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
3739 DefaultMemSIOperand(NameLoc));
3740 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3741 }
3742
3743 // Add default SI and DI operands to "movs[bwlq]".
3744 if (((Name.starts_with("movs") &&
3745 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
3746 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
3747 (Name.starts_with("smov") &&
3748 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
3749 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
3750 (Operands.size() == 1 || Operands.size() == 3)) {
3751 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
3752 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
3753 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
3754 DefaultMemDIOperand(NameLoc));
3755 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
3756 }
3757
  // Check if we encountered an error for one of the string instructions
3759 if (HadVerifyError) {
3760 return HadVerifyError;
3761 }
3762
3763 // Transforms "xlat mem8" into "xlatb"
3764 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
3765 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
3766 if (Op1.isMem8()) {
3767 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
3768 "size, (R|E)BX will be used for the location");
3769 Operands.pop_back();
3770 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
3771 }
3772 }
3773
3774 if (Flags)
3775 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
3776 return false;
3777 }
3778
convertSSEToAVX(MCInst & Inst)3779 static bool convertSSEToAVX(MCInst &Inst) {
3780 ArrayRef<X86TableEntry> Table{X86SSE2AVXTable};
3781 unsigned Opcode = Inst.getOpcode();
3782 const auto I = llvm::lower_bound(Table, Opcode);
3783 if (I == Table.end() || I->OldOpc != Opcode)
3784 return false;
3785
3786 Inst.setOpcode(I->NewOpc);
3787 // AVX variant of BLENDVPD/BLENDVPS/PBLENDVB instructions has more
3788 // operand compare to SSE variant, which is added below
3789 if (X86::isBLENDVPD(Opcode) || X86::isBLENDVPS(Opcode) ||
3790 X86::isPBLENDVB(Opcode))
3791 Inst.addOperand(Inst.getOperand(2));
3792
3793 return true;
3794 }
3795
// Target-specific post-processing of a successfully matched instruction,
// e.g. switching to a shorter or explicitly requested encoding. Returns true
// if Inst was changed; the caller loops until no further transform applies.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  // With -x86-sse2avx, rewrite SSE instructions to their AVX forms.
  if (MCOptions.X86Sse2Avx && convertSSEToAVX(Inst))
    return true;

  // Prefer the shorter 2-byte VEX prefix unless {vex3} was explicitly forced.
  if (ForcedOpcodePrefix != OpcodePrefix_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  // Use the dedicated shift/rotate-by-1 encodings when the immediate is 1.
  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  // Rewrite CMP/TEST into the APX CCMP/CTEST opcode when the user forced
  // {evex}. The two appended immediates are the default-flags value
  // (dfv = 0, i.e. none) and the source condition code (10 — presumably the
  // APX "always true" SCC, matching the 'ccmpt'/'ctest' alias; confirm
  // against the APX spec).
  auto replaceWithCCMPCTEST = [&](unsigned Opcode) -> bool {
    if (ForcedOpcodePrefix == OpcodePrefix_EVEX) {
      // Drop IP_USE_EVEX: the CCMP/CTEST opcodes are inherently EVEX.
      Inst.setFlags(~(X86::IP_USE_EVEX)&Inst.getFlags());
      Inst.setOpcode(Opcode);
      Inst.addOperand(MCOperand::createImm(0));
      Inst.addOperand(MCOperand::createImm(10));
      return true;
    }
    return false;
  };

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
// `{evex} cmp <>, <>` is alias of `ccmpt {dfv=} <>, <>`, and
// `{evex} test <>, <>` is alias of `ctest {dfv=} <>, <>`
#define FROM_TO(FROM, TO)                                                      \
  case X86::FROM:                                                              \
    return replaceWithCCMPCTEST(X86::TO);
    FROM_TO(CMP64rr, CCMP64rr)
    FROM_TO(CMP64mi32, CCMP64mi32)
    FROM_TO(CMP64mi8, CCMP64mi8)
    FROM_TO(CMP64mr, CCMP64mr)
    FROM_TO(CMP64ri32, CCMP64ri32)
    FROM_TO(CMP64ri8, CCMP64ri8)
    FROM_TO(CMP64rm, CCMP64rm)

    FROM_TO(CMP32rr, CCMP32rr)
    FROM_TO(CMP32mi, CCMP32mi)
    FROM_TO(CMP32mi8, CCMP32mi8)
    FROM_TO(CMP32mr, CCMP32mr)
    FROM_TO(CMP32ri, CCMP32ri)
    FROM_TO(CMP32ri8, CCMP32ri8)
    FROM_TO(CMP32rm, CCMP32rm)

    FROM_TO(CMP16rr, CCMP16rr)
    FROM_TO(CMP16mi, CCMP16mi)
    FROM_TO(CMP16mi8, CCMP16mi8)
    FROM_TO(CMP16mr, CCMP16mr)
    FROM_TO(CMP16ri, CCMP16ri)
    FROM_TO(CMP16ri8, CCMP16ri8)
    FROM_TO(CMP16rm, CCMP16rm)

    FROM_TO(CMP8rr, CCMP8rr)
    FROM_TO(CMP8mi, CCMP8mi)
    FROM_TO(CMP8mr, CCMP8mr)
    FROM_TO(CMP8ri, CCMP8ri)
    FROM_TO(CMP8rm, CCMP8rm)

    FROM_TO(TEST64rr, CTEST64rr)
    FROM_TO(TEST64mi32, CTEST64mi32)
    FROM_TO(TEST64mr, CTEST64mr)
    FROM_TO(TEST64ri32, CTEST64ri32)

    FROM_TO(TEST32rr, CTEST32rr)
    FROM_TO(TEST32mi, CTEST32mi)
    FROM_TO(TEST32mr, CTEST32mr)
    FROM_TO(TEST32ri, CTEST32ri)

    FROM_TO(TEST16rr, CTEST16rr)
    FROM_TO(TEST16mi, CTEST16mi)
    FROM_TO(TEST16mr, CTEST16mr)
    FROM_TO(TEST16ri, CTEST16ri)

    FROM_TO(TEST8rr, CTEST8rr)
    FROM_TO(TEST8mi, CTEST8mi)
    FROM_TO(TEST8mr, CTEST8mr)
    FROM_TO(TEST8ri, CTEST8ri)
#undef FROM_TO
  }
}
3906
// Emit target-specific diagnostics for a matched instruction before it is
// emitted. Returns the result of Error()/Warning(); a true return aborts the
// current match.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  // Complex FMA (VF[C]MADDC{P,S}H): the destination must not alias any
  // source register; scan from operand 2 onward.
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    MCRegister Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    MCRegister Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //     VFMULCPHZrr   Dest, Src1, Src2
    //     VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //     VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = ((TSFlags & X86II::EVEX_K) ? 2 : 1);
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // 4-source ops: the named source register implicitly denotes an aligned
    // group of four consecutive registers. Warn when the encoding value is
    // not a multiple of 4, since the hardware rounds down to the group start.
    MCRegister Src2 =
        Inst.getOperand(Inst.getNumOperands() - X86::AddrNumOperands - 1)
            .getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                         RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                         RegName.take_front(3) + Twine(GroupEnd) +
                         "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    // Gathers: the operand layout differs between EVEX (mask is a k-reg,
    // operand 1) and VEX (mask is a vector register) encodings, hence the
    // different operand indices below.
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      // EVEX form: destination and index register must differ.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      // VEX form: destination, vector mask, and index must all be distinct.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  } else if (isTCMMIMFP16PS(Opcode) || isTCMMRLFP16PS(Opcode) ||
             isTDPBF16PS(Opcode) || isTDPFP16PS(Opcode) || isTDPBSSD(Opcode) ||
             isTDPBSUD(Opcode) || isTDPBUSD(Opcode) || isTDPBUUD(Opcode)) {
    // AMX dot-product ops: all three tile registers must be distinct. This is
    // a hard error, unlike the warnings above.
    MCRegister SrcDest = Inst.getOperand(0).getReg();
    MCRegister Src1 = Inst.getOperand(2).getReg();
    MCRegister Src2 = Inst.getOperand(3).getReg();
    if (SrcDest == Src1 || SrcDest == Src2 || Src1 == Src2)
      return Error(Ops[0]->getStartLoc(), "all tmm registers must be distinct");
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCRegister HReg;
    // REX is required either by the instruction itself (REX.W) or by any
    // operand that is only reachable with REX (SPL/BPL/SIL/DIL, R8-R15, ...).
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      MCRegister Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  // PREFETCHIT0/1 only support a RIP-relative memory operand.
  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}
4017
emitWarningForSpecialLVIInstruction(SMLoc Loc)4018 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
4019 Warning(Loc, "Instruction may be vulnerable to LVI and "
4020 "requires manual mitigation");
4021 Note(SMLoc(), "See https://software.intel.com/"
4022 "security-software-guidance/insights/"
4023 "deep-dive-load-value-injection#specialinstructions"
4024 " for more information");
4025 }
4026
4027 /// RET instructions and also instructions that indirect calls/jumps from memory
4028 /// combine a load and a branch within a single instruction. To mitigate these
4029 /// instructions against LVI, they must be decomposed into separate load and
4030 /// branch instructions, with an LFENCE in between. For more details, see:
4031 /// - X86LoadValueInjectionRetHardening.cpp
4032 /// - X86LoadValueInjectionIndirectThunks.cpp
4033 /// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4034 ///
/// Emits the mitigation (or a warning for instructions that require manual
/// mitigation) as a side effect; this function does not return a value.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Harden returns by emitting, ahead of the RET itself (the caller emits
    // Inst after this hook runs):
    //   shl $0, (%sp)   ; no-op read-modify-write of the word at the stack
    //                   ; pointer, i.e. the return-address slot
    //   lfence          ; block speculation past this point
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Use the stack pointer of the current mode as the memory base register.
    MCRegister Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    // NOTE(review): SHL64mi is used in every mode; only the base register
    // width varies above — confirm this is intended for 16/32-bit mode.
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    // Memory-indirect jumps/calls fuse a load with a branch and cannot be
    // decomposed here; warn so the user can mitigate manually.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
4073
4074 /// To mitigate LVI, every instruction that performs a load can be followed by
4075 /// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
4077 /// manual mitigation. For more details, see:
4078 /// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
4079 ///
/// Emits an LFENCE (or a warning for instructions that require manual
/// mitigation) as a side effect; this function does not return a value.
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    // REP-prefixed compare/scan string instructions cannot be hardened by a
    // trailing LFENCE; warn instead.
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  // Otherwise, follow any loading instruction with an LFENCE to squash
  // mis-speculation (Inst itself was already emitted by the caller).
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}
4122
emitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)4123 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4124 MCStreamer &Out) {
4125 if (LVIInlineAsmHardening &&
4126 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
4127 applyLVICFIMitigation(Inst, Out);
4128
4129 Out.emitInstruction(Inst, getSTI());
4130
4131 if (LVIInlineAsmHardening &&
4132 getSTI().hasFeature(X86::FeatureLVILoadHardening))
4133 applyLVILoadHardeningMitigation(Inst, Out);
4134 }
4135
getPrefixes(OperandVector & Operands)4136 static unsigned getPrefixes(OperandVector &Operands) {
4137 unsigned Result = 0;
4138 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4139 if (Prefix.isPrefix()) {
4140 Result = Prefix.getPrefix();
4141 Operands.pop_back();
4142 }
4143 return Result;
4144 }
4145
matchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4146 bool X86AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4147 OperandVector &Operands,
4148 MCStreamer &Out, uint64_t &ErrorInfo,
4149 bool MatchingInlineAsm) {
4150 assert(!Operands.empty() && "Unexpect empty operand list!");
4151 assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
4152
4153 // First, handle aliases that expand to multiple instructions.
4154 MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
4155 Out, MatchingInlineAsm);
4156 unsigned Prefixes = getPrefixes(Operands);
4157
4158 MCInst Inst;
4159
4160 // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to
4161 // the encoder and printer.
4162 if (ForcedOpcodePrefix == OpcodePrefix_REX)
4163 Prefixes |= X86::IP_USE_REX;
4164 else if (ForcedOpcodePrefix == OpcodePrefix_REX2)
4165 Prefixes |= X86::IP_USE_REX2;
4166 else if (ForcedOpcodePrefix == OpcodePrefix_VEX)
4167 Prefixes |= X86::IP_USE_VEX;
4168 else if (ForcedOpcodePrefix == OpcodePrefix_VEX2)
4169 Prefixes |= X86::IP_USE_VEX2;
4170 else if (ForcedOpcodePrefix == OpcodePrefix_VEX3)
4171 Prefixes |= X86::IP_USE_VEX3;
4172 else if (ForcedOpcodePrefix == OpcodePrefix_EVEX)
4173 Prefixes |= X86::IP_USE_EVEX;
4174
4175 // Set encoded flags for {disp8} and {disp32}.
4176 if (ForcedDispEncoding == DispEncoding_Disp8)
4177 Prefixes |= X86::IP_USE_DISP8;
4178 else if (ForcedDispEncoding == DispEncoding_Disp32)
4179 Prefixes |= X86::IP_USE_DISP32;
4180
4181 if (Prefixes)
4182 Inst.setFlags(Prefixes);
4183
4184 return isParsingIntelSyntax()
4185 ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out,
4186 ErrorInfo, MatchingInlineAsm)
4187 : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out,
4188 ErrorInfo, MatchingInlineAsm);
4189 }
4190
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)4191 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4192 OperandVector &Operands, MCStreamer &Out,
4193 bool MatchingInlineAsm) {
4194 // FIXME: This should be replaced with a real .td file alias mechanism.
4195 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4196 // call.
4197 const char *Repl = StringSwitch<const char *>(Op.getToken())
4198 .Case("finit", "fninit")
4199 .Case("fsave", "fnsave")
4200 .Case("fstcw", "fnstcw")
4201 .Case("fstcww", "fnstcw")
4202 .Case("fstenv", "fnstenv")
4203 .Case("fstsw", "fnstsw")
4204 .Case("fstsww", "fnstsw")
4205 .Case("fclex", "fnclex")
4206 .Default(nullptr);
4207 if (Repl) {
4208 MCInst Inst;
4209 Inst.setOpcode(X86::WAIT);
4210 Inst.setLoc(IDLoc);
4211 if (!MatchingInlineAsm)
4212 emitInstruction(Inst, Operands, Out);
4213 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4214 }
4215 }
4216
ErrorMissingFeature(SMLoc IDLoc,const FeatureBitset & MissingFeatures,bool MatchingInlineAsm)4217 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4218 const FeatureBitset &MissingFeatures,
4219 bool MatchingInlineAsm) {
4220 assert(MissingFeatures.any() && "Unknown missing feature!");
4221 SmallString<126> Msg;
4222 raw_svector_ostream OS(Msg);
4223 OS << "instruction requires:";
4224 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4225 if (MissingFeatures[i])
4226 OS << ' ' << getSubtargetFeatureName(i);
4227 }
4228 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4229 }
4230
// Target hook run after a table match succeeds: reject candidates that are
// incompatible with any explicitly requested encoding ({rex}, {vex}, {evex},
// ...), the {nf} suffix state, or APX register constraints.
unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &MCID = MII.get(Opc);
  uint64_t TSFlags = MCID.TSFlags;

  // When APX extended registers are in use, the instruction must be able to
  // encode them.
  if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID))
    return Match_Unsupported;
  // The {nf} request must agree with the instruction's EVEX_NF support:
  // reject when exactly one of the two holds. CFCMOVcc is exempt.
  if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc))
    return Match_Unsupported;

  // The candidate's encoding must match any explicitly forced prefix.
  switch (ForcedOpcodePrefix) {
  case OpcodePrefix_Default:
    break;
  case OpcodePrefix_REX:
  case OpcodePrefix_REX2:
    // REX/REX2 only apply to legacy (non-VEX/EVEX/XOP) encodings.
    if (TSFlags & X86II::EncodingMask)
      return Match_Unsupported;
    break;
  case OpcodePrefix_VEX:
  case OpcodePrefix_VEX2:
  case OpcodePrefix_VEX3:
    if ((TSFlags & X86II::EncodingMask) != X86II::VEX)
      return Match_Unsupported;
    break;
  case OpcodePrefix_EVEX:
    // In 64-bit mode, {evex} CMP/TEST are allowed through even when the
    // candidate is not EVEX-encoded: processInstruction later promotes them
    // to the APX CCMP/CTEST opcodes (see replaceWithCCMPCTEST).
    if (is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX &&
        !X86::isCMP(Opc) && !X86::isTEST(Opc))
      return Match_Unsupported;
    if (!is64BitMode() && (TSFlags & X86II::EncodingMask) != X86II::EVEX)
      return Match_Unsupported;
    break;
  }

  // Conversely, instructions that require an explicit {vex} pseudo-prefix
  // may only match when one of the VEX prefixes was forced.
  if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix &&
      (ForcedOpcodePrefix != OpcodePrefix_VEX &&
       ForcedOpcodePrefix != OpcodePrefix_VEX2 &&
       ForcedOpcodePrefix != OpcodePrefix_VEX3))
    return Match_Unsupported;

  return Match_Success;
}
4272
matchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,MCInst & Inst,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4273 bool X86AsmParser::matchAndEmitATTInstruction(
4274 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4275 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4276 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4277 SMRange EmptyRange = std::nullopt;
4278 // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
4279 // when matching the instruction.
4280 if (ForcedDataPrefix == X86::Is32Bit)
4281 SwitchMode(X86::Is32Bit);
4282 // First, try a direct match.
4283 FeatureBitset MissingFeatures;
4284 unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
4285 MissingFeatures, MatchingInlineAsm,
4286 isParsingIntelSyntax());
4287 if (ForcedDataPrefix == X86::Is32Bit) {
4288 SwitchMode(X86::Is16Bit);
4289 ForcedDataPrefix = 0;
4290 }
4291 switch (OriginalError) {
4292 default: llvm_unreachable("Unexpected match result!");
4293 case Match_Success:
4294 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4295 return true;
4296 // Some instructions need post-processing to, for example, tweak which
4297 // encoding is selected. Loop on it while changes happen so the
4298 // individual transformations can chain off each other.
4299 if (!MatchingInlineAsm)
4300 while (processInstruction(Inst, Operands))
4301 ;
4302
4303 Inst.setLoc(IDLoc);
4304 if (!MatchingInlineAsm)
4305 emitInstruction(Inst, Operands, Out);
4306 Opcode = Inst.getOpcode();
4307 return false;
4308 case Match_InvalidImmUnsignedi4: {
4309 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4310 if (ErrorLoc == SMLoc())
4311 ErrorLoc = IDLoc;
4312 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4313 EmptyRange, MatchingInlineAsm);
4314 }
4315 case Match_MissingFeature:
4316 return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
4317 case Match_InvalidOperand:
4318 case Match_MnemonicFail:
4319 case Match_Unsupported:
4320 break;
4321 }
4322 if (Op.getToken().empty()) {
4323 Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
4324 MatchingInlineAsm);
4325 return true;
4326 }
4327
4328 // FIXME: Ideally, we would only attempt suffix matches for things which are
4329 // valid prefixes, and we could just infer the right unambiguous
4330 // type. However, that requires substantially more matcher support than the
4331 // following hack.
4332
4333 // Change the operand to point to a temporary token.
4334 StringRef Base = Op.getToken();
4335 SmallString<16> Tmp;
4336 Tmp += Base;
4337 Tmp += ' ';
4338 Op.setTokenValue(Tmp);
4339
4340 // If this instruction starts with an 'f', then it is a floating point stack
4341 // instruction. These come in up to three forms for 32-bit, 64-bit, and
4342 // 80-bit floating point, which use the suffixes s,l,t respectively.
4343 //
4344 // Otherwise, we assume that this may be an integer instruction, which comes
4345 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
4346 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
4347 // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
4348 const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";
4349
4350 // Check for the various suffix matches.
4351 uint64_t ErrorInfoIgnore;
4352 FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
4353 unsigned Match[4];
4354
4355 // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
4356 // So we should make sure the suffix matcher only works for memory variant
4357 // that has the same size with the suffix.
4358 // FIXME: This flag is a workaround for legacy instructions that didn't
4359 // declare non suffix variant assembly.
4360 bool HasVectorReg = false;
4361 X86Operand *MemOp = nullptr;
4362 for (const auto &Op : Operands) {
4363 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4364 if (X86Op->isVectorReg())
4365 HasVectorReg = true;
4366 else if (X86Op->isMem()) {
4367 MemOp = X86Op;
4368 assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
4369 // Have we found an unqualified memory operand,
4370 // break. IA allows only one memory operand.
4371 break;
4372 }
4373 }
4374
4375 for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
4376 Tmp.back() = Suffixes[I];
4377 if (MemOp && HasVectorReg)
4378 MemOp->Mem.Size = MemSize[I];
4379 Match[I] = Match_MnemonicFail;
4380 if (MemOp || !HasVectorReg) {
4381 Match[I] =
4382 MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
4383 MatchingInlineAsm, isParsingIntelSyntax());
4384 // If this returned as a missing feature failure, remember that.
4385 if (Match[I] == Match_MissingFeature)
4386 ErrorInfoMissingFeatures = MissingFeatures;
4387 }
4388 }
4389
4390 // Restore the old token.
4391 Op.setTokenValue(Base);
4392
4393 // If exactly one matched, then we treat that as a successful match (and the
4394 // instruction will already have been filled in correctly, since the failing
4395 // matches won't have modified it).
4396 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4397 if (NumSuccessfulMatches == 1) {
4398 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4399 return true;
4400 // Some instructions need post-processing to, for example, tweak which
4401 // encoding is selected. Loop on it while changes happen so the
4402 // individual transformations can chain off each other.
4403 if (!MatchingInlineAsm)
4404 while (processInstruction(Inst, Operands))
4405 ;
4406
4407 Inst.setLoc(IDLoc);
4408 if (!MatchingInlineAsm)
4409 emitInstruction(Inst, Operands, Out);
4410 Opcode = Inst.getOpcode();
4411 return false;
4412 }
4413
4414 // Otherwise, the match failed, try to produce a decent error message.
4415
4416 // If we had multiple suffix matches, then identify this as an ambiguous
4417 // match.
4418 if (NumSuccessfulMatches > 1) {
4419 char MatchChars[4];
4420 unsigned NumMatches = 0;
4421 for (unsigned I = 0, E = std::size(Match); I != E; ++I)
4422 if (Match[I] == Match_Success)
4423 MatchChars[NumMatches++] = Suffixes[I];
4424
4425 SmallString<126> Msg;
4426 raw_svector_ostream OS(Msg);
4427 OS << "ambiguous instructions require an explicit suffix (could be ";
4428 for (unsigned i = 0; i != NumMatches; ++i) {
4429 if (i != 0)
4430 OS << ", ";
4431 if (i + 1 == NumMatches)
4432 OS << "or ";
4433 OS << "'" << Base << MatchChars[i] << "'";
4434 }
4435 OS << ")";
4436 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
4437 return true;
4438 }
4439
4440 // Okay, we know that none of the variants matched successfully.
4441
4442 // If all of the instructions reported an invalid mnemonic, then the original
4443 // mnemonic was invalid.
4444 if (llvm::count(Match, Match_MnemonicFail) == 4) {
4445 if (OriginalError == Match_MnemonicFail)
4446 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
4447 Op.getLocRange(), MatchingInlineAsm);
4448
4449 if (OriginalError == Match_Unsupported)
4450 return Error(IDLoc, "unsupported instruction", EmptyRange,
4451 MatchingInlineAsm);
4452
4453 assert(OriginalError == Match_InvalidOperand && "Unexpected error");
4454 // Recover location info for the operand if we know which was the problem.
4455 if (ErrorInfo != ~0ULL) {
4456 if (ErrorInfo >= Operands.size())
4457 return Error(IDLoc, "too few operands for instruction", EmptyRange,
4458 MatchingInlineAsm);
4459
4460 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
4461 if (Operand.getStartLoc().isValid()) {
4462 SMRange OperandRange = Operand.getLocRange();
4463 return Error(Operand.getStartLoc(), "invalid operand for instruction",
4464 OperandRange, MatchingInlineAsm);
4465 }
4466 }
4467
4468 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4469 MatchingInlineAsm);
4470 }
4471
4472 // If one instruction matched as unsupported, report this as unsupported.
4473 if (llvm::count(Match, Match_Unsupported) == 1) {
4474 return Error(IDLoc, "unsupported instruction", EmptyRange,
4475 MatchingInlineAsm);
4476 }
4477
4478 // If one instruction matched with a missing feature, report this as a
4479 // missing feature.
4480 if (llvm::count(Match, Match_MissingFeature) == 1) {
4481 ErrorInfo = Match_MissingFeature;
4482 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4483 MatchingInlineAsm);
4484 }
4485
4486 // If one instruction matched with an invalid operand, report this as an
4487 // operand failure.
4488 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4489 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4490 MatchingInlineAsm);
4491 }
4492
4493 // If all of these were an outright failure, report it in a useless way.
4494 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
4495 EmptyRange, MatchingInlineAsm);
4496 return true;
4497 }
4498
matchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,MCInst & Inst,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4499 bool X86AsmParser::matchAndEmitIntelInstruction(
4500 SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands,
4501 MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) {
4502 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
4503 SMRange EmptyRange = std::nullopt;
4504 // Find one unsized memory operand, if present.
4505 X86Operand *UnsizedMemOp = nullptr;
4506 for (const auto &Op : Operands) {
4507 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
4508 if (X86Op->isMemUnsized()) {
4509 UnsizedMemOp = X86Op;
4510 // Have we found an unqualified memory operand,
4511 // break. IA allows only one memory operand.
4512 break;
4513 }
4514 }
4515
4516 // Allow some instructions to have implicitly pointer-sized operands. This is
4517 // compatible with gas.
4518 StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
4519 if (UnsizedMemOp) {
4520 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push", "pop"};
4521 for (const char *Instr : PtrSizedInstrs) {
4522 if (Mnemonic == Instr) {
4523 UnsizedMemOp->Mem.Size = getPointerWidth();
4524 break;
4525 }
4526 }
4527 }
4528
4529 SmallVector<unsigned, 8> Match;
4530 FeatureBitset ErrorInfoMissingFeatures;
4531 FeatureBitset MissingFeatures;
4532 StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
4533
4534 // If unsized push has immediate operand we should default the default pointer
4535 // size for the size.
4536 if (Mnemonic == "push" && Operands.size() == 2) {
4537 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
4538 if (X86Op->isImm()) {
4539 // If it's not a constant fall through and let remainder take care of it.
4540 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
4541 unsigned Size = getPointerWidth();
4542 if (CE &&
4543 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
4544 SmallString<16> Tmp;
4545 Tmp += Base;
4546 Tmp += (is64BitMode())
4547 ? "q"
4548 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
4549 Op.setTokenValue(Tmp);
4550 // Do match in ATT mode to allow explicit suffix usage.
4551 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
4552 MissingFeatures, MatchingInlineAsm,
4553 false /*isParsingIntelSyntax()*/));
4554 Op.setTokenValue(Base);
4555 }
4556 }
4557 }
4558
4559 // If an unsized memory operand is present, try to match with each memory
4560 // operand size. In Intel assembly, the size is not part of the instruction
4561 // mnemonic.
4562 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
4563 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
4564 for (unsigned Size : MopSizes) {
4565 UnsizedMemOp->Mem.Size = Size;
4566 uint64_t ErrorInfoIgnore;
4567 unsigned LastOpcode = Inst.getOpcode();
4568 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
4569 MissingFeatures, MatchingInlineAsm,
4570 isParsingIntelSyntax());
4571 if (Match.empty() || LastOpcode != Inst.getOpcode())
4572 Match.push_back(M);
4573
4574 // If this returned as a missing feature failure, remember that.
4575 if (Match.back() == Match_MissingFeature)
4576 ErrorInfoMissingFeatures = MissingFeatures;
4577 }
4578
4579 // Restore the size of the unsized memory operand if we modified it.
4580 UnsizedMemOp->Mem.Size = 0;
4581 }
4582
4583 // If we haven't matched anything yet, this is not a basic integer or FPU
4584 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
4585 // matching with the unsized operand.
4586 if (Match.empty()) {
4587 Match.push_back(MatchInstruction(
4588 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4589 isParsingIntelSyntax()));
4590 // If this returned as a missing feature failure, remember that.
4591 if (Match.back() == Match_MissingFeature)
4592 ErrorInfoMissingFeatures = MissingFeatures;
4593 }
4594
4595 // Restore the size of the unsized memory operand if we modified it.
4596 if (UnsizedMemOp)
4597 UnsizedMemOp->Mem.Size = 0;
4598
4599 // If it's a bad mnemonic, all results will be the same.
4600 if (Match.back() == Match_MnemonicFail) {
4601 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
4602 Op.getLocRange(), MatchingInlineAsm);
4603 }
4604
4605 unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
4606
4607 // If matching was ambiguous and we had size information from the frontend,
4608 // try again with that. This handles cases like "movxz eax, m8/m16".
4609 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
4610 UnsizedMemOp->getMemFrontendSize()) {
4611 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
4612 unsigned M = MatchInstruction(
4613 Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
4614 isParsingIntelSyntax());
4615 if (M == Match_Success)
4616 NumSuccessfulMatches = 1;
4617
4618 // Add a rewrite that encodes the size information we used from the
4619 // frontend.
4620 InstInfo->AsmRewrites->emplace_back(
4621 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
4622 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
4623 }
4624
4625 // If exactly one matched, then we treat that as a successful match (and the
4626 // instruction will already have been filled in correctly, since the failing
4627 // matches won't have modified it).
4628 if (NumSuccessfulMatches == 1) {
4629 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
4630 return true;
4631 // Some instructions need post-processing to, for example, tweak which
4632 // encoding is selected. Loop on it while changes happen so the individual
4633 // transformations can chain off each other.
4634 if (!MatchingInlineAsm)
4635 while (processInstruction(Inst, Operands))
4636 ;
4637 Inst.setLoc(IDLoc);
4638 if (!MatchingInlineAsm)
4639 emitInstruction(Inst, Operands, Out);
4640 Opcode = Inst.getOpcode();
4641 return false;
4642 } else if (NumSuccessfulMatches > 1) {
4643 assert(UnsizedMemOp &&
4644 "multiple matches only possible with unsized memory operands");
4645 return Error(UnsizedMemOp->getStartLoc(),
4646 "ambiguous operand size for instruction '" + Mnemonic + "\'",
4647 UnsizedMemOp->getLocRange());
4648 }
4649
4650 // If one instruction matched as unsupported, report this as unsupported.
4651 if (llvm::count(Match, Match_Unsupported) == 1) {
4652 return Error(IDLoc, "unsupported instruction", EmptyRange,
4653 MatchingInlineAsm);
4654 }
4655
4656 // If one instruction matched with a missing feature, report this as a
4657 // missing feature.
4658 if (llvm::count(Match, Match_MissingFeature) == 1) {
4659 ErrorInfo = Match_MissingFeature;
4660 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
4661 MatchingInlineAsm);
4662 }
4663
4664 // If one instruction matched with an invalid operand, report this as an
4665 // operand failure.
4666 if (llvm::count(Match, Match_InvalidOperand) == 1) {
4667 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
4668 MatchingInlineAsm);
4669 }
4670
4671 if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
4672 SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
4673 if (ErrorLoc == SMLoc())
4674 ErrorLoc = IDLoc;
4675 return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
4676 EmptyRange, MatchingInlineAsm);
4677 }
4678
4679 // If all of these were an outright failure, report it in a useless way.
4680 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
4681 MatchingInlineAsm);
4682 }
4683
omitRegisterFromClobberLists(MCRegister Reg)4684 bool X86AsmParser::omitRegisterFromClobberLists(MCRegister Reg) {
4685 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg);
4686 }
4687
ParseDirective(AsmToken DirectiveID)4688 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
4689 MCAsmParser &Parser = getParser();
4690 StringRef IDVal = DirectiveID.getIdentifier();
4691 if (IDVal.starts_with(".arch"))
4692 return parseDirectiveArch();
4693 if (IDVal.starts_with(".code"))
4694 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
4695 else if (IDVal.starts_with(".att_syntax")) {
4696 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4697 if (Parser.getTok().getString() == "prefix")
4698 Parser.Lex();
4699 else if (Parser.getTok().getString() == "noprefix")
4700 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
4701 "supported: registers must have a "
4702 "'%' prefix in .att_syntax");
4703 }
4704 getParser().setAssemblerDialect(0);
4705 return false;
4706 } else if (IDVal.starts_with(".intel_syntax")) {
4707 getParser().setAssemblerDialect(1);
4708 if (getLexer().isNot(AsmToken::EndOfStatement)) {
4709 if (Parser.getTok().getString() == "noprefix")
4710 Parser.Lex();
4711 else if (Parser.getTok().getString() == "prefix")
4712 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
4713 "supported: registers must not have "
4714 "a '%' prefix in .intel_syntax");
4715 }
4716 return false;
4717 } else if (IDVal == ".nops")
4718 return parseDirectiveNops(DirectiveID.getLoc());
4719 else if (IDVal == ".even")
4720 return parseDirectiveEven(DirectiveID.getLoc());
4721 else if (IDVal == ".cv_fpo_proc")
4722 return parseDirectiveFPOProc(DirectiveID.getLoc());
4723 else if (IDVal == ".cv_fpo_setframe")
4724 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
4725 else if (IDVal == ".cv_fpo_pushreg")
4726 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
4727 else if (IDVal == ".cv_fpo_stackalloc")
4728 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
4729 else if (IDVal == ".cv_fpo_stackalign")
4730 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
4731 else if (IDVal == ".cv_fpo_endprologue")
4732 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
4733 else if (IDVal == ".cv_fpo_endproc")
4734 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
4735 else if (IDVal == ".seh_pushreg" ||
4736 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
4737 return parseDirectiveSEHPushReg(DirectiveID.getLoc());
4738 else if (IDVal == ".seh_setframe" ||
4739 (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
4740 return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
4741 else if (IDVal == ".seh_savereg" ||
4742 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
4743 return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
4744 else if (IDVal == ".seh_savexmm" ||
4745 (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
4746 return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
4747 else if (IDVal == ".seh_pushframe" ||
4748 (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
4749 return parseDirectiveSEHPushFrame(DirectiveID.getLoc());
4750
4751 return true;
4752 }
4753
parseDirectiveArch()4754 bool X86AsmParser::parseDirectiveArch() {
4755 // Ignore .arch for now.
4756 getParser().parseStringToEndOfStatement();
4757 return false;
4758 }
4759
4760 /// parseDirectiveNops
4761 /// ::= .nops size[, control]
parseDirectiveNops(SMLoc L)4762 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4763 int64_t NumBytes = 0, Control = 0;
4764 SMLoc NumBytesLoc, ControlLoc;
4765 const MCSubtargetInfo& STI = getSTI();
4766 NumBytesLoc = getTok().getLoc();
4767 if (getParser().checkForValidSection() ||
4768 getParser().parseAbsoluteExpression(NumBytes))
4769 return true;
4770
4771 if (parseOptionalToken(AsmToken::Comma)) {
4772 ControlLoc = getTok().getLoc();
4773 if (getParser().parseAbsoluteExpression(Control))
4774 return true;
4775 }
4776 if (getParser().parseEOL())
4777 return true;
4778
4779 if (NumBytes <= 0) {
4780 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4781 return false;
4782 }
4783
4784 if (Control < 0) {
4785 Error(ControlLoc, "'.nops' directive with negative NOP size");
4786 return false;
4787 }
4788
4789 /// Emit nops
4790 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4791
4792 return false;
4793 }
4794
4795 /// parseDirectiveEven
4796 /// ::= .even
parseDirectiveEven(SMLoc L)4797 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4798 if (parseEOL())
4799 return false;
4800
4801 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4802 if (!Section) {
4803 getStreamer().initSections(false, getSTI());
4804 Section = getStreamer().getCurrentSectionOnly();
4805 }
4806 if (Section->useCodeAlign())
4807 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4808 else
4809 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4810 return false;
4811 }
4812
4813 /// ParseDirectiveCode
4814 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)4815 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4816 MCAsmParser &Parser = getParser();
4817 Code16GCC = false;
4818 if (IDVal == ".code16") {
4819 Parser.Lex();
4820 if (!is16BitMode()) {
4821 SwitchMode(X86::Is16Bit);
4822 getTargetStreamer().emitCode16();
4823 }
4824 } else if (IDVal == ".code16gcc") {
4825 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4826 Parser.Lex();
4827 Code16GCC = true;
4828 if (!is16BitMode()) {
4829 SwitchMode(X86::Is16Bit);
4830 getTargetStreamer().emitCode16();
4831 }
4832 } else if (IDVal == ".code32") {
4833 Parser.Lex();
4834 if (!is32BitMode()) {
4835 SwitchMode(X86::Is32Bit);
4836 getTargetStreamer().emitCode32();
4837 }
4838 } else if (IDVal == ".code64") {
4839 Parser.Lex();
4840 if (!is64BitMode()) {
4841 SwitchMode(X86::Is64Bit);
4842 getTargetStreamer().emitCode64();
4843 }
4844 } else {
4845 Error(L, "unknown directive " + IDVal);
4846 return false;
4847 }
4848
4849 return false;
4850 }
4851
4852 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)4853 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4854 MCAsmParser &Parser = getParser();
4855 StringRef ProcName;
4856 int64_t ParamsSize;
4857 if (Parser.parseIdentifier(ProcName))
4858 return Parser.TokError("expected symbol name");
4859 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4860 return true;
4861 if (!isUIntN(32, ParamsSize))
4862 return Parser.TokError("parameters size out of range");
4863 if (parseEOL())
4864 return true;
4865 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4866 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4867 }
4868
4869 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)4870 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4871 MCRegister Reg;
4872 SMLoc DummyLoc;
4873 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4874 return true;
4875 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4876 }
4877
4878 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)4879 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4880 MCRegister Reg;
4881 SMLoc DummyLoc;
4882 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4883 return true;
4884 return getTargetStreamer().emitFPOPushReg(Reg, L);
4885 }
4886
4887 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)4888 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4889 MCAsmParser &Parser = getParser();
4890 int64_t Offset;
4891 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4892 return true;
4893 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4894 }
4895
4896 // .cv_fpo_stackalign 8
parseDirectiveFPOStackAlign(SMLoc L)4897 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4898 MCAsmParser &Parser = getParser();
4899 int64_t Offset;
4900 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4901 return true;
4902 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4903 }
4904
4905 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)4906 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4907 MCAsmParser &Parser = getParser();
4908 if (Parser.parseEOL())
4909 return true;
4910 return getTargetStreamer().emitFPOEndPrologue(L);
4911 }
4912
4913 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)4914 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4915 MCAsmParser &Parser = getParser();
4916 if (Parser.parseEOL())
4917 return true;
4918 return getTargetStreamer().emitFPOEndProc(L);
4919 }
4920
parseSEHRegisterNumber(unsigned RegClassID,MCRegister & RegNo)4921 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4922 MCRegister &RegNo) {
4923 SMLoc startLoc = getLexer().getLoc();
4924 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4925
4926 // Try parsing the argument as a register first.
4927 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4928 SMLoc endLoc;
4929 if (parseRegister(RegNo, startLoc, endLoc))
4930 return true;
4931
4932 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4933 return Error(startLoc,
4934 "register is not supported for use with this directive");
4935 }
4936 } else {
4937 // Otherwise, an integer number matching the encoding of the desired
4938 // register may appear.
4939 int64_t EncodedReg;
4940 if (getParser().parseAbsoluteExpression(EncodedReg))
4941 return true;
4942
4943 // The SEH register number is the same as the encoding register number. Map
4944 // from the encoding back to the LLVM register number.
4945 RegNo = MCRegister();
4946 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4947 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4948 RegNo = Reg;
4949 break;
4950 }
4951 }
4952 if (!RegNo) {
4953 return Error(startLoc,
4954 "incorrect register number for use with this directive");
4955 }
4956 }
4957
4958 return false;
4959 }
4960
parseDirectiveSEHPushReg(SMLoc Loc)4961 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4962 MCRegister Reg;
4963 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4964 return true;
4965
4966 if (getLexer().isNot(AsmToken::EndOfStatement))
4967 return TokError("expected end of directive");
4968
4969 getParser().Lex();
4970 getStreamer().emitWinCFIPushReg(Reg, Loc);
4971 return false;
4972 }
4973
parseDirectiveSEHSetFrame(SMLoc Loc)4974 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4975 MCRegister Reg;
4976 int64_t Off;
4977 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4978 return true;
4979 if (getLexer().isNot(AsmToken::Comma))
4980 return TokError("you must specify a stack pointer offset");
4981
4982 getParser().Lex();
4983 if (getParser().parseAbsoluteExpression(Off))
4984 return true;
4985
4986 if (getLexer().isNot(AsmToken::EndOfStatement))
4987 return TokError("expected end of directive");
4988
4989 getParser().Lex();
4990 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4991 return false;
4992 }
4993
parseDirectiveSEHSaveReg(SMLoc Loc)4994 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4995 MCRegister Reg;
4996 int64_t Off;
4997 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4998 return true;
4999 if (getLexer().isNot(AsmToken::Comma))
5000 return TokError("you must specify an offset on the stack");
5001
5002 getParser().Lex();
5003 if (getParser().parseAbsoluteExpression(Off))
5004 return true;
5005
5006 if (getLexer().isNot(AsmToken::EndOfStatement))
5007 return TokError("expected end of directive");
5008
5009 getParser().Lex();
5010 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
5011 return false;
5012 }
5013
parseDirectiveSEHSaveXMM(SMLoc Loc)5014 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
5015 MCRegister Reg;
5016 int64_t Off;
5017 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
5018 return true;
5019 if (getLexer().isNot(AsmToken::Comma))
5020 return TokError("you must specify an offset on the stack");
5021
5022 getParser().Lex();
5023 if (getParser().parseAbsoluteExpression(Off))
5024 return true;
5025
5026 if (getLexer().isNot(AsmToken::EndOfStatement))
5027 return TokError("expected end of directive");
5028
5029 getParser().Lex();
5030 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
5031 return false;
5032 }
5033
parseDirectiveSEHPushFrame(SMLoc Loc)5034 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
5035 bool Code = false;
5036 StringRef CodeID;
5037 if (getLexer().is(AsmToken::At)) {
5038 SMLoc startLoc = getLexer().getLoc();
5039 getParser().Lex();
5040 if (!getParser().parseIdentifier(CodeID)) {
5041 if (CodeID != "code")
5042 return Error(startLoc, "expected @code");
5043 Code = true;
5044 }
5045 }
5046
5047 if (getLexer().isNot(AsmToken::EndOfStatement))
5048 return TokError("expected end of directive");
5049
5050 getParser().Lex();
5051 getStreamer().emitWinCFIPushFrame(Code, Loc);
5052 return false;
5053 }
5054
5055 // Force static initialization.
LLVMInitializeX86AsmParser()5056 extern "C" LLVM_C_ABI void LLVMInitializeX86AsmParser() {
5057 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
5058 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
5059 }
5060
5061 #define GET_MATCHER_IMPLEMENTATION
5062 #include "X86GenAsmMatcher.inc"
5063