//===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower X86 MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"

using namespace llvm;

namespace {

/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
class X86MCInstLower {
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                          const MachineOperand &MO) const;
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  MachineModuleInfoMachO &getMachOMMI() const;
};

} // end anonymous namespace

/// A RAII helper which defines a region of instructions which can't have
/// padding added between them for correctness.
struct NoAutoPaddingScope {
  MCStreamer &OS;
  const bool OldAllowAutoPadding;
  NoAutoPaddingScope(MCStreamer &OS)
      : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
    changeAndComment(false);
  }
  ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
  void changeAndComment(bool b) {
    if (b == OS.getAllowAutoPadding())
      return;
    OS.setAllowAutoPadding(b);
    if (b)
      OS.emitRawComment("autopadding");
    else
      OS.emitRawComment("noautopadding");
  }
};
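
// A minimal usage sketch (illustrative, not part of the original file):
//
//   {
//     NoAutoPaddingScope NoPadScope(*OutStreamer);
//     // ... emit instructions that must stay contiguous ...
//   } // the previous auto-padding setting is restored here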

// Emit a minimal sequence of nops spanning NumBytes bytes.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);

void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
                                                 const MCSubtargetInfo &STI,
                                                 MCCodeEmitter *CodeEmitter) {
  if (InShadow) {
    SmallString<256> Code;
    SmallVector<MCFixup, 4> Fixups;
    raw_svector_ostream VecOS(Code);
    CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
    CurrentShadowSize += Code.size();
    if (CurrentShadowSize >= RequiredShadowSize)
      InShadow = false; // The shadow is big enough. Stop counting.
  }
}

void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
    MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
  if (InShadow && CurrentShadowSize < RequiredShadowSize) {
    InShadow = false;
    emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
                &MF->getSubtarget<X86Subtarget>());
  }
}
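
// Worked example (sketch): with RequiredShadowSize == 8, a stackmap followed
// by 5 bytes of encoded instructions leaves CurrentShadowSize == 5, so
// emitShadowPadding() emits 3 bytes of nops to complete the shadow region.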

void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
  OutStreamer->emitInstruction(Inst, getSubtargetInfo());
  SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}

X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}

MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
  return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}

/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
/// operand to an MCSymbol.
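/// For example (illustrative): an MO_DLLIMPORT reference prepends "__imp_"
/// to the mangled name, while MO_DARWIN_NONLAZY produces something like
/// "L_foo$non_lazy_ptr" using the private-global prefix.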
MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
  const Triple &TT = TM.getTargetTriple();
  if (MO.isGlobal() && TT.isOSBinFormatELF())
    return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());

  const DataLayout &DL = MF.getDataLayout();
  assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
         "Isn't a symbol reference");

  MCSymbol *Sym = nullptr;
  SmallString<128> Name;
  StringRef Suffix;

  switch (MO.getTargetFlags()) {
  case X86II::MO_DLLIMPORT:
    // Handle dllimport linkage.
    Name += "__imp_";
    break;
  case X86II::MO_COFFSTUB:
    Name += ".refptr.";
    break;
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Suffix = "$non_lazy_ptr";
    break;
  }

  if (!Suffix.empty())
    Name += DL.getPrivateGlobalPrefix();

  if (MO.isGlobal()) {
    const GlobalValue *GV = MO.getGlobal();
    AsmPrinter.getNameWithPrefix(Name, GV);
  } else if (MO.isSymbol()) {
    Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
  } else if (MO.isMBB()) {
    assert(Suffix.empty());
    Sym = MO.getMBB()->getSymbol();
  }

  Name += Suffix;
  if (!Sym)
    Sym = Ctx.getOrCreateSymbol(Name);

  // If the target flags on the operand change the name of the symbol, do that
  // before we return the symbol.
  switch (MO.getTargetFlags()) {
  default:
    break;
  case X86II::MO_COFFSTUB: {
    MachineModuleInfoCOFF &MMICOFF =
        MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
    MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()), true);
    }
    break;
  }
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
    MachineModuleInfoImpl::StubValueTy &StubSym =
        getMachOMMI().getGVStubEntry(Sym);
    if (!StubSym.getPointer()) {
      assert(MO.isGlobal() && "Extern symbol not handled yet");
      StubSym = MachineModuleInfoImpl::StubValueTy(
          AsmPrinter.getSymbol(MO.getGlobal()),
          !MO.getGlobal()->hasInternalLinkage());
    }
    break;
  }
  }

  return Sym;
}

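// LowerSymbolOperand maps the operand's target flag to an MC expression; for
// example (sketch), MO_GOTPCREL produces "foo@GOTPCREL", while
// MO_PIC_BASE_OFFSET produces the difference "foo - <pic base label>" as an
// MCBinaryExpr.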
MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
                                             MCSymbol *Sym) const {
  // FIXME: We would like an efficient form for this, so we don't have to do a
  // lot of extra uniquing.
  const MCExpr *Expr = nullptr;
  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;

  switch (MO.getTargetFlags()) {
  default:
    llvm_unreachable("Unknown target flag on GV operand");
  case X86II::MO_NO_FLAG: // No flag.
  // These affect the name of the symbol, not any suffix.
  case X86II::MO_DARWIN_NONLAZY:
  case X86II::MO_DLLIMPORT:
  case X86II::MO_COFFSTUB:
    break;

  case X86II::MO_TLVP:
    RefKind = MCSymbolRefExpr::VK_TLVP;
    break;
  case X86II::MO_TLVP_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    break;
  case X86II::MO_SECREL:
    RefKind = MCSymbolRefExpr::VK_SECREL;
    break;
  case X86II::MO_TLSGD:
    RefKind = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86II::MO_TLSLD:
    RefKind = MCSymbolRefExpr::VK_TLSLD;
    break;
  case X86II::MO_TLSLDM:
    RefKind = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86II::MO_GOTTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
    break;
  case X86II::MO_INDNTPOFF:
    RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
    break;
  case X86II::MO_TPOFF:
    RefKind = MCSymbolRefExpr::VK_TPOFF;
    break;
  case X86II::MO_DTPOFF:
    RefKind = MCSymbolRefExpr::VK_DTPOFF;
    break;
  case X86II::MO_NTPOFF:
    RefKind = MCSymbolRefExpr::VK_NTPOFF;
    break;
  case X86II::MO_GOTNTPOFF:
    RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
    break;
  case X86II::MO_GOTPCREL:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL;
    break;
  case X86II::MO_GOTPCREL_NORELAX:
    RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
    break;
  case X86II::MO_GOT:
    RefKind = MCSymbolRefExpr::VK_GOT;
    break;
  case X86II::MO_GOTOFF:
    RefKind = MCSymbolRefExpr::VK_GOTOFF;
    break;
  case X86II::MO_PLT:
    RefKind = MCSymbolRefExpr::VK_PLT;
    break;
  case X86II::MO_ABS8:
    RefKind = MCSymbolRefExpr::VK_X86_ABS8;
    break;
  case X86II::MO_PIC_BASE_OFFSET:
  case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
    Expr = MCSymbolRefExpr::create(Sym, Ctx);
    // Subtract the pic base.
    Expr = MCBinaryExpr::createSub(
        Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
    if (MO.isJTI()) {
      assert(MAI.doesSetDirectiveSuppressReloc());
      // If .set directive is supported, use it to reduce the number of
      // relocations the assembler will generate for differences between
      // local labels. This is only safe when the symbols are in the same
      // section so we are restricting it to jumptable references.
      MCSymbol *Label = Ctx.createTempSymbol();
      AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
      Expr = MCSymbolRefExpr::create(Label, Ctx);
    }
    break;
  }

  if (!Expr)
    Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);

  if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
    Expr = MCBinaryExpr::createAdd(
        Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
  return MCOperand::createExpr(Expr);
}

/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
/// a short fixed-register form.
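/// For example (sketch): "addl $42, %eax" can use the short ADD32i32 form
/// (opcode 0x05 followed by an imm32), saving the ModRM byte over the
/// generic ADD32ri encoding.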
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
  unsigned ImmOp = Inst.getNumOperands() - 1;
  assert(Inst.getOperand(0).isReg() &&
         (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
         ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
           Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
          Inst.getNumOperands() == 2) &&
         "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(0).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(ImmOp);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
}

/// If a movsx instruction has a shorter encoding for the used register,
/// simplify the instruction to use it instead.
static void SimplifyMOVSX(MCInst &Inst) {
  unsigned NewOpcode = 0;
  unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
  switch (Inst.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction!");
  case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
    if (Op0 == X86::AX && Op1 == X86::AL)
      NewOpcode = X86::CBW;
    break;
  case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
    if (Op0 == X86::EAX && Op1 == X86::AX)
      NewOpcode = X86::CWDE;
    break;
  case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
    if (Op0 == X86::RAX && Op1 == X86::EAX)
      NewOpcode = X86::CDQE;
    break;
  }

  if (NewOpcode != 0) {
    Inst = MCInst();
    Inst.setOpcode(NewOpcode);
  }
}

/// Simplify things like MOV32rm to MOV32o32a.
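/// For example (sketch): in 32-bit mode, the load "movl 0x1234, %eax" can
/// use the moffs form MOV32ao32 (opcode 0xA1 followed by a 32-bit address),
/// which needs no ModRM or SIB byte.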
static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
                                  unsigned Opcode) {
  // Don't make these simplifications in 64-bit mode; other assemblers don't
  // perform them because they make the code larger.
  if (Printer.getSubtarget().is64Bit())
    return;

  bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
  unsigned AddrBase = IsStore;
  unsigned RegOp = IsStore ? 0 : 5;
  unsigned AddrOp = AddrBase + 3;
  assert(
      Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
      Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
      Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
      (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
      "Unexpected instruction!");

  // Check whether the destination register can be fixed.
  unsigned Reg = Inst.getOperand(RegOp).getReg();
  if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
    return;

  // Check whether this is an absolute address.
  // FIXME: We know TLVP symbol refs aren't, but there should be a better way
  // to do this here.
  bool Absolute = true;
  if (Inst.getOperand(AddrOp).isExpr()) {
    const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
    if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
      if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
        Absolute = false;
  }

  if (Absolute &&
      (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
       Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
       Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
    return;

  // If so, rewrite the instruction.
  MCOperand Saved = Inst.getOperand(AddrOp);
  MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
  Inst = MCInst();
  Inst.setOpcode(Opcode);
  Inst.addOperand(Saved);
  Inst.addOperand(Seg);
}

static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
  return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}

Optional<MCOperand>
X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
                                    const MachineOperand &MO) const {
  switch (MO.getType()) {
  default:
    MI->print(errs());
    llvm_unreachable("unknown operand type");
  case MachineOperand::MO_Register:
    // Ignore all implicit register operands.
    if (MO.isImplicit())
      return None;
    return MCOperand::createReg(MO.getReg());
  case MachineOperand::MO_Immediate:
    return MCOperand::createImm(MO.getImm());
  case MachineOperand::MO_MachineBasicBlock:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_ExternalSymbol:
    return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
  case MachineOperand::MO_MCSymbol:
    return LowerSymbolOperand(MO, MO.getMCSymbol());
  case MachineOperand::MO_JumpTableIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
  case MachineOperand::MO_ConstantPoolIndex:
    return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
  case MachineOperand::MO_BlockAddress:
    return LowerSymbolOperand(
        MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
  case MachineOperand::MO_RegisterMask:
    // Ignore call clobbers.
    return None;
  }
}

// Replace TAILJMP opcodes with their equivalent opcodes that have encoding
// information.
static unsigned convertTailJumpOpcode(unsigned Opcode) {
  switch (Opcode) {
  case X86::TAILJMPr:
    Opcode = X86::JMP32r;
    break;
  case X86::TAILJMPm:
    Opcode = X86::JMP32m;
    break;
  case X86::TAILJMPr64:
    Opcode = X86::JMP64r;
    break;
  case X86::TAILJMPm64:
    Opcode = X86::JMP64m;
    break;
  case X86::TAILJMPr64_REX:
    Opcode = X86::JMP64r_REX;
    break;
  case X86::TAILJMPm64_REX:
    Opcode = X86::JMP64m_REX;
    break;
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    Opcode = X86::JMP_1;
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    Opcode = X86::JCC_1;
    break;
  }

  return Opcode;
}

void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(MaybeMCOp.getValue());

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;

  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }

  // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
  // if one of the registers is extended, but the other isn't.
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
      case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
      case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
      case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
      case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
      case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
      case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
      case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
      case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
      case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
      case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
      case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
      case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
      case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
      }
      OutMI.setOpcode(NewOpc);
    }
    break;
  }

  case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
  case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
  case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
  case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
  case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
  case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
  case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
  case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
  case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
  case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
  case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
  case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
  case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
  case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
  case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
  case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
  case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
  case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
  case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
  case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
  case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
  case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
  case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
  case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
  case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
  case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
  case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
  case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
  case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
  case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
    // Turn immediate 0 into the VPCMPEQ instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
      case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
      case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
      case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
      case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
      case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
      case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
      case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
      case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
      case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
      case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
      case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
      case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
      case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
      case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
      case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
      case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
      case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
      case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
      case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
      case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
      case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
      case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
      case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
      case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
      case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
      case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
      case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
      case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
      case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
      case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
      case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
      case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
      case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
      case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
      case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
      case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
      case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
      case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
      case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
      case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
      case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
      case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
      case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
      case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
      case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
      case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
      case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
      case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
      case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
      case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
      case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
      case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
      case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
      case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
      case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    // Turn immediate 6 into the VPCMPGT instruction.
    if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
      unsigned NewOpc;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
      case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
      case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
      case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
      case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
      case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
      case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
      case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
      case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
      case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
      case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
      case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
      case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
      case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
      case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
      case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
      case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
      case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
      case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
      case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
      case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
      case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
      case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
      case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
      case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
      case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
      case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
      case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
      case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
      case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
      case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
      case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
      case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
      case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
      case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
      case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
      case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
      case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
      case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
      case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
      case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
      case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
      case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
      case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
      case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
      case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
      case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
      case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
      case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
      case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
      case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
      case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
      case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
      case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
      case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
      case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
      case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
      case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
      case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
      case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
      }

      OutMI.setOpcode(NewOpc);
      OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
      break;
    }

    break;
  }

  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }

  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }

  // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
  // instruction.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;

  case X86::DEC16r:
  case X86::DEC32r:
  case X86::INC16r:
  case X86::INC32r:
    // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
    if (!AsmPrinter.getSubtarget().is64Bit()) {
      unsigned Opcode;
      switch (OutMI.getOpcode()) {
      default: llvm_unreachable("Invalid opcode");
      case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
      case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
      case X86::INC16r: Opcode = X86::INC16r_alt; break;
      case X86::INC32r: Opcode = X86::INC32r_alt; break;
      }
      OutMI.setOpcode(Opcode);
    }
    break;

  // We don't currently select the correct instruction form for instructions
  // which have a short %eax, etc. form. Handle this by custom lowering, for
  // now.
  //
  // Note, we are currently not handling the following instructions:
  // MOV64ao8, MOV64o8a
  // XCHG16ar, XCHG32ar, XCHG64ar
  case X86::MOV8mr_NOREX:
  case X86::MOV8mr:
  case X86::MOV8rm_NOREX:
  case X86::MOV8rm:
  case X86::MOV16mr:
  case X86::MOV16rm:
  case X86::MOV32mr:
  case X86::MOV32rm: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MOV8mr_NOREX:
    case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
    case X86::MOV8rm_NOREX:
    case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
    case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
    case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
    case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
    case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
    }
    SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
    break;
  }

  case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
  case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
  case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
  case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
  case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
  case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
  case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
  case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
  case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
    case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
    case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
    case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
    case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
    case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
    case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
    case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
    case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
    case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
    case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
    case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
    case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
    case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
    case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
    case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
    case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
    case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
    case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
    case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
    case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
    case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
    case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
    case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
    case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
    case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
    case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
    case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
    case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
    case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
    case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
    case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
    case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
    case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
    case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
    case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
    }
    SimplifyShortImmForm(OutMI, NewOpc);
    break;
  }

  // Try to shrink some forms of movsx.
  case X86::MOVSX16rr8:
  case X86::MOVSX32rr16:
  case X86::MOVSX64rr32:
    SimplifyMOVSX(OutMI);
    break;

  case X86::VCMPPDrri:
  case X86::VCMPPDYrri:
  case X86::VCMPPSrri:
  case X86::VCMPPSYrri:
  case X86::VCMPSDrr:
  case X86::VCMPSSrr: {
    // Swap the operands if it will enable a 2 byte VEX encoding.
    // FIXME: Change the immediate to improve opportunities?
    if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
        X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
      unsigned Imm = MI->getOperand(3).getImm() & 0x7;
      switch (Imm) {
      default: break;
      case 0x00: // EQUAL
      case 0x03: // UNORDERED
      case 0x04: // NOT EQUAL
      case 0x07: // ORDERED
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
        break;
      }
    }
    break;
  }

  case X86::VMOVHLPSrr:
  case X86::VUNPCKHPDrr:
    // These are not truly commutable so hide them from the default case.
    break;

  default: {
    // If the instruction is a commutable arithmetic instruction we might be
    // able to commute the operands to get a 2 byte VEX prefix.
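    // For example (sketch): "vaddps %xmm8, %xmm1, %xmm0" can be commuted to
    // "vaddps %xmm1, %xmm8, %xmm0", moving the extended register out of the
    // r/m field (VEX.B) and into VEX.vvvv so the 2-byte VEX prefix suffices.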
    uint64_t TSFlags = MI->getDesc().TSFlags;
    if (MI->getDesc().isCommutable() &&
        (TSFlags & X86II::EncodingMask) == X86II::VEX &&
        (TSFlags & X86II::OpMapMask) == X86II::TB &&
        (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
        !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
        OutMI.getNumOperands() == 3) {
      if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
          X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
        std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
    }
    break;
  }
  }
}

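// For the x86-64 LP64 general-dynamic case this emits the canonical padded
// sequence (sketch):
//   data16 leaq x@tlsgd(%rip), %rdi
//   data16 data16 rex64 callq __tls_get_addr@PLT
// The prefix padding keeps the sequence at the fixed size the linker expects
// when relaxing it to the IE/LE models.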
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
                  MI.getOpcode() != X86::TLS_base_addr32;
  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                      MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld emits a bogus TLS relaxation error when it
  // attempts to relax a GD/LD code sequence that uses R_X86_64_GOTPCREL
  // (instead of R_X86_64_GOTPCRELX) to IE/LE (binutils PR24784). Work around
  // the bug by only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}

/// Emit the largest nop instruction smaller than or equal to \p NumBytes
/// bytes.  Return the size of nop emitted.
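/// For example (sketch): on a 64-bit target with TuningFast15ByteNOP, a
/// 12-byte request emits the 10-byte NOOPW form prefixed with two 0x66
/// bytes, for exactly 12 bytes in one instruction.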
static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  // Determine the longest nop which can be efficiently decoded for the given
  // target CPU.  15 bytes is the longest single NOP instruction, but some
  // platforms can't decode the longest forms efficiently.
  unsigned MaxNopLength = 1;
  if (Subtarget->is64Bit()) {
    // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
    // IndexReg/BaseReg below need to be updated.
    if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
      MaxNopLength = 7;
    else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
      MaxNopLength = 15;
    else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
      MaxNopLength = 11;
    else
      MaxNopLength = 10;
  } else if (Subtarget->is32Bit())
    MaxNopLength = 2;

  // Cap a single nop emission at the profitable value for the target
  NumBytes = std::min(NumBytes, MaxNopLength);

  unsigned NopSize;
  unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
  IndexReg = Displacement = SegmentReg = 0;
  BaseReg = X86::RAX;
  ScaleVal = 1;
  switch (NumBytes) {
  case 0:
    llvm_unreachable("Zero nops?");
    break;
  case 1:
    NopSize = 1;
    Opc = X86::NOOP;
    break;
  case 2:
    NopSize = 2;
    Opc = X86::XCHG16ar;
    break;
  case 3:
    NopSize = 3;
    Opc = X86::NOOPL;
    break;
  case 4:
    NopSize = 4;
    Opc = X86::NOOPL;
    Displacement = 8;
    break;
  case 5:
    NopSize = 5;
    Opc = X86::NOOPL;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 6:
    NopSize = 6;
    Opc = X86::NOOPW;
    Displacement = 8;
    IndexReg = X86::RAX;
    break;
  case 7:
    NopSize = 7;
    Opc = X86::NOOPL;
    Displacement = 512;
    break;
  case 8:
    NopSize = 8;
    Opc = X86::NOOPL;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  case 9:
    NopSize = 9;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    break;
  default:
    NopSize = 10;
    Opc = X86::NOOPW;
    Displacement = 512;
    IndexReg = X86::RAX;
    SegmentReg = X86::CS;
    break;
  }

  unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
  NopSize += NumPrefixes;
  for (unsigned i = 0; i != NumPrefixes; ++i)
    OS.emitBytes("\x66");

  switch (Opc) {
  default: llvm_unreachable("Unexpected opcode");
  case X86::NOOP:
    OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
    break;
  case X86::XCHG16ar:
    OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
                       *Subtarget);
    break;
  case X86::NOOPL:
  case X86::NOOPW:
    OS.emitInstruction(MCInstBuilder(Opc)
                           .addReg(BaseReg)
                           .addImm(ScaleVal)
                           .addReg(IndexReg)
                           .addImm(Displacement)
                           .addReg(SegmentReg),
                       *Subtarget);
    break;
  }
  assert(NopSize <= NumBytes && "We overemitted?");
  return NopSize;
}

/// Emit the optimal amount of multi-byte nops on X86.
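/// For example (sketch): a 17-byte request on a default 64-bit target
/// (MaxNopLength 10) is emitted as a 10-byte nop followed by a 7-byte nop.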
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget) {
  unsigned NopsToEmit = NumBytes;
  (void)NopsToEmit;
  while (NumBytes) {
    NumBytes -= emitNop(OS, NumBytes, Subtarget);
    assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
  }
}

void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
                                    X86MCInstLower &MCIL) {
  assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  StatepointOpers SOpers(&MI);
  if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
    emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
  } else {
    // Lower call target and choose correct opcode
    const MachineOperand &CallTarget = SOpers.getCallTarget();
    MCOperand CallTargetMCOp;
    unsigned CallOpcode;
    switch (CallTarget.getType()) {
    case MachineOperand::MO_GlobalAddress:
    case MachineOperand::MO_ExternalSymbol:
      CallTargetMCOp = MCIL.LowerSymbolOperand(
          CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // address.  You'll fail asserts during load & relocation if this
      // symbol is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Immediate:
      CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
      CallOpcode = X86::CALL64pcrel32;
      // Currently, we only support relative addressing with statepoints.
      // Otherwise, we'll need a scratch register to hold the target
      // immediate.  You'll fail asserts during load & relocation if this
      // address is too far away. (TODO: support non-relative addressing)
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
      break;
    default:
      llvm_unreachable("Unsupported operand type in statepoint call target");
      break;
    }

    // Emit call
    MCInst CallInst;
    CallInst.setOpcode(CallOpcode);
    CallInst.addOperand(CallTargetMCOp);
    OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
  }

  // Record our statepoint node in the same section used by STACKMAP
  // and PATCHPOINT
  auto &Ctx = OutStreamer->getContext();
  MCSymbol *MILabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(MILabel);
  SM.recordStatepoint(*MILabel, MI);
}

void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
                                     X86MCInstLower &MCIL) {
  // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
  //                  <opcode>, <operands>

  NoAutoPaddingScope NoPadScope(*OutStreamer);

  Register DefRegister = FaultingMI.getOperand(0).getReg();
  FaultMaps::FaultKind FK =
      static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
  MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
  unsigned Opcode = FaultingMI.getOperand(3).getImm();
  unsigned OperandsBeginIdx = 4;

  auto &Ctx = OutStreamer->getContext();
  MCSymbol *FaultingLabel = Ctx.createTempSymbol();
  OutStreamer->emitLabel(FaultingLabel);

  assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
  FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);

  MCInst MI;
  MI.setOpcode(Opcode);

  if (DefRegister != X86::NoRegister)
    MI.addOperand(MCOperand::createReg(DefRegister));

  for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
            E = FaultingMI.operands_end();
       I != E; ++I)
    if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
      MI.addOperand(MaybeOperand.getValue());

  OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
  OutStreamer->emitInstruction(MI, getSubtargetInfo());
}

void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
                                     X86MCInstLower &MCIL) {
  bool Is64Bits = Subtarget->is64Bit();
  MCContext &Ctx = OutStreamer->getContext();
  MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
  const MCSymbolRefExpr *Op =
      MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);

  EmitAndCountInstruction(
      MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
          .addExpr(Op));
}

1332 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
1333   // FIXME: Make this work on non-ELF.
1334   if (!TM.getTargetTriple().isOSBinFormatELF()) {
1335     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
1336     return;
1337   }
1338 
1339   unsigned Reg = MI.getOperand(0).getReg().id();
1340   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
1341 
1342   MCSymbol *&Sym =
1343       AsanMemaccessSymbols[AsanMemaccessTuple(Reg, AccessInfo.Packed)];
1344   if (!Sym) {
1345     std::string Name = AccessInfo.IsWrite ? "store" : "load";
1346     std::string SymName = "__asan_check_" + Name +
1347                           utostr(1ULL << AccessInfo.AccessSizeIndex) + "_rn" +
1348                           utostr(Reg);
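         // For example (illustrative): a 4-byte load checked through a register
         // whose LLVM enum value is N references the symbol
         // "__asan_check_load4_rnN".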
1349     Sym = OutContext.getOrCreateSymbol(SymName);
1350   }
1351 
1352   EmitAndCountInstruction(
1353       MCInstBuilder(X86::CALL64pcrel32)
1354           .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
1355 }
1356 
1357 void X86AsmPrinter::emitAsanMemaccessPartial(Module &M, unsigned Reg,
1358                                              const ASanAccessInfo &AccessInfo,
1359                                              MCSubtargetInfo &STI) {
1360   assert(AccessInfo.AccessSizeIndex == 0 || AccessInfo.AccessSizeIndex == 1 ||
1361          AccessInfo.AccessSizeIndex == 2);
1362   assert(Reg != X86::R8);
1363 
1364   uint64_t ShadowBase;
1365   int MappingScale;
1366   bool OrShadowOffset;
1367   getAddressSanitizerParams(
1368       Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
1369       AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
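       // A sketch (assuming the common, non-OrShadowOffset shadow mapping) of
       // the fast path emitted below:
       //   mov    %<addr-reg>, %r8
       //   shr    $MappingScale, %r8
       //   movsbl ShadowBase(%r8), %r8d
       //   test   %r8d, %r8d
       //   jne    .Ladditional_check  # shadow byte non-zero: take slow path
       //   ret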
1370 
1371   OutStreamer->emitInstruction(
1372       MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
1373       STI);
1374   OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
1375                                    .addReg(X86::R8)
1376                                    .addReg(X86::R8)
1377                                    .addImm(MappingScale),
1378                                STI);
1379   if (OrShadowOffset) {
1380     OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
1381                                      .addReg(X86::R8)
1382                                      .addReg(X86::R8)
1383                                      .addImm(ShadowBase),
1384                                  STI);
1385     OutStreamer->emitInstruction(MCInstBuilder(X86::MOV8rm)
1386                                      .addReg(X86::R8B)
1387                                      .addReg(X86::R8)
1388                                      .addImm(1)
1389                                      .addReg(X86::NoRegister)
1390                                      .addImm(0)
1391                                      .addReg(X86::NoRegister),
1392                                  STI);
1393     OutStreamer->emitInstruction(
1394         MCInstBuilder(X86::TEST8rr).addReg(X86::R8B).addReg(X86::R8B), STI);
1395   } else {
1396     OutStreamer->emitInstruction(MCInstBuilder(X86::MOVSX32rm8)
1397                                      .addReg(X86::R8D)
1398                                      .addReg(X86::R8)
1399                                      .addImm(1)
1400                                      .addReg(X86::NoRegister)
1401                                      .addImm(ShadowBase)
1402                                      .addReg(X86::NoRegister),
1403                                  STI);
1404     OutStreamer->emitInstruction(
1405         MCInstBuilder(X86::TEST32rr).addReg(X86::R8D).addReg(X86::R8D), STI);
1406   }
1407   MCSymbol *AdditionalCheck = OutContext.createTempSymbol();
1408   OutStreamer->emitInstruction(
1409       MCInstBuilder(X86::JCC_1)
1410           .addExpr(MCSymbolRefExpr::create(AdditionalCheck, OutContext))
1411           .addImm(X86::COND_NE),
1412       STI);
1413   MCSymbol *ReturnSym = OutContext.createTempSymbol();
1414   OutStreamer->emitLabel(ReturnSym);
1415   OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
1416 
1417   // Shadow byte is non-zero so we need to perform additional checks.
1418   OutStreamer->emitLabel(AdditionalCheck);
1419   OutStreamer->emitInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RCX),
1420                                STI);
1421   OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
1422                                    .addReg(X86::RCX)
1423                                    .addReg(X86::NoRegister + Reg),
1424                                STI);
1425   const size_t Granularity = 1ULL << MappingScale;
1426   OutStreamer->emitInstruction(MCInstBuilder(X86::AND32ri8)
1427                                    .addReg(X86::NoRegister)
1428                                    .addReg(X86::ECX)
1429                                    .addImm(Granularity - 1),
1430                                STI);
1431   if (AccessInfo.AccessSizeIndex == 1) {
1432     OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
1433                                      .addReg(X86::NoRegister)
1434                                      .addReg(X86::ECX)
1435                                      .addImm(1),
1436                                  STI);
1437   } else if (AccessInfo.AccessSizeIndex == 2) {
1438     OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
1439                                      .addReg(X86::NoRegister)
1440                                      .addReg(X86::ECX)
1441                                      .addImm(3),
1442                                  STI);
1443   }
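       // The adds above adjusted ECX to the offset of the last byte touched
       // (+1 for 2-byte, +3 for 4-byte accesses); the access is valid if that
       // offset compares below the shadow value.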
1444 
1445   OutStreamer->emitInstruction(
1446       MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::R8D).addImm(1),
1447       STI);
1448   OutStreamer->emitInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RCX),
1449                                STI);
1450   OutStreamer->emitInstruction(
1451       MCInstBuilder(X86::JCC_1)
1452           .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext))
1453           .addImm(X86::COND_L),
1454       STI);
1455 
1456   emitAsanReportError(M, Reg, AccessInfo, STI);
1457 }
1458 
1459 void X86AsmPrinter::emitAsanMemaccessFull(Module &M, unsigned Reg,
1460                                           const ASanAccessInfo &AccessInfo,
1461                                           MCSubtargetInfo &STI) {
1462   assert(AccessInfo.AccessSizeIndex == 3 || AccessInfo.AccessSizeIndex == 4);
1463   assert(Reg != X86::R8);
1464 
1465   uint64_t ShadowBase;
1466   int MappingScale;
1467   bool OrShadowOffset;
1468   getAddressSanitizerParams(
1469       Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
1470       AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
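       // A sketch of the fast path emitted below (non-OrShadowOffset case);
       // 8-byte accesses use a byte compare, 16-byte accesses a word compare:
       //   mov  %<addr-reg>, %r8
       //   shr  $MappingScale, %r8
       //   cmpb $0, ShadowBase(%r8)
       //   jne  .Lreport
       //   ret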
1471 
1472   OutStreamer->emitInstruction(
1473       MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
1474       STI);
1475   OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
1476                                    .addReg(X86::R8)
1477                                    .addReg(X86::R8)
1478                                    .addImm(MappingScale),
1479                                STI);
1480   if (OrShadowOffset) {
1481     OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
1482                                      .addReg(X86::R8)
1483                                      .addReg(X86::R8)
1484                                      .addImm(ShadowBase),
1485                                  STI);
1486     auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
1487     OutStreamer->emitInstruction(MCInstBuilder(OpCode)
1488                                      .addReg(X86::R8)
1489                                      .addImm(1)
1490                                      .addReg(X86::NoRegister)
1491                                      .addImm(0)
1492                                      .addReg(X86::NoRegister)
1493                                      .addImm(0),
1494                                  STI);
1495   } else {
1496     auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
1497     OutStreamer->emitInstruction(MCInstBuilder(OpCode)
1498                                      .addReg(X86::R8)
1499                                      .addImm(1)
1500                                      .addReg(X86::NoRegister)
1501                                      .addImm(ShadowBase)
1502                                      .addReg(X86::NoRegister)
1503                                      .addImm(0),
1504                                  STI);
1505   }
1506   MCSymbol *ReportCode = OutContext.createTempSymbol();
1507   OutStreamer->emitInstruction(
1508       MCInstBuilder(X86::JCC_1)
1509           .addExpr(MCSymbolRefExpr::create(ReportCode, OutContext))
1510           .addImm(X86::COND_NE),
1511       STI);
1512   MCSymbol *ReturnSym = OutContext.createTempSymbol();
1513   OutStreamer->emitLabel(ReturnSym);
1514   OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
1515 
1516   OutStreamer->emitLabel(ReportCode);
1517   emitAsanReportError(M, Reg, AccessInfo, STI);
1518 }
1519 
1520 void X86AsmPrinter::emitAsanReportError(Module &M, unsigned Reg,
1521                                         const ASanAccessInfo &AccessInfo,
1522                                         MCSubtargetInfo &STI) {
1523   std::string Name = AccessInfo.IsWrite ? "store" : "load";
1524   MCSymbol *ReportError = OutContext.getOrCreateSymbol(
1525       "__asan_report_" + Name + utostr(1ULL << AccessInfo.AccessSizeIndex));
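       // Pass the faulting address in %rdi and tail-jump to the matching
       // runtime handler (e.g. __asan_report_load4) through the PLT.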
1526   OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
1527                                    .addReg(X86::RDI)
1528                                    .addReg(X86::NoRegister + Reg),
1529                                STI);
1530   OutStreamer->emitInstruction(
1531       MCInstBuilder(X86::JMP_4)
1532           .addExpr(MCSymbolRefExpr::create(ReportError, MCSymbolRefExpr::VK_PLT,
1533                                            OutContext)),
1534       STI);
1535 }
1536 
1537 void X86AsmPrinter::emitAsanMemaccessSymbols(Module &M) {
1538   if (AsanMemaccessSymbols.empty())
1539     return;
1540 
1541   const Triple &TT = TM.getTargetTriple();
1542   assert(TT.isOSBinFormatELF());
1543   std::unique_ptr<MCSubtargetInfo> STI(
1544       TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
1545   assert(STI && "Unable to create subtarget info");
1546 
1547   for (auto &P : AsanMemaccessSymbols) {
1548     MCSymbol *Sym = P.second;
1549     OutStreamer->SwitchSection(OutContext.getELFSection(
1550         ".text.hot", ELF::SHT_PROGBITS,
1551         ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(),
1552         /*IsComdat=*/true));
1553 
1554     OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
1555     OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
1556     OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
1557     OutStreamer->emitLabel(Sym);
1558 
1559     unsigned Reg = std::get<0>(P.first);
1560     ASanAccessInfo AccessInfo(std::get<1>(P.first));
1561 
1562     if (AccessInfo.AccessSizeIndex < 3) {
1563       emitAsanMemaccessPartial(M, Reg, AccessInfo, *STI);
1564     } else {
1565       emitAsanMemaccessFull(M, Reg, AccessInfo, *STI);
1566     }
1567   }
1568 }
1569 
1570 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1571                                       X86MCInstLower &MCIL) {
1572   // PATCHABLE_OP minsize, opcode, operands
1573 
1574   NoAutoPaddingScope NoPadScope(*OutStreamer);
1575 
1576   unsigned MinSize = MI.getOperand(0).getImm();
1577   unsigned Opcode = MI.getOperand(1).getImm();
1578 
1579   MCInst MCI;
1580   MCI.setOpcode(Opcode);
1581   for (auto &MO : drop_begin(MI.operands(), 2))
1582     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1583       MCI.addOperand(MaybeOperand.getValue());
1584 
1585   SmallString<256> Code;
1586   SmallVector<MCFixup, 4> Fixups;
1587   raw_svector_ostream VecOS(Code);
1588   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1589 
1590   if (Code.size() < MinSize) {
1591     if (MinSize == 2 && Subtarget->is32Bit() &&
1592         Subtarget->isTargetWindowsMSVC() &&
1593         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1594       // For compatibility reasons, when targeting MSVC, it is important to
1595       // generate a 'legacy' NOP in the form of an 8B FF MOV EDI, EDI. Some tools
1596       // rely specifically on this pattern to be able to patch a function.
1597       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1598       OutStreamer->emitInstruction(
1599           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1600           *Subtarget);
1601     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1602       // This is an optimization that lets us get away without emitting a nop in
1603       // many cases.
1604       //
1605       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1606       // bytes too, so the check on MinSize is important.
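           // For example, "pushq %rax" encodes as 50 (one byte), while the
           // register form of PUSH64rmr encodes as ff f0 (two bytes).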
1607       MCI.setOpcode(X86::PUSH64rmr);
1608     } else {
1609       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1610       assert(NopSize == MinSize && "Could not implement MinSize!");
1611       (void)NopSize;
1612     }
1613   }
1614 
1615   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1616 }
1617 
1618 // Lower a stackmap of the form:
1619 // <id>, <shadowBytes>, ...
1620 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1621   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1622 
1623   auto &Ctx = OutStreamer->getContext();
1624   MCSymbol *MILabel = Ctx.createTempSymbol();
1625   OutStreamer->emitLabel(MILabel);
1626 
1627   SM.recordStackMap(*MILabel, MI);
1628   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1629   SMShadowTracker.reset(NumShadowBytes);
1630 }
1631 
1632 // Lower a patchpoint of the form:
1633 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1634 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1635                                     X86MCInstLower &MCIL) {
1636   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1637 
1638   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1639 
1640   NoAutoPaddingScope NoPadScope(*OutStreamer);
1641 
1642   auto &Ctx = OutStreamer->getContext();
1643   MCSymbol *MILabel = Ctx.createTempSymbol();
1644   OutStreamer->emitLabel(MILabel);
1645   SM.recordPatchPoint(*MILabel, MI);
1646 
1647   PatchPointOpers opers(&MI);
1648   unsigned ScratchIdx = opers.getNextScratchIdx();
1649   unsigned EncodedBytes = 0;
1650   const MachineOperand &CalleeMO = opers.getCallTarget();
1651 
1652   // Check for null target. If target is non-null (i.e. is non-zero or is
1653   // symbolic) then emit a call.
1654   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1655     MCOperand CalleeMCOp;
1656     switch (CalleeMO.getType()) {
1657     default:
1658       /// FIXME: Add a verifier check for bad callee types.
1659       llvm_unreachable("Unrecognized callee operand type.");
1660     case MachineOperand::MO_Immediate:
1661       if (CalleeMO.getImm())
1662         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1663       break;
1664     case MachineOperand::MO_ExternalSymbol:
1665     case MachineOperand::MO_GlobalAddress:
1666       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1667                                            MCIL.GetSymbolFromOperand(CalleeMO));
1668       break;
1669     }
1670 
1671     // Emit MOV to materialize the target address and the CALL to target.
1672     // This is encoded with 12-13 bytes, depending on which register is used.
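         // (movabsq $imm64, %reg is 10 bytes; callq *%reg is 2 bytes, or 3
         // when a REX prefix is needed to address r8-r15.)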
1673     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1674     if (X86II::isX86_64ExtendedReg(ScratchReg))
1675       EncodedBytes = 13;
1676     else
1677       EncodedBytes = 12;
1678 
1679     EmitAndCountInstruction(
1680         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1681     // FIXME: Add retpoline support and remove this.
1682     if (Subtarget->useIndirectThunkCalls())
1683       report_fatal_error(
1684           "Lowering patchpoint with thunks not yet implemented.");
1685     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1686   }
1687 
1688   // Emit padding.
1689   unsigned NumBytes = opers.getNumPatchBytes();
1690   assert(NumBytes >= EncodedBytes &&
1691          "Patchpoint can't request size less than the length of a call.");
1692 
1693   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1694 }
1695 
1696 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1697                                               X86MCInstLower &MCIL) {
1698   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1699 
1700   NoAutoPaddingScope NoPadScope(*OutStreamer);
1701 
1702   // We want to emit the following pattern, which follows the x86 calling
1703   // convention to prepare for the trampoline call to be patched in.
1704   //
1705   //   .p2align 1, ...
1706   // .Lxray_event_sled_N:
1707   //   jmp +N                        // jump across the instrumentation sled
1708   //   ...                           // set up arguments in registers
1709   //   callq __xray_CustomEvent@plt  // force dependency on the symbol
1710   //   ...
1711   //   <jump here>
1712   //
1713   // After patching, it would look something like:
1714   //
1715   //   nopw (2-byte nop)
1716   //   ...
1717   //   callq __xrayCustomEvent  // already lowered
1718   //   ...
1719   //
1720   // ---
1721   // First we emit the label and the jump.
1722   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1723   OutStreamer->AddComment("# XRay Custom Event Log");
1724   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1725   OutStreamer->emitLabel(CurSled);
1726 
1727   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1728   // an operand (computed as an offset from the jmp instruction).
1729   // FIXME: Find another less hacky way to force the relative jump.
1730   OutStreamer->emitBinaryData("\xeb\x0f");
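       // 0xEB is JMP rel8; the 0x0F displacement skips the 15 bytes of sled
       // body that follow (argument spills/moves, the call, and the restores).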
1731 
1732   // The SysV C calling convention places the first two arguments in %rdi and
1733   // %rsi -- so we only work with those.
1734   const Register DestRegs[] = {X86::RDI, X86::RSI};
1735   bool UsedMask[] = {false, false};
1736   // Filled out in loop.
1737   Register SrcRegs[] = {0, 0};
1738 
1739   // Then we put the operands in the %rdi and %rsi registers. We spill the
1740   // values in the registers before we clobber them, and mark them as used in
1741   // UsedMask. In case the arguments are already in the correct register, we
1742   // emit nops appropriately sized to keep the sled the same size in every
1743   // situation.
1744   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1745     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1746       assert(Op->isReg() && "Only support arguments in registers");
1747       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1748       if (SrcRegs[I] != DestRegs[I]) {
1749         UsedMask[I] = true;
1750         EmitAndCountInstruction(
1751             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1752       } else {
1753         emitX86Nops(*OutStreamer, 4, Subtarget);
1754       }
1755     }
1756 
1757   // Now that the register values are stashed, mov arguments into place.
1758   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1759   // earlier DestReg. We will have already overwritten the register before
1760   // we can copy from it.
1761   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1762     if (SrcRegs[I] != DestRegs[I])
1763       EmitAndCountInstruction(
1764           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1765 
1766   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1767   // name of the trampoline to be implemented by the XRay runtime.
1768   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1769   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1770   if (isPositionIndependent())
1771     TOp.setTargetFlags(X86II::MO_PLT);
1772 
1773   // Emit the call instruction.
1774   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1775                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1776 
1777   // Restore caller-saved and used registers.
1778   for (unsigned I = sizeof UsedMask; I-- > 0;)
1779     if (UsedMask[I])
1780       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1781     else
1782       emitX86Nops(*OutStreamer, 1, Subtarget);
1783 
1784   OutStreamer->AddComment("xray custom event end.");
1785 
1786   // Record the sled version. Version 0 of this sled was spelled differently, so
1787   // we let the runtime handle the different offsets we're using. Version 2
1788   // changed the absolute address to a PC-relative address.
1789   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1790 }
1791 
1792 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1793                                                     X86MCInstLower &MCIL) {
1794   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1795 
1796   NoAutoPaddingScope NoPadScope(*OutStreamer);
1797 
1798   // We want to emit the following pattern, which follows the x86 calling
1799   // convention to prepare for the trampoline call to be patched in.
1800   //
1801   //   .p2align 1, ...
1802   // .Lxray_event_sled_N:
1803   //   jmp +N                        // jump across the instrumentation sled
1804   //   ...                           // set up arguments in registers
1805   //   callq __xray_TypedEvent@plt  // force dependency on the symbol
1806   //   ...
1807   //   <jump here>
1808   //
1809   // After patching, it would look something like:
1810   //
1811   //   nopw (2-byte nop)
1812   //   ...
1813   //   callq __xrayTypedEvent  // already lowered
1814   //   ...
1815   //
1816   // ---
1817   // First we emit the label and the jump.
1818   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1819   OutStreamer->AddComment("# XRay Typed Event Log");
1820   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1821   OutStreamer->emitLabel(CurSled);
1822 
1823   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1824   // an operand (computed as an offset from the jmp instruction).
1825   // FIXME: Find another less hacky way to force the relative jump.
1826   OutStreamer->emitBinaryData("\xeb\x14");
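       // As in the custom-event sled, JMP rel8 with a 0x14 displacement skips
       // the 20 bytes of sled body that follow.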
1827 
1828   // The Win64 convention may place three arguments into %rcx, %rdx, and %r8,
1829   // so we'll work with those. Or we may be called via SystemV, in which case
1830   // we don't have to do any translation.
1831   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1832   bool UsedMask[] = {false, false, false};
1833 
1834   // Will fill out src regs in the loop.
1835   Register SrcRegs[] = {0, 0, 0};
1836 
1837   // Then we put the operands in the SystemV registers. We spill the values in
1838   // the registers before we clobber them, and mark them as used in UsedMask.
1839   // In case the arguments are already in the correct register, we emit nops
1840   // appropriately sized to keep the sled the same size in every situation.
1841   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1842     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1843       // TODO: Is register-only support adequate?
1844       assert(Op->isReg() && "Only supports arguments in registers");
1845       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1846       if (SrcRegs[I] != DestRegs[I]) {
1847         UsedMask[I] = true;
1848         EmitAndCountInstruction(
1849             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1850       } else {
1851         emitX86Nops(*OutStreamer, 4, Subtarget);
1852       }
1853     }
1854 
1855   // The loop above only stashes the destination registers, or emits nops if
1856   // the arguments are already in the right place. The actual moving is
1857   // postponed until after all the registers are stashed, so nothing is
1858   // clobbered. We've already added nops to account for the size of mov and
1859   // push if the register is in the right place, so we only have to worry
1860   // about emitting movs.
1861   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1862   // earlier DestReg. We will have already overwritten the register before
1863   // we can copy from it.
1864   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1865     if (UsedMask[I])
1866       EmitAndCountInstruction(
1867           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1868 
1869   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1870   // name of the trampoline to be implemented by the XRay runtime.
1871   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1872   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1873   if (isPositionIndependent())
1874     TOp.setTargetFlags(X86II::MO_PLT);
1875 
1876   // Emit the call instruction.
1877   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1878                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1879 
1880   // Restore caller-saved and used registers.
1881   for (unsigned I = sizeof UsedMask; I-- > 0;)
1882     if (UsedMask[I])
1883       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1884     else
1885       emitX86Nops(*OutStreamer, 1, Subtarget);
1886 
1887   OutStreamer->AddComment("xray typed event end.");
1888 
1889   // Record the sled version.
1890   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1891 }
1892 
1893 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1894                                                   X86MCInstLower &MCIL) {
1895 
1896   NoAutoPaddingScope NoPadScope(*OutStreamer);
1897 
1898   const Function &F = MF->getFunction();
1899   if (F.hasFnAttribute("patchable-function-entry")) {
1900     unsigned Num;
1901     if (F.getFnAttribute("patchable-function-entry")
1902             .getValueAsString()
1903             .getAsInteger(10, Num))
1904       return;
1905     emitX86Nops(*OutStreamer, Num, Subtarget);
1906     return;
1907   }
1908   // We want to emit the following pattern:
1909   //
1910   //   .p2align 1, ...
1911   // .Lxray_sled_N:
1912   //   jmp .tmpN
1913   //   # 9 bytes worth of nops
1914   //
1915   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1916   // bytes with the following pattern:
1917   //
1918   //   mov %r10, <function id, 32-bit>   // 6 bytes
1919   //   call <relative offset, 32-bits>   // 5 bytes
1920   //
1921   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1922   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1923   OutStreamer->emitLabel(CurSled);
1924 
1925   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1926   // an operand (computed as an offset from the jmp instruction).
1927   // FIXME: Find another less hacky way to force the relative jump.
1928   OutStreamer->emitBytes("\xeb\x09");
1929   emitX86Nops(*OutStreamer, 9, Subtarget);
1930   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1931 }
1932 
1933 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1934                                        X86MCInstLower &MCIL) {
1935   NoAutoPaddingScope NoPadScope(*OutStreamer);
1936 
1937   // Since PATCHABLE_RET takes the opcode of the return statement as an
1938   // argument, we use that to emit the correct form of the RET that we want.
1939   // i.e. when we see this:
1940   //
1941   //   PATCHABLE_RET X86::RET ...
1942   //
1943   // We should emit the RET followed by sleds.
1944   //
1945   //   .p2align 1, ...
1946   // .Lxray_sled_N:
1947   //   ret  # or equivalent instruction
1948   //   # 10 bytes worth of nops
1949   //
1950   // This just makes sure that the alignment for the next instruction is 2.
1951   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1952   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1953   OutStreamer->emitLabel(CurSled);
1954   unsigned OpCode = MI.getOperand(0).getImm();
1955   MCInst Ret;
1956   Ret.setOpcode(OpCode);
1957   for (auto &MO : drop_begin(MI.operands()))
1958     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1959       Ret.addOperand(MaybeOperand.getValue());
1960   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1961   emitX86Nops(*OutStreamer, 10, Subtarget);
1962   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1963 }
1964 
1965 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1966                                              X86MCInstLower &MCIL) {
1967   NoAutoPaddingScope NoPadScope(*OutStreamer);
1968 
1969   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1970   // instruction so we lower that particular instruction and its operands.
1971   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1972   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1973   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1974   // tail call much like how we have it in PATCHABLE_RET.
1975   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1976   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1977   OutStreamer->emitLabel(CurSled);
1978   auto Target = OutContext.createTempSymbol();
1979 
1980   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1981   // an operand (computed as an offset from the jmp instruction).
1982   // FIXME: Find another less hacky way to force the relative jump.
1983   OutStreamer->emitBytes("\xeb\x09");
1984   emitX86Nops(*OutStreamer, 9, Subtarget);
1985   OutStreamer->emitLabel(Target);
1986   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1987 
1988   unsigned OpCode = MI.getOperand(0).getImm();
1989   OpCode = convertTailJumpOpcode(OpCode);
1990   MCInst TC;
1991   TC.setOpcode(OpCode);
1992 
1993   // Before emitting the instruction, add a comment to indicate that this is
1994   // indeed a tail call.
1995   OutStreamer->AddComment("TAILCALL");
1996   for (auto &MO : drop_begin(MI.operands()))
1997     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1998       TC.addOperand(MaybeOperand.getValue());
1999   OutStreamer->emitInstruction(TC, getSubtargetInfo());
2000 }
2001 
2002 // Returns instruction preceding MBBI in MachineFunction.
2003 // If MBBI is the first instruction of the first basic block, returns null.
2004 static MachineBasicBlock::const_iterator
2005 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
2006   const MachineBasicBlock *MBB = MBBI->getParent();
2007   while (MBBI == MBB->begin()) {
2008     if (MBB == &MBB->getParent()->front())
2009       return MachineBasicBlock::const_iterator();
2010     MBB = MBB->getPrevNode();
2011     MBBI = MBB->end();
2012   }
2013   --MBBI;
2014   return MBBI;
2015 }
2016 
2017 static const Constant *getConstantFromPool(const MachineInstr &MI,
2018                                            const MachineOperand &Op) {
2019   if (!Op.isCPI() || Op.getOffset() != 0)
2020     return nullptr;
2021 
2022   ArrayRef<MachineConstantPoolEntry> Constants =
2023       MI.getParent()->getParent()->getConstantPool()->getConstants();
2024   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
2025 
2026   // Bail if this is a machine constant pool entry; we won't be able to dig out
2027   // anything useful.
2028   if (ConstantEntry.isMachineConstantPoolEntry())
2029     return nullptr;
2030 
2031   return ConstantEntry.Val.ConstVal;
2032 }
2033 
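     // Build a human-readable comment describing the shuffle's source spans,
     // e.g. (illustrative): "xmm0 = xmm1[0,2],zero,mem[3]".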
2034 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
2035                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
2036   std::string Comment;
2037 
2038   // Compute the name for a register. This is really goofy because we have
2039   // multiple instruction printers that could (in theory) use different
2040   // names. Fortunately most people use the ATT style (outside of Windows)
2041   // and they actually agree on register naming here. Ultimately, this is
2042   // a comment, and so it's OK if it isn't perfect.
2043   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
2044     return X86ATTInstPrinter::getRegisterName(RegNum);
2045   };
2046 
2047   const MachineOperand &DstOp = MI->getOperand(0);
2048   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
2049   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
2050 
2051   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
2052   StringRef Src1Name =
2053       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
2054   StringRef Src2Name =
2055       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
2056 
2057   // If both sources are the same, fix the mask to print all elements in one span.
2058   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
2059   if (Src1Name == Src2Name)
2060     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
2061       if (ShuffleMask[i] >= e)
2062         ShuffleMask[i] -= e;
2063 
2064   raw_string_ostream CS(Comment);
2065   CS << DstName;
2066 
2067   // Handle AVX512 MASK/MASKZ write mask comments.
2068   // MASK: zmmX {%kY}
2069   // MASKZ: zmmX {%kY} {z}
2070   if (SrcOp1Idx > 1) {
2071     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
2072 
2073     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
2074     if (WriteMaskOp.isReg()) {
2075       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
2076 
2077       if (SrcOp1Idx == 2) {
2078         CS << " {z}";
2079       }
2080     }
2081   }
2082 
2083   CS << " = ";
2084 
2085   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
2086     if (i != 0)
2087       CS << ",";
2088     if (ShuffleMask[i] == SM_SentinelZero) {
2089       CS << "zero";
2090       continue;
2091     }
2092 
2093     // Otherwise, it must come from src1 or src2.  Print the span of elements
2094     // that comes from this src.
2095     bool isSrc1 = ShuffleMask[i] < (int)e;
2096     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
2097 
2098     bool IsFirst = true;
2099     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
2100            (ShuffleMask[i] < (int)e) == isSrc1) {
2101       if (!IsFirst)
2102         CS << ',';
2103       else
2104         IsFirst = false;
2105       if (ShuffleMask[i] == SM_SentinelUndef)
2106         CS << "u";
2107       else
2108         CS << ShuffleMask[i] % (int)e;
2109       ++i;
2110     }
2111     CS << ']';
2112     --i; // For loop increments element #.
2113   }
2114   CS.flush();
2115 
2116   return Comment;
2117 }
2118 
2119 static void printConstant(const APInt &Val, raw_ostream &CS) {
2120   if (Val.getBitWidth() <= 64) {
2121     CS << Val.getZExtValue();
2122   } else {
2123     // print multi-word constant as (w0,w1)
2124     CS << "(";
2125     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
2126       if (i > 0)
2127         CS << ",";
2128       CS << Val.getRawData()[i];
2129     }
2130     CS << ")";
2131   }
2132 }
2133 
2134 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
2135   SmallString<32> Str;
2136   // Force scientific notation to distinguish from integers.
2137   Flt.toString(Str, 0, 0);
2138   CS << Str;
2139 }
2140 
2141 static void printConstant(const Constant *COp, raw_ostream &CS) {
2142   if (isa<UndefValue>(COp)) {
2143     CS << "u";
2144   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
2145     printConstant(CI->getValue(), CS);
2146   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
2147     printConstant(CF->getValueAPF(), CS);
2148   } else {
2149     CS << "?";
2150   }
2151 }
2152 
2153 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
2154   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2155   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
2156 
2157   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
2158   if (EmitFPOData) {
2159     X86TargetStreamer *XTS =
2160         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
2161     switch (MI->getOpcode()) {
2162     case X86::SEH_PushReg:
2163       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
2164       break;
2165     case X86::SEH_StackAlloc:
2166       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
2167       break;
2168     case X86::SEH_StackAlign:
2169       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
2170       break;
2171     case X86::SEH_SetFrame:
2172       assert(MI->getOperand(1).getImm() == 0 &&
2173              ".cv_fpo_setframe takes no offset");
2174       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
2175       break;
2176     case X86::SEH_EndPrologue:
2177       XTS->emitFPOEndPrologue();
2178       break;
2179     case X86::SEH_SaveReg:
2180     case X86::SEH_SaveXMM:
2181     case X86::SEH_PushFrame:
2182       llvm_unreachable("SEH_ directive incompatible with FPO");
2183       break;
2184     default:
2185       llvm_unreachable("expected SEH_ instruction");
2186     }
2187     return;
2188   }
2189 
2190   // Otherwise, use the .seh_ directives for all other Windows platforms.
2191   switch (MI->getOpcode()) {
2192   case X86::SEH_PushReg:
2193     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
2194     break;
2195 
2196   case X86::SEH_SaveReg:
2197     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
2198                                    MI->getOperand(1).getImm());
2199     break;
2200 
2201   case X86::SEH_SaveXMM:
2202     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
2203                                    MI->getOperand(1).getImm());
2204     break;
2205 
2206   case X86::SEH_StackAlloc:
2207     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
2208     break;
2209 
2210   case X86::SEH_SetFrame:
2211     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
2212                                     MI->getOperand(1).getImm());
2213     break;
2214 
2215   case X86::SEH_PushFrame:
2216     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
2217     break;
2218 
2219   case X86::SEH_EndPrologue:
2220     OutStreamer->EmitWinCFIEndProlog();
2221     break;
2222 
2223   default:
2224     llvm_unreachable("expected SEH_ instruction");
2225   }
2226 }
2227 
2228 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2229   if (Info.RegClass == X86::VR128RegClassID ||
2230       Info.RegClass == X86::VR128XRegClassID)
2231     return 128;
2232   if (Info.RegClass == X86::VR256RegClassID ||
2233       Info.RegClass == X86::VR256XRegClassID)
2234     return 256;
2235   if (Info.RegClass == X86::VR512RegClassID)
2236     return 512;
2237   llvm_unreachable("Unknown register class!");
2238 }
2239 
2240 static void addConstantComments(const MachineInstr *MI,
2241                                 MCStreamer &OutStreamer) {
2242   switch (MI->getOpcode()) {
2243   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2244   // a constant shuffle mask. We won't be able to do this at the MC layer
2245   // because the mask isn't an immediate.
2246   case X86::PSHUFBrm:
2247   case X86::VPSHUFBrm:
2248   case X86::VPSHUFBYrm:
2249   case X86::VPSHUFBZ128rm:
2250   case X86::VPSHUFBZ128rmk:
2251   case X86::VPSHUFBZ128rmkz:
2252   case X86::VPSHUFBZ256rm:
2253   case X86::VPSHUFBZ256rmk:
2254   case X86::VPSHUFBZ256rmkz:
2255   case X86::VPSHUFBZrm:
2256   case X86::VPSHUFBZrmk:
2257   case X86::VPSHUFBZrmkz: {
2258     unsigned SrcIdx = 1;
2259     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2260       // Skip mask operand.
2261       ++SrcIdx;
2262       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2263         // Skip passthru operand.
2264         ++SrcIdx;
2265       }
2266     }
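         // The 5-part memory operand (base, scale, index, disp, segment)
         // follows the source register; the constant-pool reference lives in
         // its displacement slot (X86::AddrDisp).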
2267     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2268 
2269     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2270            "Unexpected number of operands!");
2271 
2272     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2273     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2274       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2275       SmallVector<int, 64> Mask;
2276       DecodePSHUFBMask(C, Width, Mask);
2277       if (!Mask.empty())
2278         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2279     }
2280     break;
2281   }
2282 
2283   case X86::VPERMILPSrm:
2284   case X86::VPERMILPSYrm:
2285   case X86::VPERMILPSZ128rm:
2286   case X86::VPERMILPSZ128rmk:
2287   case X86::VPERMILPSZ128rmkz:
2288   case X86::VPERMILPSZ256rm:
2289   case X86::VPERMILPSZ256rmk:
2290   case X86::VPERMILPSZ256rmkz:
2291   case X86::VPERMILPSZrm:
2292   case X86::VPERMILPSZrmk:
2293   case X86::VPERMILPSZrmkz:
2294   case X86::VPERMILPDrm:
2295   case X86::VPERMILPDYrm:
2296   case X86::VPERMILPDZ128rm:
2297   case X86::VPERMILPDZ128rmk:
2298   case X86::VPERMILPDZ128rmkz:
2299   case X86::VPERMILPDZ256rm:
2300   case X86::VPERMILPDZ256rmk:
2301   case X86::VPERMILPDZ256rmkz:
2302   case X86::VPERMILPDZrm:
2303   case X86::VPERMILPDZrmk:
2304   case X86::VPERMILPDZrmkz: {
2305     unsigned ElSize;
2306     switch (MI->getOpcode()) {
2307     default: llvm_unreachable("Invalid opcode");
2308     case X86::VPERMILPSrm:
2309     case X86::VPERMILPSYrm:
2310     case X86::VPERMILPSZ128rm:
2311     case X86::VPERMILPSZ256rm:
2312     case X86::VPERMILPSZrm:
2313     case X86::VPERMILPSZ128rmkz:
2314     case X86::VPERMILPSZ256rmkz:
2315     case X86::VPERMILPSZrmkz:
2316     case X86::VPERMILPSZ128rmk:
2317     case X86::VPERMILPSZ256rmk:
2318     case X86::VPERMILPSZrmk:
2319       ElSize = 32;
2320       break;
2321     case X86::VPERMILPDrm:
2322     case X86::VPERMILPDYrm:
2323     case X86::VPERMILPDZ128rm:
2324     case X86::VPERMILPDZ256rm:
2325     case X86::VPERMILPDZrm:
2326     case X86::VPERMILPDZ128rmkz:
2327     case X86::VPERMILPDZ256rmkz:
2328     case X86::VPERMILPDZrmkz:
2329     case X86::VPERMILPDZ128rmk:
2330     case X86::VPERMILPDZ256rmk:
2331     case X86::VPERMILPDZrmk:
2332       ElSize = 64;
2333       break;
2334     }
2335 
2336     unsigned SrcIdx = 1;
2337     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2338       // Skip mask operand.
2339       ++SrcIdx;
2340       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2341         // Skip passthru operand.
2342         ++SrcIdx;
2343       }
2344     }
2345     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2346 
2347     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2348            "Unexpected number of operands!");
2349 
2350     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2351     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2352       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2353       SmallVector<int, 16> Mask;
2354       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2355       if (!Mask.empty())
2356         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2357     }
2358     break;
2359   }
2360 
2361   case X86::VPERMIL2PDrm:
2362   case X86::VPERMIL2PSrm:
2363   case X86::VPERMIL2PDYrm:
2364   case X86::VPERMIL2PSYrm: {
2365     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2366            "Unexpected number of operands!");
2367 
2368     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2369     if (!CtrlOp.isImm())
2370       break;
2371 
2372     unsigned ElSize;
2373     switch (MI->getOpcode()) {
2374     default: llvm_unreachable("Invalid opcode");
2375     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2376     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2377     }
2378 
2379     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2380     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2381       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2382       SmallVector<int, 16> Mask;
2383       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2384       if (!Mask.empty())
2385         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2386     }
2387     break;
2388   }
2389 
2390   case X86::VPPERMrrm: {
2391     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2392            "Unexpected number of operands!");
2393 
2394     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2395     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2396       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2397       SmallVector<int, 16> Mask;
2398       DecodeVPPERMMask(C, Width, Mask);
2399       if (!Mask.empty())
2400         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2401     }
2402     break;
2403   }
2404 
2405   case X86::MMX_MOVQ64rm: {
2406     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2407            "Unexpected number of operands!");
2408     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2409       std::string Comment;
2410       raw_string_ostream CS(Comment);
2411       const MachineOperand &DstOp = MI->getOperand(0);
2412       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2413       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2414         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2415         OutStreamer.AddComment(CS.str());
2416       }
2417     }
2418     break;
2419   }
2420 
2421 #define MOV_CASE(Prefix, Suffix)                                               \
2422   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2423   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2424   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2425   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2426   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2427   case X86::Prefix##MOVDQU##Suffix##rm:
2428 
2429 #define MOV_AVX512_CASE(Suffix)                                                \
2430   case X86::VMOVDQA64##Suffix##rm:                                             \
2431   case X86::VMOVDQA32##Suffix##rm:                                             \
2432   case X86::VMOVDQU64##Suffix##rm:                                             \
2433   case X86::VMOVDQU32##Suffix##rm:                                             \
2434   case X86::VMOVDQU16##Suffix##rm:                                             \
2435   case X86::VMOVDQU8##Suffix##rm:                                              \
2436   case X86::VMOVAPS##Suffix##rm:                                               \
2437   case X86::VMOVAPD##Suffix##rm:                                               \
2438   case X86::VMOVUPS##Suffix##rm:                                               \
2439   case X86::VMOVUPD##Suffix##rm:
2440 
2441 #define CASE_ALL_MOV_RM()                                                      \
2442   MOV_CASE(, )   /* SSE */                                                     \
2443   MOV_CASE(V, )  /* AVX-128 */                                                 \
2444   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2445   MOV_AVX512_CASE(Z)                                                           \
2446   MOV_AVX512_CASE(Z256)                                                        \
2447   MOV_AVX512_CASE(Z128)
2448 
2449     // For loads from a constant pool to a vector register, print the constant
2450     // loaded.
2451     CASE_ALL_MOV_RM()
2452   case X86::VBROADCASTF128:
2453   case X86::VBROADCASTI128:
2454   case X86::VBROADCASTF32X4Z256rm:
2455   case X86::VBROADCASTF32X4rm:
2456   case X86::VBROADCASTF32X8rm:
2457   case X86::VBROADCASTF64X2Z128rm:
2458   case X86::VBROADCASTF64X2rm:
2459   case X86::VBROADCASTF64X4rm:
2460   case X86::VBROADCASTI32X4Z256rm:
2461   case X86::VBROADCASTI32X4rm:
2462   case X86::VBROADCASTI32X8rm:
2463   case X86::VBROADCASTI64X2Z128rm:
2464   case X86::VBROADCASTI64X2rm:
2465   case X86::VBROADCASTI64X4rm:
2466     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2467            "Unexpected number of operands!");
2468     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2469       int NumLanes = 1;
2470       // Override NumLanes for the broadcast instructions.
2471       switch (MI->getOpcode()) {
2472       case X86::VBROADCASTF128:        NumLanes = 2; break;
2473       case X86::VBROADCASTI128:        NumLanes = 2; break;
2474       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2475       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2476       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2477       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2478       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2479       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2480       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2481       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2482       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2483       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2484       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2485       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2486       }
2487 
2488       std::string Comment;
2489       raw_string_ostream CS(Comment);
2490       const MachineOperand &DstOp = MI->getOperand(0);
2491       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2492       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2493         CS << "[";
2494         for (int l = 0; l != NumLanes; ++l) {
2495           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2496                ++i) {
2497             if (i != 0 || l != 0)
2498               CS << ",";
2499             if (CDS->getElementType()->isIntegerTy())
2500               printConstant(CDS->getElementAsAPInt(i), CS);
2501             else if (CDS->getElementType()->isHalfTy() ||
2502                      CDS->getElementType()->isFloatTy() ||
2503                      CDS->getElementType()->isDoubleTy())
2504               printConstant(CDS->getElementAsAPFloat(i), CS);
2505             else
2506               CS << "?";
2507           }
2508         }
2509         CS << "]";
2510         OutStreamer.AddComment(CS.str());
2511       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2512         CS << "<";
2513         for (int l = 0; l != NumLanes; ++l) {
2514           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2515                ++i) {
2516             if (i != 0 || l != 0)
2517               CS << ",";
2518             printConstant(CV->getOperand(i), CS);
2519           }
2520         }
2521         CS << ">";
2522         OutStreamer.AddComment(CS.str());
2523       }
2524     }
2525     break;
2526 
2527   case X86::MOVDDUPrm:
2528   case X86::VMOVDDUPrm:
2529   case X86::VMOVDDUPZ128rm:
2530   case X86::VBROADCASTSSrm:
2531   case X86::VBROADCASTSSYrm:
2532   case X86::VBROADCASTSSZ128rm:
2533   case X86::VBROADCASTSSZ256rm:
2534   case X86::VBROADCASTSSZrm:
2535   case X86::VBROADCASTSDYrm:
2536   case X86::VBROADCASTSDZ256rm:
2537   case X86::VBROADCASTSDZrm:
2538   case X86::VPBROADCASTBrm:
2539   case X86::VPBROADCASTBYrm:
2540   case X86::VPBROADCASTBZ128rm:
2541   case X86::VPBROADCASTBZ256rm:
2542   case X86::VPBROADCASTBZrm:
2543   case X86::VPBROADCASTDrm:
2544   case X86::VPBROADCASTDYrm:
2545   case X86::VPBROADCASTDZ128rm:
2546   case X86::VPBROADCASTDZ256rm:
2547   case X86::VPBROADCASTDZrm:
2548   case X86::VPBROADCASTQrm:
2549   case X86::VPBROADCASTQYrm:
2550   case X86::VPBROADCASTQZ128rm:
2551   case X86::VPBROADCASTQZ256rm:
2552   case X86::VPBROADCASTQZrm:
2553   case X86::VPBROADCASTWrm:
2554   case X86::VPBROADCASTWYrm:
2555   case X86::VPBROADCASTWZ128rm:
2556   case X86::VPBROADCASTWZ256rm:
2557   case X86::VPBROADCASTWZrm:
2558     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2559            "Unexpected number of operands!");
2560     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2561       int NumElts;
2562       switch (MI->getOpcode()) {
2563       default: llvm_unreachable("Invalid opcode");
2564       case X86::MOVDDUPrm:          NumElts = 2;  break;
2565       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2566       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2567       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2568       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2569       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2570       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2571       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2572       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2573       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2574       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2575       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2576       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2577       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2578       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2579       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2580       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2581       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2582       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2583       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2584       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2585       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2586       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2587       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2588       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2589       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2590       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2591       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2592       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2593       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2594       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2595       }
2596 
2597       std::string Comment;
2598       raw_string_ostream CS(Comment);
2599       const MachineOperand &DstOp = MI->getOperand(0);
2600       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2601       CS << "[";
2602       for (int i = 0; i != NumElts; ++i) {
2603         if (i != 0)
2604           CS << ",";
2605         printConstant(C, CS);
2606       }
2607       CS << "]";
2608       OutStreamer.AddComment(CS.str());
2609     }
2610   }
2611 }
2612 
2613 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2614   X86MCInstLower MCInstLowering(*MF, *this);
2615   const X86RegisterInfo *RI =
2616       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2617 
2618   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2619   // are compressed from EVEX encoding to VEX encoding.
2620   if (TM.Options.MCOptions.ShowMCEncoding) {
2621     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2622       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2623   }

  // Add comments for values loaded from constant pool.
  if (OutStreamer->isVerboseAsm())
    addConstantComments(MI, *OutStreamer);

  switch (MI->getOpcode()) {
  case TargetOpcode::DBG_VALUE:
    llvm_unreachable("Should be handled target independently");

  // Emit nothing here but a comment if we can.
  case X86::Int_MemBarrier:
    OutStreamer->emitRawComment("MEMBARRIER");
    return;

  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Lower these as normal, but add some comments.
    Register Reg = MI->getOperand(0).getReg();
    OutStreamer->AddComment(StringRef("eh_return, addr: %") +
                            X86ATTInstPrinter::getRegisterName(Reg));
    break;
  }
  case X86::CLEANUPRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CLEANUPRET");
    break;
  }

  case X86::CATCHRET: {
    // Lower these as normal, but add some comments.
    OutStreamer->AddComment("CATCHRET");
    break;
  }

  case X86::ENDBR32:
  case X86::ENDBR64: {
    // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
    // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
    // non-empty. If MI is the initial ENDBR, place the
    // __patchable_function_entries label after ENDBR.
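    // Illustrative result under that condition (label spelling assumed):
    //   endbr64
    // .Lpatch0:    # the __patchable_function_entries entry points here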
    if (CurrentPatchableFunctionEntrySym &&
        CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
        MI == &MF->front().front()) {
      MCInst Inst;
      MCInstLowering.Lower(MI, Inst);
      EmitAndCountInstruction(Inst);
      CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
      OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
      return;
    }
    break;
  }

  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::TAILJMPd:
  case X86::TAILJMPd_CC:
  case X86::TAILJMPr64:
  case X86::TAILJMPm64:
  case X86::TAILJMPd64:
  case X86::TAILJMPd64_CC:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPm64_REX:
    // Lower these as normal, but add some comments.
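    // e.g. (illustrative, symbol assumed): jmp _foo   # TAILCALL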
    OutStreamer->AddComment("TAILCALL");
    break;

  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return LowerTlsAddr(MCInstLowering, *MI);

  case X86::MOVPC32r: {
    // This is a pseudo op for a two-instruction sequence with a label, which
    // looks like:
    //     call "L1$pb"
    // "L1$pb":
    //     popl %esi

    // Emit the call.
    MCSymbol *PICBase = MF->getPICBaseSymbol();
    // FIXME: We would like an efficient form for this, so we don't have to do
    // a lot of extra uniquing.
    EmitAndCountInstruction(
        MCInstBuilder(X86::CALLpcrel32)
            .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));

    const X86FrameLowering *FrameLowering =
        MF->getSubtarget<X86Subtarget>().getFrameLowering();
    bool hasFP = FrameLowering->hasFP(*MF);

    // TODO: This is needed only if we require precise CFA.
    bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
                               !OutStreamer->getDwarfFrameInfos().back().End;

    int stackGrowth = -RI->getSlotSize();

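    // The call above pushed a return address, so the CFA moved by one slot;
    // without a frame pointer tracking it, the active DWARF frame is adjusted
    // here and undone after the pop below. (Explanatory note inferred from
    // the surrounding code, not a comment in the original source.)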
    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
    }

    // Emit the label.
    OutStreamer->emitLabel(PICBase);

    // popl $reg
    EmitAndCountInstruction(
        MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));

    if (HasActiveDwarfFrame && !hasFP) {
      OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
    }
    return;
  }

  case X86::ADD32ri: {
    // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
    if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
      break;

    // Okay, we have something like:
    //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)

    // For this, we want to print something like:
    //   MYGLOBAL + (. - PICBASE)
    // However, we can't generate a ".", so just emit a new label here and refer
    // to it.
    MCSymbol *DotSym = OutContext.createTempSymbol();
    OutStreamer->emitLabel(DotSym);

    // Now that we have emitted the label, lower the complex operand expression.
    MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));

    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
    const MCExpr *PICBase =
        MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
    DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);

    DotExpr = MCBinaryExpr::createAdd(
        MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);

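    // At this point DotExpr is OpSym + (DotSym - PICBase), which prints
    // roughly as MYGLOBAL + (.Ltmp0 - L0$pb); the temp-label spellings here
    // are illustrative, not taken from the source.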
    EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
                                .addReg(MI->getOperand(0).getReg())
                                .addReg(MI->getOperand(1).getReg())
                                .addExpr(DotExpr));
    return;
  }
  case TargetOpcode::STATEPOINT:
    return LowerSTATEPOINT(*MI, MCInstLowering);

  case TargetOpcode::FAULTING_OP:
    return LowerFAULTING_OP(*MI, MCInstLowering);

  case TargetOpcode::FENTRY_CALL:
    return LowerFENTRY_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_OP:
    return LowerPATCHABLE_OP(*MI, MCInstLowering);

  case TargetOpcode::STACKMAP:
    return LowerSTACKMAP(*MI);

  case TargetOpcode::PATCHPOINT:
    return LowerPATCHPOINT(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
    return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_RET:
    return LowerPATCHABLE_RET(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TAIL_CALL:
    return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);

  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);

  case X86::MORESTACK_RET:
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    return;

  case X86::ASAN_CHECK_MEMACCESS:
    return LowerASAN_CHECK_MEMACCESS(*MI);

  case X86::MORESTACK_RET_RESTORE_R10:
    // Return, then restore R10.
    EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
    return;

  case X86::SEH_PushReg:
  case X86::SEH_SaveReg:
  case X86::SEH_SaveXMM:
  case X86::SEH_StackAlloc:
  case X86::SEH_StackAlign:
  case X86::SEH_SetFrame:
  case X86::SEH_PushFrame:
  case X86::SEH_EndPrologue:
    EmitSEHInstruction(MI);
    return;

  case X86::SEH_Epilogue: {
    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
    MachineBasicBlock::const_iterator MBBI(MI);
    // Check if preceded by a call and emit nop if so.
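    // (Rationale, inferred rather than stated in the original: if a call is
    // the last instruction before the epilogue, its return address would
    // point at the first epilogue instruction, which the Windows unwinder
    // treats as being mid-epilogue; the nop keeps the return address inside
    // the function body.)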
    for (MBBI = PrevCrossBBInst(MBBI);
         MBBI != MachineBasicBlock::const_iterator();
         MBBI = PrevCrossBBInst(MBBI)) {
      // Conservatively assume that pseudo instructions don't emit code and keep
      // looking for a call. We may emit an unnecessary nop in some cases.
      if (!MBBI->isPseudo()) {
        if (MBBI->isCall())
          EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
        break;
      }
    }
    return;
  }
  case X86::UBSAN_UD1:
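    // Emits a ud1 whose memory-operand displacement carries the UBSan check
    // kind, e.g. (illustrative operand values): ud1l 2(%eax), %eax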
    EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
                                .addReg(X86::EAX)
                                .addReg(X86::EAX)
                                .addImm(1)
                                .addReg(X86::NoRegister)
                                .addImm(MI->getOperand(0).getImm())
                                .addReg(X86::NoRegister));
    return;
  }

  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst);

  // Stackmap shadows cannot include branch targets, so we can count the bytes
  // in a call towards the shadow, but must ensure that no thread returns
  // into the stackmap shadow.  The only way to achieve this is if the call
  // is at the end of the shadow.
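  // Worked example (sizes assumed, not from the source): with an 8-byte
  // shadow requirement, 1 byte already counted, and a 5-byte call, the
  // tracker below pads 2 bytes of nops before the call so that the call's
  // final byte ends the shadow.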
  if (MI->isCall()) {
    // Count the size of the call towards the shadow.
    SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
    // Then flush the shadow so that we fill with nops before the call, not
    // after it.
    SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
    // Then emit the call.
    OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
    return;
  }

  EmitAndCountInstruction(TmpInst);
}