xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/Target/TargetLoweringObjectFile.h"
47 #include "llvm/Target/TargetMachine.h"
48 
49 using namespace llvm;
50 
51 namespace {
52 
53 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
54 class X86MCInstLower {
55   MCContext &Ctx;
56   const MachineFunction &MF;
57   const TargetMachine &TM;
58   const MCAsmInfo &MAI;
59   X86AsmPrinter &AsmPrinter;
60 
61 public:
62   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
63 
64   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
65                                           const MachineOperand &MO) const;
66   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
67 
68   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
69   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
70 
71 private:
72   MachineModuleInfoMachO &getMachOMMI() const;
73 };
74 
75 } // end anonymous namespace
76 
77 /// A RAII helper which defines a region of instructions which can't have
78 /// padding added between them for correctness.
79 struct NoAutoPaddingScope {
80   MCStreamer &OS;
81   const bool OldAllowAutoPadding;
82   NoAutoPaddingScope(MCStreamer &OS)
83       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
84     changeAndComment(false);
85   }
86   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
87   void changeAndComment(bool b) {
88     if (b == OS.getAllowAutoPadding())
89       return;
90     OS.setAllowAutoPadding(b);
91     if (b)
92       OS.emitRawComment("autopadding");
93     else
94       OS.emitRawComment("noautopadding");
95   }
96 };
97 
98 // Emit a minimal sequence of nops spanning NumBytes bytes.
99 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
100                         const X86Subtarget *Subtarget);
101 
102 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
103                                                  const MCSubtargetInfo &STI,
104                                                  MCCodeEmitter *CodeEmitter) {
105   if (InShadow) {
106     SmallString<256> Code;
107     SmallVector<MCFixup, 4> Fixups;
108     raw_svector_ostream VecOS(Code);
109     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
110     CurrentShadowSize += Code.size();
111     if (CurrentShadowSize >= RequiredShadowSize)
112       InShadow = false; // The shadow is big enough. Stop counting.
113   }
114 }
115 
116 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
117     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
118   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
119     InShadow = false;
120     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
121                 &MF->getSubtarget<X86Subtarget>());
122   }
123 }
124 
125 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
126   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
127   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
128 }
129 
130 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
131                                X86AsmPrinter &asmprinter)
132     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
133       AsmPrinter(asmprinter) {}
134 
135 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
136   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
137 }
138 
139 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
140 /// operand to an MCSymbol.
141 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
142   const Triple &TT = TM.getTargetTriple();
143   if (MO.isGlobal() && TT.isOSBinFormatELF())
144     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
145 
146   const DataLayout &DL = MF.getDataLayout();
147   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
148          "Isn't a symbol reference");
149 
150   MCSymbol *Sym = nullptr;
151   SmallString<128> Name;
152   StringRef Suffix;
153 
154   switch (MO.getTargetFlags()) {
155   case X86II::MO_DLLIMPORT:
156     // Handle dllimport linkage.
157     Name += "__imp_";
158     break;
159   case X86II::MO_COFFSTUB:
160     Name += ".refptr.";
161     break;
162   case X86II::MO_DARWIN_NONLAZY:
163   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
164     Suffix = "$non_lazy_ptr";
165     break;
166   }
167 
168   if (!Suffix.empty())
169     Name += DL.getPrivateGlobalPrefix();
170 
171   if (MO.isGlobal()) {
172     const GlobalValue *GV = MO.getGlobal();
173     AsmPrinter.getNameWithPrefix(Name, GV);
174   } else if (MO.isSymbol()) {
175     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
176   } else if (MO.isMBB()) {
177     assert(Suffix.empty());
178     Sym = MO.getMBB()->getSymbol();
179   }
180 
181   Name += Suffix;
182   if (!Sym)
183     Sym = Ctx.getOrCreateSymbol(Name);
184 
185   // If the target flags on the operand changes the name of the symbol, do that
186   // before we return the symbol.
187   switch (MO.getTargetFlags()) {
188   default:
189     break;
190   case X86II::MO_COFFSTUB: {
191     MachineModuleInfoCOFF &MMICOFF =
192         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
193     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
194     if (!StubSym.getPointer()) {
195       assert(MO.isGlobal() && "Extern symbol not handled yet");
196       StubSym = MachineModuleInfoImpl::StubValueTy(
197           AsmPrinter.getSymbol(MO.getGlobal()), true);
198     }
199     break;
200   }
201   case X86II::MO_DARWIN_NONLAZY:
202   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
203     MachineModuleInfoImpl::StubValueTy &StubSym =
204         getMachOMMI().getGVStubEntry(Sym);
205     if (!StubSym.getPointer()) {
206       assert(MO.isGlobal() && "Extern symbol not handled yet");
207       StubSym = MachineModuleInfoImpl::StubValueTy(
208           AsmPrinter.getSymbol(MO.getGlobal()),
209           !MO.getGlobal()->hasInternalLinkage());
210     }
211     break;
212   }
213   }
214 
215   return Sym;
216 }
217 
218 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
219                                              MCSymbol *Sym) const {
220   // FIXME: We would like an efficient form for this, so we don't have to do a
221   // lot of extra uniquing.
222   const MCExpr *Expr = nullptr;
223   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
224 
225   switch (MO.getTargetFlags()) {
226   default:
227     llvm_unreachable("Unknown target flag on GV operand");
228   case X86II::MO_NO_FLAG: // No flag.
229   // These affect the name of the symbol, not any suffix.
230   case X86II::MO_DARWIN_NONLAZY:
231   case X86II::MO_DLLIMPORT:
232   case X86II::MO_COFFSTUB:
233     break;
234 
235   case X86II::MO_TLVP:
236     RefKind = MCSymbolRefExpr::VK_TLVP;
237     break;
238   case X86II::MO_TLVP_PIC_BASE:
239     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
240     // Subtract the pic base.
241     Expr = MCBinaryExpr::createSub(
242         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
243     break;
244   case X86II::MO_SECREL:
245     RefKind = MCSymbolRefExpr::VK_SECREL;
246     break;
247   case X86II::MO_TLSGD:
248     RefKind = MCSymbolRefExpr::VK_TLSGD;
249     break;
250   case X86II::MO_TLSLD:
251     RefKind = MCSymbolRefExpr::VK_TLSLD;
252     break;
253   case X86II::MO_TLSLDM:
254     RefKind = MCSymbolRefExpr::VK_TLSLDM;
255     break;
256   case X86II::MO_GOTTPOFF:
257     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
258     break;
259   case X86II::MO_INDNTPOFF:
260     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
261     break;
262   case X86II::MO_TPOFF:
263     RefKind = MCSymbolRefExpr::VK_TPOFF;
264     break;
265   case X86II::MO_DTPOFF:
266     RefKind = MCSymbolRefExpr::VK_DTPOFF;
267     break;
268   case X86II::MO_NTPOFF:
269     RefKind = MCSymbolRefExpr::VK_NTPOFF;
270     break;
271   case X86II::MO_GOTNTPOFF:
272     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
273     break;
274   case X86II::MO_GOTPCREL:
275     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
276     break;
277   case X86II::MO_GOT:
278     RefKind = MCSymbolRefExpr::VK_GOT;
279     break;
280   case X86II::MO_GOTOFF:
281     RefKind = MCSymbolRefExpr::VK_GOTOFF;
282     break;
283   case X86II::MO_PLT:
284     RefKind = MCSymbolRefExpr::VK_PLT;
285     break;
286   case X86II::MO_ABS8:
287     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
288     break;
289   case X86II::MO_PIC_BASE_OFFSET:
290   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
291     Expr = MCSymbolRefExpr::create(Sym, Ctx);
292     // Subtract the pic base.
293     Expr = MCBinaryExpr::createSub(
294         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
295     if (MO.isJTI()) {
296       assert(MAI.doesSetDirectiveSuppressReloc());
297       // If .set directive is supported, use it to reduce the number of
298       // relocations the assembler will generate for differences between
299       // local labels. This is only safe when the symbols are in the same
300       // section so we are restricting it to jumptable references.
301       MCSymbol *Label = Ctx.createTempSymbol();
302       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
303       Expr = MCSymbolRefExpr::create(Label, Ctx);
304     }
305     break;
306   }
307 
308   if (!Expr)
309     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
310 
311   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
312     Expr = MCBinaryExpr::createAdd(
313         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
314   return MCOperand::createExpr(Expr);
315 }
316 
317 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
318 /// a short fixed-register form.
319 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
320   unsigned ImmOp = Inst.getNumOperands() - 1;
321   assert(Inst.getOperand(0).isReg() &&
322          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
323          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
324            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
325           Inst.getNumOperands() == 2) &&
326          "Unexpected instruction!");
327 
328   // Check whether the destination register can be fixed.
329   unsigned Reg = Inst.getOperand(0).getReg();
330   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
331     return;
332 
333   // If so, rewrite the instruction.
334   MCOperand Saved = Inst.getOperand(ImmOp);
335   Inst = MCInst();
336   Inst.setOpcode(Opcode);
337   Inst.addOperand(Saved);
338 }
339 
340 /// If a movsx instruction has a shorter encoding for the used register
341 /// simplify the instruction to use it instead.
342 static void SimplifyMOVSX(MCInst &Inst) {
343   unsigned NewOpcode = 0;
344   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
345   switch (Inst.getOpcode()) {
346   default:
347     llvm_unreachable("Unexpected instruction!");
348   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
349     if (Op0 == X86::AX && Op1 == X86::AL)
350       NewOpcode = X86::CBW;
351     break;
352   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
353     if (Op0 == X86::EAX && Op1 == X86::AX)
354       NewOpcode = X86::CWDE;
355     break;
356   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
357     if (Op0 == X86::RAX && Op1 == X86::EAX)
358       NewOpcode = X86::CDQE;
359     break;
360   }
361 
362   if (NewOpcode != 0) {
363     Inst = MCInst();
364     Inst.setOpcode(NewOpcode);
365   }
366 }
367 
368 /// Simplify things like MOV32rm to MOV32o32a.
369 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
370                                   unsigned Opcode) {
371   // Don't make these simplifications in 64-bit mode; other assemblers don't
372   // perform them because they make the code larger.
373   if (Printer.getSubtarget().is64Bit())
374     return;
375 
376   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
377   unsigned AddrBase = IsStore;
378   unsigned RegOp = IsStore ? 0 : 5;
379   unsigned AddrOp = AddrBase + 3;
380   assert(
381       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
382       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
383       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
384       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
385       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
386       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
387       "Unexpected instruction!");
388 
389   // Check whether the destination register can be fixed.
390   unsigned Reg = Inst.getOperand(RegOp).getReg();
391   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
392     return;
393 
394   // Check whether this is an absolute address.
395   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
396   // to do this here.
397   bool Absolute = true;
398   if (Inst.getOperand(AddrOp).isExpr()) {
399     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
400     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
401       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
402         Absolute = false;
403   }
404 
405   if (Absolute &&
406       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
407        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
408        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
409     return;
410 
411   // If so, rewrite the instruction.
412   MCOperand Saved = Inst.getOperand(AddrOp);
413   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
414   Inst = MCInst();
415   Inst.setOpcode(Opcode);
416   Inst.addOperand(Saved);
417   Inst.addOperand(Seg);
418 }
419 
420 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
421   return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
422 }
423 
424 Optional<MCOperand>
425 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
426                                     const MachineOperand &MO) const {
427   switch (MO.getType()) {
428   default:
429     MI->print(errs());
430     llvm_unreachable("unknown operand type");
431   case MachineOperand::MO_Register:
432     // Ignore all implicit register operands.
433     if (MO.isImplicit())
434       return None;
435     return MCOperand::createReg(MO.getReg());
436   case MachineOperand::MO_Immediate:
437     return MCOperand::createImm(MO.getImm());
438   case MachineOperand::MO_MachineBasicBlock:
439   case MachineOperand::MO_GlobalAddress:
440   case MachineOperand::MO_ExternalSymbol:
441     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
442   case MachineOperand::MO_MCSymbol:
443     return LowerSymbolOperand(MO, MO.getMCSymbol());
444   case MachineOperand::MO_JumpTableIndex:
445     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
446   case MachineOperand::MO_ConstantPoolIndex:
447     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
448   case MachineOperand::MO_BlockAddress:
449     return LowerSymbolOperand(
450         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
451   case MachineOperand::MO_RegisterMask:
452     // Ignore call clobbers.
453     return None;
454   }
455 }
456 
457 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
458 // information.
459 static unsigned convertTailJumpOpcode(unsigned Opcode) {
460   switch (Opcode) {
461   case X86::TAILJMPr:
462     Opcode = X86::JMP32r;
463     break;
464   case X86::TAILJMPm:
465     Opcode = X86::JMP32m;
466     break;
467   case X86::TAILJMPr64:
468     Opcode = X86::JMP64r;
469     break;
470   case X86::TAILJMPm64:
471     Opcode = X86::JMP64m;
472     break;
473   case X86::TAILJMPr64_REX:
474     Opcode = X86::JMP64r_REX;
475     break;
476   case X86::TAILJMPm64_REX:
477     Opcode = X86::JMP64m_REX;
478     break;
479   case X86::TAILJMPd:
480   case X86::TAILJMPd64:
481     Opcode = X86::JMP_1;
482     break;
483   case X86::TAILJMPd_CC:
484   case X86::TAILJMPd64_CC:
485     Opcode = X86::JCC_1;
486     break;
487   }
488 
489   return Opcode;
490 }
491 
492 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
493   OutMI.setOpcode(MI->getOpcode());
494 
495   for (const MachineOperand &MO : MI->operands())
496     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
497       OutMI.addOperand(MaybeMCOp.getValue());
498 
499   // Handle a few special cases to eliminate operand modifiers.
500   switch (OutMI.getOpcode()) {
501   case X86::LEA64_32r:
502   case X86::LEA64r:
503   case X86::LEA16r:
504   case X86::LEA32r:
505     // LEA should have a segment register, but it must be empty.
506     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
507            "Unexpected # of LEA operands");
508     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
509            "LEA has segment specified!");
510     break;
511 
512   case X86::MULX32Hrr:
513   case X86::MULX32Hrm:
514   case X86::MULX64Hrr:
515   case X86::MULX64Hrm: {
516     // Turn into regular MULX by duplicating the destination.
517     unsigned NewOpc;
518     switch (OutMI.getOpcode()) {
519     default: llvm_unreachable("Invalid opcode");
520     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
521     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
522     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
523     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
524     }
525     OutMI.setOpcode(NewOpc);
526     // Duplicate the destination.
527     unsigned DestReg = OutMI.getOperand(0).getReg();
528     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
529     break;
530   }
531 
532   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
533   // if one of the registers is extended, but other isn't.
534   case X86::VMOVZPQILo2PQIrr:
535   case X86::VMOVAPDrr:
536   case X86::VMOVAPDYrr:
537   case X86::VMOVAPSrr:
538   case X86::VMOVAPSYrr:
539   case X86::VMOVDQArr:
540   case X86::VMOVDQAYrr:
541   case X86::VMOVDQUrr:
542   case X86::VMOVDQUYrr:
543   case X86::VMOVUPDrr:
544   case X86::VMOVUPDYrr:
545   case X86::VMOVUPSrr:
546   case X86::VMOVUPSYrr: {
547     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
548         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
549       unsigned NewOpc;
550       switch (OutMI.getOpcode()) {
551       default: llvm_unreachable("Invalid opcode");
552       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
553       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
554       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
555       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
556       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
557       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
558       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
559       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
560       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
561       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
562       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
563       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
564       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
565       }
566       OutMI.setOpcode(NewOpc);
567     }
568     break;
569   }
570   case X86::VMOVSDrr:
571   case X86::VMOVSSrr: {
572     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
573         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
574       unsigned NewOpc;
575       switch (OutMI.getOpcode()) {
576       default: llvm_unreachable("Invalid opcode");
577       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
578       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
579       }
580       OutMI.setOpcode(NewOpc);
581     }
582     break;
583   }
584 
585   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
586   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
587   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
588   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
589   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
590   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
591   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
592   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
593   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
594   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
595   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
596   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
597   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
598   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
599   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
600   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
601   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
602   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
603   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
604   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
605   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
606   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
607   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
608   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
609   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
610   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
611   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
612   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
613   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
614   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
615     // Turn immediate 0 into the VPCMPEQ instruction.
616     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
617       unsigned NewOpc;
618       switch (OutMI.getOpcode()) {
619       default: llvm_unreachable("Invalid opcode");
620       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
621       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
622       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
623       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
624       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
625       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
626       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
627       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
628       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
629       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
630       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
631       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
632       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
633       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
634       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
635       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
636       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
637       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
638       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
639       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
640       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
641       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
642       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
643       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
644       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
645       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
646       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
647       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
648       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
649       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
650       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
651       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
652       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
653       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
654       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
655       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
656       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
657       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
658       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
659       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
660       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
661       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
662       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
663       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
664       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
665       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
666       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
667       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
668       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
669       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
670       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
671       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
672       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
673       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
674       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
675       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
676       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
677       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
678       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
679       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
680       }
681 
682       OutMI.setOpcode(NewOpc);
683       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
684       break;
685     }
686 
687     // Turn immediate 6 into the VPCMPGT instruction.
688     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
689       unsigned NewOpc;
690       switch (OutMI.getOpcode()) {
691       default: llvm_unreachable("Invalid opcode");
692       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
693       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
694       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
695       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
696       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
697       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
698       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
699       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
700       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
701       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
702       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
703       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
704       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
705       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
706       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
707       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
708       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
709       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
710       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
711       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
712       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
713       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
714       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
715       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
716       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
717       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
718       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
719       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
720       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
721       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
722       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
723       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
724       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
725       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
726       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
727       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
728       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
729       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
730       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
731       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
732       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
733       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
734       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
735       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
736       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
737       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
738       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
739       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
740       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
741       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
742       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
743       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
744       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
745       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
746       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
747       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
748       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
749       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
750       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
751       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
752       }
753 
754       OutMI.setOpcode(NewOpc);
755       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
756       break;
757     }
758 
759     break;
760   }
761 
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
766   case X86::CALL64r:
767   case X86::CALL64pcrel32:
768     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
769     break;
770 
771   case X86::EH_RETURN:
772   case X86::EH_RETURN64: {
773     OutMI = MCInst();
774     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
775     break;
776   }
777 
778   case X86::CLEANUPRET: {
779     // Replace CLEANUPRET with the appropriate RET.
780     OutMI = MCInst();
781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
782     break;
783   }
784 
785   case X86::CATCHRET: {
786     // Replace CATCHRET with the appropriate RET.
787     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
788     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
789     OutMI = MCInst();
790     OutMI.setOpcode(getRetOpcode(Subtarget));
791     OutMI.addOperand(MCOperand::createReg(ReturnReg));
792     break;
793   }
794 
795   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
796   // instruction.
797   case X86::TAILJMPr:
798   case X86::TAILJMPr64:
799   case X86::TAILJMPr64_REX:
800   case X86::TAILJMPd:
801   case X86::TAILJMPd64:
802     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
803     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
804     break;
805 
806   case X86::TAILJMPd_CC:
807   case X86::TAILJMPd64_CC:
808     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
809     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
810     break;
811 
812   case X86::TAILJMPm:
813   case X86::TAILJMPm64:
814   case X86::TAILJMPm64_REX:
815     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
816            "Unexpected number of operands!");
817     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
818     break;
819 
820   case X86::DEC16r:
821   case X86::DEC32r:
822   case X86::INC16r:
823   case X86::INC32r:
824     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
825     if (!AsmPrinter.getSubtarget().is64Bit()) {
826       unsigned Opcode;
827       switch (OutMI.getOpcode()) {
828       default: llvm_unreachable("Invalid opcode");
829       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
830       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
831       case X86::INC16r: Opcode = X86::INC16r_alt; break;
832       case X86::INC32r: Opcode = X86::INC32r_alt; break;
833       }
834       OutMI.setOpcode(Opcode);
835     }
836     break;
837 
838   // We don't currently select the correct instruction form for instructions
839   // which have a short %eax, etc. form. Handle this by custom lowering, for
840   // now.
841   //
842   // Note, we are currently not handling the following instructions:
843   // MOV64ao8, MOV64o8a
844   // XCHG16ar, XCHG32ar, XCHG64ar
845   case X86::MOV8mr_NOREX:
846   case X86::MOV8mr:
847   case X86::MOV8rm_NOREX:
848   case X86::MOV8rm:
849   case X86::MOV16mr:
850   case X86::MOV16rm:
851   case X86::MOV32mr:
852   case X86::MOV32rm: {
853     unsigned NewOpc;
854     switch (OutMI.getOpcode()) {
855     default: llvm_unreachable("Invalid opcode");
856     case X86::MOV8mr_NOREX:
857     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
858     case X86::MOV8rm_NOREX:
859     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
860     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
861     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
862     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
863     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
864     }
865     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
866     break;
867   }
868 
869   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
870   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
871   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
872   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
873   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
874   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
875   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
876   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
877   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
878     unsigned NewOpc;
879     switch (OutMI.getOpcode()) {
880     default: llvm_unreachable("Invalid opcode");
881     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
882     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
883     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
884     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
885     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
886     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
887     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
888     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
889     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
890     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
891     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
892     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
893     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
894     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
895     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
896     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
897     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
898     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
899     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
900     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
901     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
902     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
903     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
904     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
905     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
906     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
907     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
908     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
909     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
910     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
911     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
912     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
913     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
914     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
915     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
916     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
917     }
918     SimplifyShortImmForm(OutMI, NewOpc);
919     break;
920   }
921 
922   // Try to shrink some forms of movsx.
923   case X86::MOVSX16rr8:
924   case X86::MOVSX32rr16:
925   case X86::MOVSX64rr32:
926     SimplifyMOVSX(OutMI);
927     break;
928 
929   case X86::VCMPPDrri:
930   case X86::VCMPPDYrri:
931   case X86::VCMPPSrri:
932   case X86::VCMPPSYrri:
933   case X86::VCMPSDrr:
934   case X86::VCMPSSrr: {
935     // Swap the operands if it will enable a 2 byte VEX encoding.
936     // FIXME: Change the immediate to improve opportunities?
937     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
938         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
939       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
940       switch (Imm) {
941       default: break;
942       case 0x00: // EQUAL
943       case 0x03: // UNORDERED
944       case 0x04: // NOT EQUAL
945       case 0x07: // ORDERED
946         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
947         break;
948       }
949     }
950     break;
951   }
952 
953   case X86::VMOVHLPSrr:
954   case X86::VUNPCKHPDrr:
955     // These are not truly commutable so hide them from the default case.
956     break;
957 
958   default: {
959     // If the instruction is a commutable arithmetic instruction we might be
960     // able to commute the operands to get a 2 byte VEX prefix.
961     uint64_t TSFlags = MI->getDesc().TSFlags;
962     if (MI->getDesc().isCommutable() &&
963         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
964         (TSFlags & X86II::OpMapMask) == X86II::TB &&
965         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
966         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
967         OutMI.getNumOperands() == 3) {
968       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
969           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
970         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
971     }
972     break;
973   }
974   }
975 }
976 
977 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
978                                  const MachineInstr &MI) {
979   NoAutoPaddingScope NoPadScope(*OutStreamer);
980   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
981                   MI.getOpcode() != X86::TLS_base_addr32;
982   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
983                       MI.getOpcode() == X86::TLS_base_addr64;
984   MCContext &Ctx = OutStreamer->getContext();
985 
986   MCSymbolRefExpr::VariantKind SRVK;
987   switch (MI.getOpcode()) {
988   case X86::TLS_addr32:
989   case X86::TLS_addr64:
990   case X86::TLS_addrX32:
991     SRVK = MCSymbolRefExpr::VK_TLSGD;
992     break;
993   case X86::TLS_base_addr32:
994     SRVK = MCSymbolRefExpr::VK_TLSLDM;
995     break;
996   case X86::TLS_base_addr64:
997   case X86::TLS_base_addrX32:
998     SRVK = MCSymbolRefExpr::VK_TLSLD;
999     break;
1000   default:
1001     llvm_unreachable("unexpected opcode");
1002   }
1003 
1004   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1005       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1006 
1007   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1008   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1009   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1010   // only using GOT when GOTPCRELX is enabled.
1011   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1012   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1013                 Ctx.getAsmInfo()->canRelaxRelocations();
1014 
1015   if (Is64Bits) {
1016     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1017     if (NeedsPadding && Is64BitsLP64)
1018       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1019     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1020                                 .addReg(X86::RDI)
1021                                 .addReg(X86::RIP)
1022                                 .addImm(1)
1023                                 .addReg(0)
1024                                 .addExpr(Sym)
1025                                 .addReg(0));
1026     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1027     if (NeedsPadding) {
1028       if (!UseGot)
1029         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1030       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1031       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1032     }
1033     if (UseGot) {
1034       const MCExpr *Expr = MCSymbolRefExpr::create(
1035           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1036       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1037                                   .addReg(X86::RIP)
1038                                   .addImm(1)
1039                                   .addReg(0)
1040                                   .addExpr(Expr)
1041                                   .addReg(0));
1042     } else {
1043       EmitAndCountInstruction(
1044           MCInstBuilder(X86::CALL64pcrel32)
1045               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1046                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1047     }
1048   } else {
1049     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1050       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1051                                   .addReg(X86::EAX)
1052                                   .addReg(0)
1053                                   .addImm(1)
1054                                   .addReg(X86::EBX)
1055                                   .addExpr(Sym)
1056                                   .addReg(0));
1057     } else {
1058       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1059                                   .addReg(X86::EAX)
1060                                   .addReg(X86::EBX)
1061                                   .addImm(1)
1062                                   .addReg(0)
1063                                   .addExpr(Sym)
1064                                   .addReg(0));
1065     }
1066 
1067     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1068     if (UseGot) {
1069       const MCExpr *Expr =
1070           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1071       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1072                                   .addReg(X86::EBX)
1073                                   .addImm(1)
1074                                   .addReg(0)
1075                                   .addExpr(Expr)
1076                                   .addReg(0));
1077     } else {
1078       EmitAndCountInstruction(
1079           MCInstBuilder(X86::CALLpcrel32)
1080               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1081                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1082     }
1083   }
1084 }
1085 
1086 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1087 /// bytes.  Return the size of nop emitted.
1088 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1089                         const X86Subtarget *Subtarget) {
1090   // Determine the longest nop which can be efficiently decoded for the given
1091   // target cpu.  15-bytes is the longest single NOP instruction, but some
1092   // platforms can't decode the longest forms efficiently.
1093   unsigned MaxNopLength = 1;
1094   if (Subtarget->is64Bit()) {
1095     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1096     // IndexReg/BaseReg below need to be updated.
1097     if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
1098       MaxNopLength = 7;
1099     else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
1100       MaxNopLength = 15;
1101     else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
1102       MaxNopLength = 11;
1103     else
1104       MaxNopLength = 10;
1105   } if (Subtarget->is32Bit())
1106     MaxNopLength = 2;
1107 
1108   // Cap a single nop emission at the profitable value for the target
1109   NumBytes = std::min(NumBytes, MaxNopLength);
1110 
1111   unsigned NopSize;
1112   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1113   IndexReg = Displacement = SegmentReg = 0;
1114   BaseReg = X86::RAX;
1115   ScaleVal = 1;
1116   switch (NumBytes) {
1117   case 0:
1118     llvm_unreachable("Zero nops?");
1119     break;
1120   case 1:
1121     NopSize = 1;
1122     Opc = X86::NOOP;
1123     break;
1124   case 2:
1125     NopSize = 2;
1126     Opc = X86::XCHG16ar;
1127     break;
1128   case 3:
1129     NopSize = 3;
1130     Opc = X86::NOOPL;
1131     break;
1132   case 4:
1133     NopSize = 4;
1134     Opc = X86::NOOPL;
1135     Displacement = 8;
1136     break;
1137   case 5:
1138     NopSize = 5;
1139     Opc = X86::NOOPL;
1140     Displacement = 8;
1141     IndexReg = X86::RAX;
1142     break;
1143   case 6:
1144     NopSize = 6;
1145     Opc = X86::NOOPW;
1146     Displacement = 8;
1147     IndexReg = X86::RAX;
1148     break;
1149   case 7:
1150     NopSize = 7;
1151     Opc = X86::NOOPL;
1152     Displacement = 512;
1153     break;
1154   case 8:
1155     NopSize = 8;
1156     Opc = X86::NOOPL;
1157     Displacement = 512;
1158     IndexReg = X86::RAX;
1159     break;
1160   case 9:
1161     NopSize = 9;
1162     Opc = X86::NOOPW;
1163     Displacement = 512;
1164     IndexReg = X86::RAX;
1165     break;
1166   default:
1167     NopSize = 10;
1168     Opc = X86::NOOPW;
1169     Displacement = 512;
1170     IndexReg = X86::RAX;
1171     SegmentReg = X86::CS;
1172     break;
1173   }
1174 
1175   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1176   NopSize += NumPrefixes;
1177   for (unsigned i = 0; i != NumPrefixes; ++i)
1178     OS.emitBytes("\x66");
1179 
1180   switch (Opc) {
1181   default: llvm_unreachable("Unexpected opcode");
1182   case X86::NOOP:
1183     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1184     break;
1185   case X86::XCHG16ar:
1186     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1187                        *Subtarget);
1188     break;
1189   case X86::NOOPL:
1190   case X86::NOOPW:
1191     OS.emitInstruction(MCInstBuilder(Opc)
1192                            .addReg(BaseReg)
1193                            .addImm(ScaleVal)
1194                            .addReg(IndexReg)
1195                            .addImm(Displacement)
1196                            .addReg(SegmentReg),
1197                        *Subtarget);
1198     break;
1199   }
1200   assert(NopSize <= NumBytes && "We overemitted?");
1201   return NopSize;
1202 }
1203 
1204 /// Emit the optimal amount of multi-byte nops on X86.
1205 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1206                         const X86Subtarget *Subtarget) {
1207   unsigned NopsToEmit = NumBytes;
1208   (void)NopsToEmit;
1209   while (NumBytes) {
1210     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1211     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1212   }
1213 }
1214 
1215 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1216                                     X86MCInstLower &MCIL) {
1217   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1218 
1219   NoAutoPaddingScope NoPadScope(*OutStreamer);
1220 
1221   StatepointOpers SOpers(&MI);
1222   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1223     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1224   } else {
1225     // Lower call target and choose correct opcode
1226     const MachineOperand &CallTarget = SOpers.getCallTarget();
1227     MCOperand CallTargetMCOp;
1228     unsigned CallOpcode;
1229     switch (CallTarget.getType()) {
1230     case MachineOperand::MO_GlobalAddress:
1231     case MachineOperand::MO_ExternalSymbol:
1232       CallTargetMCOp = MCIL.LowerSymbolOperand(
1233           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1234       CallOpcode = X86::CALL64pcrel32;
1235       // Currently, we only support relative addressing with statepoints.
1236       // Otherwise, we'll need a scratch register to hold the target
1237       // address.  You'll fail asserts during load & relocation if this
1238       // symbol is to far away. (TODO: support non-relative addressing)
1239       break;
1240     case MachineOperand::MO_Immediate:
1241       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1242       CallOpcode = X86::CALL64pcrel32;
1243       // Currently, we only support relative addressing with statepoints.
1244       // Otherwise, we'll need a scratch register to hold the target
1245       // immediate.  You'll fail asserts during load & relocation if this
1246       // address is to far away. (TODO: support non-relative addressing)
1247       break;
1248     case MachineOperand::MO_Register:
1249       // FIXME: Add retpoline support and remove this.
1250       if (Subtarget->useIndirectThunkCalls())
1251         report_fatal_error("Lowering register statepoints with thunks not "
1252                            "yet implemented.");
1253       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1254       CallOpcode = X86::CALL64r;
1255       break;
1256     default:
1257       llvm_unreachable("Unsupported operand type in statepoint call target");
1258       break;
1259     }
1260 
1261     // Emit call
1262     MCInst CallInst;
1263     CallInst.setOpcode(CallOpcode);
1264     CallInst.addOperand(CallTargetMCOp);
1265     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1266   }
1267 
1268   // Record our statepoint node in the same section used by STACKMAP
1269   // and PATCHPOINT
1270   auto &Ctx = OutStreamer->getContext();
1271   MCSymbol *MILabel = Ctx.createTempSymbol();
1272   OutStreamer->emitLabel(MILabel);
1273   SM.recordStatepoint(*MILabel, MI);
1274 }
1275 
1276 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1277                                      X86MCInstLower &MCIL) {
1278   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1279   //                  <opcode>, <operands>
1280 
1281   NoAutoPaddingScope NoPadScope(*OutStreamer);
1282 
1283   Register DefRegister = FaultingMI.getOperand(0).getReg();
1284   FaultMaps::FaultKind FK =
1285       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1286   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1287   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1288   unsigned OperandsBeginIdx = 4;
1289 
1290   auto &Ctx = OutStreamer->getContext();
1291   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1292   OutStreamer->emitLabel(FaultingLabel);
1293 
1294   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1295   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1296 
1297   MCInst MI;
1298   MI.setOpcode(Opcode);
1299 
1300   if (DefRegister != X86::NoRegister)
1301     MI.addOperand(MCOperand::createReg(DefRegister));
1302 
1303   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1304             E = FaultingMI.operands_end();
1305        I != E; ++I)
1306     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1307       MI.addOperand(MaybeOperand.getValue());
1308 
1309   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1310   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1311 }
1312 
1313 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1314                                      X86MCInstLower &MCIL) {
1315   bool Is64Bits = Subtarget->is64Bit();
1316   MCContext &Ctx = OutStreamer->getContext();
1317   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1318   const MCSymbolRefExpr *Op =
1319       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1320 
1321   EmitAndCountInstruction(
1322       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1323           .addExpr(Op));
1324 }
1325 
1326 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1327                                       X86MCInstLower &MCIL) {
1328   // PATCHABLE_OP minsize, opcode, operands
1329 
1330   NoAutoPaddingScope NoPadScope(*OutStreamer);
1331 
1332   unsigned MinSize = MI.getOperand(0).getImm();
1333   unsigned Opcode = MI.getOperand(1).getImm();
1334 
1335   MCInst MCI;
1336   MCI.setOpcode(Opcode);
1337   for (auto &MO : drop_begin(MI.operands(), 2))
1338     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1339       MCI.addOperand(MaybeOperand.getValue());
1340 
1341   SmallString<256> Code;
1342   SmallVector<MCFixup, 4> Fixups;
1343   raw_svector_ostream VecOS(Code);
1344   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1345 
1346   if (Code.size() < MinSize) {
1347     if (MinSize == 2 && Subtarget->is32Bit() &&
1348         Subtarget->isTargetWindowsMSVC() &&
1349         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1350       // For compatibilty reasons, when targetting MSVC, is is important to
1351       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1352       // rely specifically on this pattern to be able to patch a function.
1353       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1354       OutStreamer->emitInstruction(
1355           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1356           *Subtarget);
1357     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1358       // This is an optimization that lets us get away without emitting a nop in
1359       // many cases.
1360       //
1361       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1362       // bytes too, so the check on MinSize is important.
1363       MCI.setOpcode(X86::PUSH64rmr);
1364     } else {
1365       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1366       assert(NopSize == MinSize && "Could not implement MinSize!");
1367       (void)NopSize;
1368     }
1369   }
1370 
1371   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1372 }
1373 
1374 // Lower a stackmap of the form:
1375 // <id>, <shadowBytes>, ...
1376 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1377   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1378 
1379   auto &Ctx = OutStreamer->getContext();
1380   MCSymbol *MILabel = Ctx.createTempSymbol();
1381   OutStreamer->emitLabel(MILabel);
1382 
1383   SM.recordStackMap(*MILabel, MI);
1384   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1385   SMShadowTracker.reset(NumShadowBytes);
1386 }
1387 
1388 // Lower a patchpoint of the form:
1389 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1390 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1391                                     X86MCInstLower &MCIL) {
1392   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1393 
1394   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1395 
1396   NoAutoPaddingScope NoPadScope(*OutStreamer);
1397 
1398   auto &Ctx = OutStreamer->getContext();
1399   MCSymbol *MILabel = Ctx.createTempSymbol();
1400   OutStreamer->emitLabel(MILabel);
1401   SM.recordPatchPoint(*MILabel, MI);
1402 
1403   PatchPointOpers opers(&MI);
1404   unsigned ScratchIdx = opers.getNextScratchIdx();
1405   unsigned EncodedBytes = 0;
1406   const MachineOperand &CalleeMO = opers.getCallTarget();
1407 
1408   // Check for null target. If target is non-null (i.e. is non-zero or is
1409   // symbolic) then emit a call.
1410   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1411     MCOperand CalleeMCOp;
1412     switch (CalleeMO.getType()) {
1413     default:
1414       /// FIXME: Add a verifier check for bad callee types.
1415       llvm_unreachable("Unrecognized callee operand type.");
1416     case MachineOperand::MO_Immediate:
1417       if (CalleeMO.getImm())
1418         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1419       break;
1420     case MachineOperand::MO_ExternalSymbol:
1421     case MachineOperand::MO_GlobalAddress:
1422       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1423                                            MCIL.GetSymbolFromOperand(CalleeMO));
1424       break;
1425     }
1426 
1427     // Emit MOV to materialize the target address and the CALL to target.
1428     // This is encoded with 12-13 bytes, depending on which register is used.
1429     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1430     if (X86II::isX86_64ExtendedReg(ScratchReg))
1431       EncodedBytes = 13;
1432     else
1433       EncodedBytes = 12;
1434 
1435     EmitAndCountInstruction(
1436         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1437     // FIXME: Add retpoline support and remove this.
1438     if (Subtarget->useIndirectThunkCalls())
1439       report_fatal_error(
1440           "Lowering patchpoint with thunks not yet implemented.");
1441     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1442   }
1443 
1444   // Emit padding.
1445   unsigned NumBytes = opers.getNumPatchBytes();
1446   assert(NumBytes >= EncodedBytes &&
1447          "Patchpoint can't request size less than the length of a call.");
1448 
1449   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1450 }
1451 
1452 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1453                                               X86MCInstLower &MCIL) {
1454   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1455 
1456   NoAutoPaddingScope NoPadScope(*OutStreamer);
1457 
1458   // We want to emit the following pattern, which follows the x86 calling
1459   // convention to prepare for the trampoline call to be patched in.
1460   //
1461   //   .p2align 1, ...
1462   // .Lxray_event_sled_N:
1463   //   jmp +N                        // jump across the instrumentation sled
1464   //   ...                           // set up arguments in register
1465   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1466   //   ...
1467   //   <jump here>
1468   //
1469   // After patching, it would look something like:
1470   //
1471   //   nopw (2-byte nop)
1472   //   ...
1473   //   callq __xrayCustomEvent  // already lowered
1474   //   ...
1475   //
1476   // ---
1477   // First we emit the label and the jump.
1478   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1479   OutStreamer->AddComment("# XRay Custom Event Log");
1480   OutStreamer->emitCodeAlignment(2);
1481   OutStreamer->emitLabel(CurSled);
1482 
1483   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1484   // an operand (computed as an offset from the jmp instruction).
1485   // FIXME: Find another less hacky way do force the relative jump.
1486   OutStreamer->emitBinaryData("\xeb\x0f");
1487 
1488   // The default C calling convention will place two arguments into %rcx and
1489   // %rdx -- so we only work with those.
1490   const Register DestRegs[] = {X86::RDI, X86::RSI};
1491   bool UsedMask[] = {false, false};
1492   // Filled out in loop.
1493   Register SrcRegs[] = {0, 0};
1494 
1495   // Then we put the operands in the %rdi and %rsi registers. We spill the
1496   // values in the register before we clobber them, and mark them as used in
1497   // UsedMask. In case the arguments are already in the correct register, we use
1498   // emit nops appropriately sized to keep the sled the same size in every
1499   // situation.
1500   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1501     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1502       assert(Op->isReg() && "Only support arguments in registers");
1503       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1504       if (SrcRegs[I] != DestRegs[I]) {
1505         UsedMask[I] = true;
1506         EmitAndCountInstruction(
1507             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1508       } else {
1509         emitX86Nops(*OutStreamer, 4, Subtarget);
1510       }
1511     }
1512 
1513   // Now that the register values are stashed, mov arguments into place.
1514   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1515   // earlier DestReg. We will have already overwritten over the register before
1516   // we can copy from it.
1517   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1518     if (SrcRegs[I] != DestRegs[I])
1519       EmitAndCountInstruction(
1520           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1521 
1522   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1523   // name of the trampoline to be implemented by the XRay runtime.
1524   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1525   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1526   if (isPositionIndependent())
1527     TOp.setTargetFlags(X86II::MO_PLT);
1528 
1529   // Emit the call instruction.
1530   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1531                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1532 
1533   // Restore caller-saved and used registers.
1534   for (unsigned I = sizeof UsedMask; I-- > 0;)
1535     if (UsedMask[I])
1536       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1537     else
1538       emitX86Nops(*OutStreamer, 1, Subtarget);
1539 
1540   OutStreamer->AddComment("xray custom event end.");
1541 
1542   // Record the sled version. Version 0 of this sled was spelled differently, so
1543   // we let the runtime handle the different offsets we're using. Version 2
1544   // changed the absolute address to a PC-relative address.
1545   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1546 }
1547 
1548 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1549                                                     X86MCInstLower &MCIL) {
1550   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1551 
1552   NoAutoPaddingScope NoPadScope(*OutStreamer);
1553 
1554   // We want to emit the following pattern, which follows the x86 calling
1555   // convention to prepare for the trampoline call to be patched in.
1556   //
1557   //   .p2align 1, ...
1558   // .Lxray_event_sled_N:
1559   //   jmp +N                        // jump across the instrumentation sled
1560   //   ...                           // set up arguments in register
1561   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1562   //   ...
1563   //   <jump here>
1564   //
1565   // After patching, it would look something like:
1566   //
1567   //   nopw (2-byte nop)
1568   //   ...
1569   //   callq __xrayTypedEvent  // already lowered
1570   //   ...
1571   //
1572   // ---
1573   // First we emit the label and the jump.
1574   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1575   OutStreamer->AddComment("# XRay Typed Event Log");
1576   OutStreamer->emitCodeAlignment(2);
1577   OutStreamer->emitLabel(CurSled);
1578 
1579   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1580   // an operand (computed as an offset from the jmp instruction).
1581   // FIXME: Find another less hacky way do force the relative jump.
1582   OutStreamer->emitBinaryData("\xeb\x14");
1583 
1584   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1585   // so we'll work with those. Or we may be called via SystemV, in which case
1586   // we don't have to do any translation.
1587   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1588   bool UsedMask[] = {false, false, false};
1589 
1590   // Will fill out src regs in the loop.
1591   Register SrcRegs[] = {0, 0, 0};
1592 
1593   // Then we put the operands in the SystemV registers. We spill the values in
1594   // the registers before we clobber them, and mark them as used in UsedMask.
1595   // In case the arguments are already in the correct register, we emit nops
1596   // appropriately sized to keep the sled the same size in every situation.
1597   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1598     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1599       // TODO: Is register only support adequate?
1600       assert(Op->isReg() && "Only supports arguments in registers");
1601       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1602       if (SrcRegs[I] != DestRegs[I]) {
1603         UsedMask[I] = true;
1604         EmitAndCountInstruction(
1605             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1606       } else {
1607         emitX86Nops(*OutStreamer, 4, Subtarget);
1608       }
1609     }
1610 
1611   // In the above loop we only stash all of the destination registers or emit
1612   // nops if the arguments are already in the right place. Doing the actually
1613   // moving is postponed until after all the registers are stashed so nothing
1614   // is clobbers. We've already added nops to account for the size of mov and
1615   // push if the register is in the right place, so we only have to worry about
1616   // emitting movs.
1617   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1618   // earlier DestReg. We will have already overwritten over the register before
1619   // we can copy from it.
1620   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1621     if (UsedMask[I])
1622       EmitAndCountInstruction(
1623           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1624 
1625   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1626   // name of the trampoline to be implemented by the XRay runtime.
1627   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1628   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1629   if (isPositionIndependent())
1630     TOp.setTargetFlags(X86II::MO_PLT);
1631 
1632   // Emit the call instruction.
1633   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1634                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1635 
1636   // Restore caller-saved and used registers.
1637   for (unsigned I = sizeof UsedMask; I-- > 0;)
1638     if (UsedMask[I])
1639       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1640     else
1641       emitX86Nops(*OutStreamer, 1, Subtarget);
1642 
1643   OutStreamer->AddComment("xray typed event end.");
1644 
1645   // Record the sled version.
1646   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1647 }
1648 
1649 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1650                                                   X86MCInstLower &MCIL) {
1651 
1652   NoAutoPaddingScope NoPadScope(*OutStreamer);
1653 
1654   const Function &F = MF->getFunction();
1655   if (F.hasFnAttribute("patchable-function-entry")) {
1656     unsigned Num;
1657     if (F.getFnAttribute("patchable-function-entry")
1658             .getValueAsString()
1659             .getAsInteger(10, Num))
1660       return;
1661     emitX86Nops(*OutStreamer, Num, Subtarget);
1662     return;
1663   }
1664   // We want to emit the following pattern:
1665   //
1666   //   .p2align 1, ...
1667   // .Lxray_sled_N:
1668   //   jmp .tmpN
1669   //   # 9 bytes worth of noops
1670   //
1671   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1672   // bytes with the following pattern:
1673   //
1674   //   mov %r10, <function id, 32-bit>   // 6 bytes
1675   //   call <relative offset, 32-bits>   // 5 bytes
1676   //
1677   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1678   OutStreamer->emitCodeAlignment(2);
1679   OutStreamer->emitLabel(CurSled);
1680 
1681   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1682   // an operand (computed as an offset from the jmp instruction).
1683   // FIXME: Find another less hacky way do force the relative jump.
1684   OutStreamer->emitBytes("\xeb\x09");
1685   emitX86Nops(*OutStreamer, 9, Subtarget);
1686   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1687 }
1688 
1689 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1690                                        X86MCInstLower &MCIL) {
1691   NoAutoPaddingScope NoPadScope(*OutStreamer);
1692 
1693   // Since PATCHABLE_RET takes the opcode of the return statement as an
1694   // argument, we use that to emit the correct form of the RET that we want.
1695   // i.e. when we see this:
1696   //
1697   //   PATCHABLE_RET X86::RET ...
1698   //
1699   // We should emit the RET followed by sleds.
1700   //
1701   //   .p2align 1, ...
1702   // .Lxray_sled_N:
1703   //   ret  # or equivalent instruction
1704   //   # 10 bytes worth of noops
1705   //
1706   // This just makes sure that the alignment for the next instruction is 2.
1707   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1708   OutStreamer->emitCodeAlignment(2);
1709   OutStreamer->emitLabel(CurSled);
1710   unsigned OpCode = MI.getOperand(0).getImm();
1711   MCInst Ret;
1712   Ret.setOpcode(OpCode);
1713   for (auto &MO : drop_begin(MI.operands()))
1714     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1715       Ret.addOperand(MaybeOperand.getValue());
1716   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1717   emitX86Nops(*OutStreamer, 10, Subtarget);
1718   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1719 }
1720 
1721 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1722                                              X86MCInstLower &MCIL) {
1723   NoAutoPaddingScope NoPadScope(*OutStreamer);
1724 
1725   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1726   // instruction so we lower that particular instruction and its operands.
1727   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1728   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1729   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1730   // tail call much like how we have it in PATCHABLE_RET.
1731   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1732   OutStreamer->emitCodeAlignment(2);
1733   OutStreamer->emitLabel(CurSled);
1734   auto Target = OutContext.createTempSymbol();
1735 
1736   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1737   // an operand (computed as an offset from the jmp instruction).
1738   // FIXME: Find another less hacky way do force the relative jump.
1739   OutStreamer->emitBytes("\xeb\x09");
1740   emitX86Nops(*OutStreamer, 9, Subtarget);
1741   OutStreamer->emitLabel(Target);
1742   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1743 
1744   unsigned OpCode = MI.getOperand(0).getImm();
1745   OpCode = convertTailJumpOpcode(OpCode);
1746   MCInst TC;
1747   TC.setOpcode(OpCode);
1748 
1749   // Before emitting the instruction, add a comment to indicate that this is
1750   // indeed a tail call.
1751   OutStreamer->AddComment("TAILCALL");
1752   for (auto &MO : drop_begin(MI.operands()))
1753     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1754       TC.addOperand(MaybeOperand.getValue());
1755   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1756 }
1757 
1758 // Returns instruction preceding MBBI in MachineFunction.
1759 // If MBBI is the first instruction of the first basic block, returns null.
1760 static MachineBasicBlock::const_iterator
1761 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1762   const MachineBasicBlock *MBB = MBBI->getParent();
1763   while (MBBI == MBB->begin()) {
1764     if (MBB == &MBB->getParent()->front())
1765       return MachineBasicBlock::const_iterator();
1766     MBB = MBB->getPrevNode();
1767     MBBI = MBB->end();
1768   }
1769   --MBBI;
1770   return MBBI;
1771 }
1772 
1773 static const Constant *getConstantFromPool(const MachineInstr &MI,
1774                                            const MachineOperand &Op) {
1775   if (!Op.isCPI() || Op.getOffset() != 0)
1776     return nullptr;
1777 
1778   ArrayRef<MachineConstantPoolEntry> Constants =
1779       MI.getParent()->getParent()->getConstantPool()->getConstants();
1780   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1781 
1782   // Bail if this is a machine constant pool entry, we won't be able to dig out
1783   // anything useful.
1784   if (ConstantEntry.isMachineConstantPoolEntry())
1785     return nullptr;
1786 
1787   return ConstantEntry.Val.ConstVal;
1788 }
1789 
1790 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1791                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1792   std::string Comment;
1793 
1794   // Compute the name for a register. This is really goofy because we have
1795   // multiple instruction printers that could (in theory) use different
1796   // names. Fortunately most people use the ATT style (outside of Windows)
1797   // and they actually agree on register naming here. Ultimately, this is
1798   // a comment, and so its OK if it isn't perfect.
1799   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1800     return X86ATTInstPrinter::getRegisterName(RegNum);
1801   };
1802 
1803   const MachineOperand &DstOp = MI->getOperand(0);
1804   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1805   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1806 
1807   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1808   StringRef Src1Name =
1809       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1810   StringRef Src2Name =
1811       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1812 
1813   // One source operand, fix the mask to print all elements in one span.
1814   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1815   if (Src1Name == Src2Name)
1816     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1817       if (ShuffleMask[i] >= e)
1818         ShuffleMask[i] -= e;
1819 
1820   raw_string_ostream CS(Comment);
1821   CS << DstName;
1822 
1823   // Handle AVX512 MASK/MASXZ write mask comments.
1824   // MASK: zmmX {%kY}
1825   // MASKZ: zmmX {%kY} {z}
1826   if (SrcOp1Idx > 1) {
1827     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1828 
1829     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1830     if (WriteMaskOp.isReg()) {
1831       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1832 
1833       if (SrcOp1Idx == 2) {
1834         CS << " {z}";
1835       }
1836     }
1837   }
1838 
1839   CS << " = ";
1840 
1841   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1842     if (i != 0)
1843       CS << ",";
1844     if (ShuffleMask[i] == SM_SentinelZero) {
1845       CS << "zero";
1846       continue;
1847     }
1848 
1849     // Otherwise, it must come from src1 or src2.  Print the span of elements
1850     // that comes from this src.
1851     bool isSrc1 = ShuffleMask[i] < (int)e;
1852     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1853 
1854     bool IsFirst = true;
1855     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1856            (ShuffleMask[i] < (int)e) == isSrc1) {
1857       if (!IsFirst)
1858         CS << ',';
1859       else
1860         IsFirst = false;
1861       if (ShuffleMask[i] == SM_SentinelUndef)
1862         CS << "u";
1863       else
1864         CS << ShuffleMask[i] % (int)e;
1865       ++i;
1866     }
1867     CS << ']';
1868     --i; // For loop increments element #.
1869   }
1870   CS.flush();
1871 
1872   return Comment;
1873 }
1874 
1875 static void printConstant(const APInt &Val, raw_ostream &CS) {
1876   if (Val.getBitWidth() <= 64) {
1877     CS << Val.getZExtValue();
1878   } else {
1879     // print multi-word constant as (w0,w1)
1880     CS << "(";
1881     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1882       if (i > 0)
1883         CS << ",";
1884       CS << Val.getRawData()[i];
1885     }
1886     CS << ")";
1887   }
1888 }
1889 
1890 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1891   SmallString<32> Str;
1892   // Force scientific notation to distinquish from integers.
1893   Flt.toString(Str, 0, 0);
1894   CS << Str;
1895 }
1896 
1897 static void printConstant(const Constant *COp, raw_ostream &CS) {
1898   if (isa<UndefValue>(COp)) {
1899     CS << "u";
1900   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1901     printConstant(CI->getValue(), CS);
1902   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1903     printConstant(CF->getValueAPF(), CS);
1904   } else {
1905     CS << "?";
1906   }
1907 }
1908 
1909 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1910   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1911   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1912 
1913   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1914   if (EmitFPOData) {
1915     X86TargetStreamer *XTS =
1916         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1917     switch (MI->getOpcode()) {
1918     case X86::SEH_PushReg:
1919       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1920       break;
1921     case X86::SEH_StackAlloc:
1922       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1923       break;
1924     case X86::SEH_StackAlign:
1925       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1926       break;
1927     case X86::SEH_SetFrame:
1928       assert(MI->getOperand(1).getImm() == 0 &&
1929              ".cv_fpo_setframe takes no offset");
1930       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1931       break;
1932     case X86::SEH_EndPrologue:
1933       XTS->emitFPOEndPrologue();
1934       break;
1935     case X86::SEH_SaveReg:
1936     case X86::SEH_SaveXMM:
1937     case X86::SEH_PushFrame:
1938       llvm_unreachable("SEH_ directive incompatible with FPO");
1939       break;
1940     default:
1941       llvm_unreachable("expected SEH_ instruction");
1942     }
1943     return;
1944   }
1945 
1946   // Otherwise, use the .seh_ directives for all other Windows platforms.
1947   switch (MI->getOpcode()) {
1948   case X86::SEH_PushReg:
1949     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1950     break;
1951 
1952   case X86::SEH_SaveReg:
1953     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1954                                    MI->getOperand(1).getImm());
1955     break;
1956 
1957   case X86::SEH_SaveXMM:
1958     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1959                                    MI->getOperand(1).getImm());
1960     break;
1961 
1962   case X86::SEH_StackAlloc:
1963     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1964     break;
1965 
1966   case X86::SEH_SetFrame:
1967     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
1968                                     MI->getOperand(1).getImm());
1969     break;
1970 
1971   case X86::SEH_PushFrame:
1972     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1973     break;
1974 
1975   case X86::SEH_EndPrologue:
1976     OutStreamer->EmitWinCFIEndProlog();
1977     break;
1978 
1979   default:
1980     llvm_unreachable("expected SEH_ instruction");
1981   }
1982 }
1983 
1984 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1985   if (Info.RegClass == X86::VR128RegClassID ||
1986       Info.RegClass == X86::VR128XRegClassID)
1987     return 128;
1988   if (Info.RegClass == X86::VR256RegClassID ||
1989       Info.RegClass == X86::VR256XRegClassID)
1990     return 256;
1991   if (Info.RegClass == X86::VR512RegClassID)
1992     return 512;
1993   llvm_unreachable("Unknown register class!");
1994 }
1995 
1996 static void addConstantComments(const MachineInstr *MI,
1997                                 MCStreamer &OutStreamer) {
1998   switch (MI->getOpcode()) {
1999   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2000   // a constant shuffle mask. We won't be able to do this at the MC layer
2001   // because the mask isn't an immediate.
2002   case X86::PSHUFBrm:
2003   case X86::VPSHUFBrm:
2004   case X86::VPSHUFBYrm:
2005   case X86::VPSHUFBZ128rm:
2006   case X86::VPSHUFBZ128rmk:
2007   case X86::VPSHUFBZ128rmkz:
2008   case X86::VPSHUFBZ256rm:
2009   case X86::VPSHUFBZ256rmk:
2010   case X86::VPSHUFBZ256rmkz:
2011   case X86::VPSHUFBZrm:
2012   case X86::VPSHUFBZrmk:
2013   case X86::VPSHUFBZrmkz: {
2014     unsigned SrcIdx = 1;
2015     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2016       // Skip mask operand.
2017       ++SrcIdx;
2018       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2019         // Skip passthru operand.
2020         ++SrcIdx;
2021       }
2022     }
2023     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2024 
2025     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2026            "Unexpected number of operands!");
2027 
2028     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2029     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2030       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2031       SmallVector<int, 64> Mask;
2032       DecodePSHUFBMask(C, Width, Mask);
2033       if (!Mask.empty())
2034         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2035     }
2036     break;
2037   }
2038 
2039   case X86::VPERMILPSrm:
2040   case X86::VPERMILPSYrm:
2041   case X86::VPERMILPSZ128rm:
2042   case X86::VPERMILPSZ128rmk:
2043   case X86::VPERMILPSZ128rmkz:
2044   case X86::VPERMILPSZ256rm:
2045   case X86::VPERMILPSZ256rmk:
2046   case X86::VPERMILPSZ256rmkz:
2047   case X86::VPERMILPSZrm:
2048   case X86::VPERMILPSZrmk:
2049   case X86::VPERMILPSZrmkz:
2050   case X86::VPERMILPDrm:
2051   case X86::VPERMILPDYrm:
2052   case X86::VPERMILPDZ128rm:
2053   case X86::VPERMILPDZ128rmk:
2054   case X86::VPERMILPDZ128rmkz:
2055   case X86::VPERMILPDZ256rm:
2056   case X86::VPERMILPDZ256rmk:
2057   case X86::VPERMILPDZ256rmkz:
2058   case X86::VPERMILPDZrm:
2059   case X86::VPERMILPDZrmk:
2060   case X86::VPERMILPDZrmkz: {
2061     unsigned ElSize;
2062     switch (MI->getOpcode()) {
2063     default: llvm_unreachable("Invalid opcode");
2064     case X86::VPERMILPSrm:
2065     case X86::VPERMILPSYrm:
2066     case X86::VPERMILPSZ128rm:
2067     case X86::VPERMILPSZ256rm:
2068     case X86::VPERMILPSZrm:
2069     case X86::VPERMILPSZ128rmkz:
2070     case X86::VPERMILPSZ256rmkz:
2071     case X86::VPERMILPSZrmkz:
2072     case X86::VPERMILPSZ128rmk:
2073     case X86::VPERMILPSZ256rmk:
2074     case X86::VPERMILPSZrmk:
2075       ElSize = 32;
2076       break;
2077     case X86::VPERMILPDrm:
2078     case X86::VPERMILPDYrm:
2079     case X86::VPERMILPDZ128rm:
2080     case X86::VPERMILPDZ256rm:
2081     case X86::VPERMILPDZrm:
2082     case X86::VPERMILPDZ128rmkz:
2083     case X86::VPERMILPDZ256rmkz:
2084     case X86::VPERMILPDZrmkz:
2085     case X86::VPERMILPDZ128rmk:
2086     case X86::VPERMILPDZ256rmk:
2087     case X86::VPERMILPDZrmk:
2088       ElSize = 64;
2089       break;
2090     }
2091 
2092     unsigned SrcIdx = 1;
2093     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2094       // Skip mask operand.
2095       ++SrcIdx;
2096       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2097         // Skip passthru operand.
2098         ++SrcIdx;
2099       }
2100     }
2101     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2102 
2103     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2104            "Unexpected number of operands!");
2105 
2106     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2107     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2108       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2109       SmallVector<int, 16> Mask;
2110       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2111       if (!Mask.empty())
2112         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2113     }
2114     break;
2115   }
2116 
2117   case X86::VPERMIL2PDrm:
2118   case X86::VPERMIL2PSrm:
2119   case X86::VPERMIL2PDYrm:
2120   case X86::VPERMIL2PSYrm: {
2121     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2122            "Unexpected number of operands!");
2123 
2124     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2125     if (!CtrlOp.isImm())
2126       break;
2127 
2128     unsigned ElSize;
2129     switch (MI->getOpcode()) {
2130     default: llvm_unreachable("Invalid opcode");
2131     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2132     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2133     }
2134 
2135     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2136     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2137       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2138       SmallVector<int, 16> Mask;
2139       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2140       if (!Mask.empty())
2141         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2142     }
2143     break;
2144   }
2145 
2146   case X86::VPPERMrrm: {
2147     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2148            "Unexpected number of operands!");
2149 
2150     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2151     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2152       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2153       SmallVector<int, 16> Mask;
2154       DecodeVPPERMMask(C, Width, Mask);
2155       if (!Mask.empty())
2156         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2157     }
2158     break;
2159   }
2160 
2161   case X86::MMX_MOVQ64rm: {
2162     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2163            "Unexpected number of operands!");
2164     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2165       std::string Comment;
2166       raw_string_ostream CS(Comment);
2167       const MachineOperand &DstOp = MI->getOperand(0);
2168       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2169       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2170         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2171         OutStreamer.AddComment(CS.str());
2172       }
2173     }
2174     break;
2175   }
2176 
2177 #define MOV_CASE(Prefix, Suffix)                                               \
2178   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2179   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2180   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2181   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2182   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2183   case X86::Prefix##MOVDQU##Suffix##rm:
2184 
2185 #define MOV_AVX512_CASE(Suffix)                                                \
2186   case X86::VMOVDQA64##Suffix##rm:                                             \
2187   case X86::VMOVDQA32##Suffix##rm:                                             \
2188   case X86::VMOVDQU64##Suffix##rm:                                             \
2189   case X86::VMOVDQU32##Suffix##rm:                                             \
2190   case X86::VMOVDQU16##Suffix##rm:                                             \
2191   case X86::VMOVDQU8##Suffix##rm:                                              \
2192   case X86::VMOVAPS##Suffix##rm:                                               \
2193   case X86::VMOVAPD##Suffix##rm:                                               \
2194   case X86::VMOVUPS##Suffix##rm:                                               \
2195   case X86::VMOVUPD##Suffix##rm:
2196 
2197 #define CASE_ALL_MOV_RM()                                                      \
2198   MOV_CASE(, )   /* SSE */                                                     \
2199   MOV_CASE(V, )  /* AVX-128 */                                                 \
2200   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2201   MOV_AVX512_CASE(Z)                                                           \
2202   MOV_AVX512_CASE(Z256)                                                        \
2203   MOV_AVX512_CASE(Z128)
2204 
2205     // For loads from a constant pool to a vector register, print the constant
2206     // loaded.
2207     CASE_ALL_MOV_RM()
2208   case X86::VBROADCASTF128:
2209   case X86::VBROADCASTI128:
2210   case X86::VBROADCASTF32X4Z256rm:
2211   case X86::VBROADCASTF32X4rm:
2212   case X86::VBROADCASTF32X8rm:
2213   case X86::VBROADCASTF64X2Z128rm:
2214   case X86::VBROADCASTF64X2rm:
2215   case X86::VBROADCASTF64X4rm:
2216   case X86::VBROADCASTI32X4Z256rm:
2217   case X86::VBROADCASTI32X4rm:
2218   case X86::VBROADCASTI32X8rm:
2219   case X86::VBROADCASTI64X2Z128rm:
2220   case X86::VBROADCASTI64X2rm:
2221   case X86::VBROADCASTI64X4rm:
2222     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2223            "Unexpected number of operands!");
2224     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2225       int NumLanes = 1;
2226       // Override NumLanes for the broadcast instructions.
2227       switch (MI->getOpcode()) {
2228       case X86::VBROADCASTF128:        NumLanes = 2; break;
2229       case X86::VBROADCASTI128:        NumLanes = 2; break;
2230       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2231       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2232       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2233       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2234       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2235       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2236       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2237       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2238       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2239       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2240       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2241       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2242       }
2243 
2244       std::string Comment;
2245       raw_string_ostream CS(Comment);
2246       const MachineOperand &DstOp = MI->getOperand(0);
2247       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2248       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2249         CS << "[";
2250         for (int l = 0; l != NumLanes; ++l) {
2251           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2252                ++i) {
2253             if (i != 0 || l != 0)
2254               CS << ",";
2255             if (CDS->getElementType()->isIntegerTy())
2256               printConstant(CDS->getElementAsAPInt(i), CS);
2257             else if (CDS->getElementType()->isHalfTy() ||
2258                      CDS->getElementType()->isFloatTy() ||
2259                      CDS->getElementType()->isDoubleTy())
2260               printConstant(CDS->getElementAsAPFloat(i), CS);
2261             else
2262               CS << "?";
2263           }
2264         }
2265         CS << "]";
2266         OutStreamer.AddComment(CS.str());
2267       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2268         CS << "<";
2269         for (int l = 0; l != NumLanes; ++l) {
2270           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2271                ++i) {
2272             if (i != 0 || l != 0)
2273               CS << ",";
2274             printConstant(CV->getOperand(i), CS);
2275           }
2276         }
2277         CS << ">";
2278         OutStreamer.AddComment(CS.str());
2279       }
2280     }
2281     break;
2282 
2283   case X86::MOVDDUPrm:
2284   case X86::VMOVDDUPrm:
2285   case X86::VMOVDDUPZ128rm:
2286   case X86::VBROADCASTSSrm:
2287   case X86::VBROADCASTSSYrm:
2288   case X86::VBROADCASTSSZ128rm:
2289   case X86::VBROADCASTSSZ256rm:
2290   case X86::VBROADCASTSSZrm:
2291   case X86::VBROADCASTSDYrm:
2292   case X86::VBROADCASTSDZ256rm:
2293   case X86::VBROADCASTSDZrm:
2294   case X86::VPBROADCASTBrm:
2295   case X86::VPBROADCASTBYrm:
2296   case X86::VPBROADCASTBZ128rm:
2297   case X86::VPBROADCASTBZ256rm:
2298   case X86::VPBROADCASTBZrm:
2299   case X86::VPBROADCASTDrm:
2300   case X86::VPBROADCASTDYrm:
2301   case X86::VPBROADCASTDZ128rm:
2302   case X86::VPBROADCASTDZ256rm:
2303   case X86::VPBROADCASTDZrm:
2304   case X86::VPBROADCASTQrm:
2305   case X86::VPBROADCASTQYrm:
2306   case X86::VPBROADCASTQZ128rm:
2307   case X86::VPBROADCASTQZ256rm:
2308   case X86::VPBROADCASTQZrm:
2309   case X86::VPBROADCASTWrm:
2310   case X86::VPBROADCASTWYrm:
2311   case X86::VPBROADCASTWZ128rm:
2312   case X86::VPBROADCASTWZ256rm:
2313   case X86::VPBROADCASTWZrm:
2314     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2315            "Unexpected number of operands!");
2316     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2317       int NumElts;
2318       switch (MI->getOpcode()) {
2319       default: llvm_unreachable("Invalid opcode");
2320       case X86::MOVDDUPrm:          NumElts = 2;  break;
2321       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2322       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2323       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2324       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2325       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2326       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2327       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2328       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2329       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2330       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2331       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2332       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2333       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2334       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2335       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2336       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2337       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2338       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2339       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2340       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2341       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2342       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2343       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2344       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2345       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2346       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2347       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2348       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2349       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2350       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2351       }
2352 
2353       std::string Comment;
2354       raw_string_ostream CS(Comment);
2355       const MachineOperand &DstOp = MI->getOperand(0);
2356       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2357       CS << "[";
2358       for (int i = 0; i != NumElts; ++i) {
2359         if (i != 0)
2360           CS << ",";
2361         printConstant(C, CS);
2362       }
2363       CS << "]";
2364       OutStreamer.AddComment(CS.str());
2365     }
2366   }
2367 }
2368 
2369 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2370   X86MCInstLower MCInstLowering(*MF, *this);
2371   const X86RegisterInfo *RI =
2372       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2373 
2374   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2375   // are compressed from EVEX encoding to VEX encoding.
2376   if (TM.Options.MCOptions.ShowMCEncoding) {
2377     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2378       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2379   }
2380 
2381   // Add comments for values loaded from constant pool.
2382   if (OutStreamer->isVerboseAsm())
2383     addConstantComments(MI, *OutStreamer);
2384 
2385   switch (MI->getOpcode()) {
2386   case TargetOpcode::DBG_VALUE:
2387     llvm_unreachable("Should be handled target independently");
2388 
2389   // Emit nothing here but a comment if we can.
2390   case X86::Int_MemBarrier:
2391     OutStreamer->emitRawComment("MEMBARRIER");
2392     return;
2393 
2394   case X86::EH_RETURN:
2395   case X86::EH_RETURN64: {
2396     // Lower these as normal, but add some comments.
2397     Register Reg = MI->getOperand(0).getReg();
2398     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2399                             X86ATTInstPrinter::getRegisterName(Reg));
2400     break;
2401   }
2402   case X86::CLEANUPRET: {
2403     // Lower these as normal, but add some comments.
2404     OutStreamer->AddComment("CLEANUPRET");
2405     break;
2406   }
2407 
2408   case X86::CATCHRET: {
2409     // Lower these as normal, but add some comments.
2410     OutStreamer->AddComment("CATCHRET");
2411     break;
2412   }
2413 
2414   case X86::ENDBR32:
2415   case X86::ENDBR64: {
2416     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2417     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2418     // non-empty. If MI is the initial ENDBR, place the
2419     // __patchable_function_entries label after ENDBR.
2420     if (CurrentPatchableFunctionEntrySym &&
2421         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2422         MI == &MF->front().front()) {
2423       MCInst Inst;
2424       MCInstLowering.Lower(MI, Inst);
2425       EmitAndCountInstruction(Inst);
2426       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2427       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2428       return;
2429     }
2430     break;
2431   }
2432 
2433   case X86::TAILJMPr:
2434   case X86::TAILJMPm:
2435   case X86::TAILJMPd:
2436   case X86::TAILJMPd_CC:
2437   case X86::TAILJMPr64:
2438   case X86::TAILJMPm64:
2439   case X86::TAILJMPd64:
2440   case X86::TAILJMPd64_CC:
2441   case X86::TAILJMPr64_REX:
2442   case X86::TAILJMPm64_REX:
2443     // Lower these as normal, but add some comments.
2444     OutStreamer->AddComment("TAILCALL");
2445     break;
2446 
2447   case X86::TLS_addr32:
2448   case X86::TLS_addr64:
2449   case X86::TLS_addrX32:
2450   case X86::TLS_base_addr32:
2451   case X86::TLS_base_addr64:
2452   case X86::TLS_base_addrX32:
2453     return LowerTlsAddr(MCInstLowering, *MI);
2454 
2455   case X86::MOVPC32r: {
2456     // This is a pseudo op for a two instruction sequence with a label, which
2457     // looks like:
2458     //     call "L1$pb"
2459     // "L1$pb":
2460     //     popl %esi
2461 
2462     // Emit the call.
2463     MCSymbol *PICBase = MF->getPICBaseSymbol();
2464     // FIXME: We would like an efficient form for this, so we don't have to do a
2465     // lot of extra uniquing.
2466     EmitAndCountInstruction(
2467         MCInstBuilder(X86::CALLpcrel32)
2468             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2469 
2470     const X86FrameLowering *FrameLowering =
2471         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2472     bool hasFP = FrameLowering->hasFP(*MF);
2473 
2474     // TODO: This is needed only if we require precise CFA.
2475     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2476                                !OutStreamer->getDwarfFrameInfos().back().End;
2477 
2478     int stackGrowth = -RI->getSlotSize();
2479 
2480     if (HasActiveDwarfFrame && !hasFP) {
2481       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2482     }
2483 
2484     // Emit the label.
2485     OutStreamer->emitLabel(PICBase);
2486 
2487     // popl $reg
2488     EmitAndCountInstruction(
2489         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2490 
2491     if (HasActiveDwarfFrame && !hasFP) {
2492       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2493     }
2494     return;
2495   }
2496 
2497   case X86::ADD32ri: {
2498     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2499     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2500       break;
2501 
2502     // Okay, we have something like:
2503     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2504 
2505     // For this, we want to print something like:
2506     //   MYGLOBAL + (. - PICBASE)
2507     // However, we can't generate a ".", so just emit a new label here and refer
2508     // to it.
2509     MCSymbol *DotSym = OutContext.createTempSymbol();
2510     OutStreamer->emitLabel(DotSym);
2511 
2512     // Now that we have emitted the label, lower the complex operand expression.
2513     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2514 
2515     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2516     const MCExpr *PICBase =
2517         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2518     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2519 
2520     DotExpr = MCBinaryExpr::createAdd(
2521         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2522 
2523     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2524                                 .addReg(MI->getOperand(0).getReg())
2525                                 .addReg(MI->getOperand(1).getReg())
2526                                 .addExpr(DotExpr));
2527     return;
2528   }
2529   case TargetOpcode::STATEPOINT:
2530     return LowerSTATEPOINT(*MI, MCInstLowering);
2531 
2532   case TargetOpcode::FAULTING_OP:
2533     return LowerFAULTING_OP(*MI, MCInstLowering);
2534 
2535   case TargetOpcode::FENTRY_CALL:
2536     return LowerFENTRY_CALL(*MI, MCInstLowering);
2537 
2538   case TargetOpcode::PATCHABLE_OP:
2539     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2540 
2541   case TargetOpcode::STACKMAP:
2542     return LowerSTACKMAP(*MI);
2543 
2544   case TargetOpcode::PATCHPOINT:
2545     return LowerPATCHPOINT(*MI, MCInstLowering);
2546 
2547   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2548     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2549 
2550   case TargetOpcode::PATCHABLE_RET:
2551     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2552 
2553   case TargetOpcode::PATCHABLE_TAIL_CALL:
2554     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2555 
2556   case TargetOpcode::PATCHABLE_EVENT_CALL:
2557     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2558 
2559   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2560     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2561 
2562   case X86::MORESTACK_RET:
2563     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2564     return;
2565 
2566   case X86::MORESTACK_RET_RESTORE_R10:
2567     // Return, then restore R10.
2568     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2569     EmitAndCountInstruction(
2570         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2571     return;
2572 
2573   case X86::SEH_PushReg:
2574   case X86::SEH_SaveReg:
2575   case X86::SEH_SaveXMM:
2576   case X86::SEH_StackAlloc:
2577   case X86::SEH_StackAlign:
2578   case X86::SEH_SetFrame:
2579   case X86::SEH_PushFrame:
2580   case X86::SEH_EndPrologue:
2581     EmitSEHInstruction(MI);
2582     return;
2583 
2584   case X86::SEH_Epilogue: {
2585     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2586     MachineBasicBlock::const_iterator MBBI(MI);
2587     // Check if preceded by a call and emit nop if so.
2588     for (MBBI = PrevCrossBBInst(MBBI);
2589          MBBI != MachineBasicBlock::const_iterator();
2590          MBBI = PrevCrossBBInst(MBBI)) {
2591       // Conservatively assume that pseudo instructions don't emit code and keep
2592       // looking for a call. We may emit an unnecessary nop in some cases.
2593       if (!MBBI->isPseudo()) {
2594         if (MBBI->isCall())
2595           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2596         break;
2597       }
2598     }
2599     return;
2600   }
2601   case X86::UBSAN_UD1:
2602     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2603                                 .addReg(X86::EAX)
2604                                 .addReg(X86::EAX)
2605                                 .addImm(1)
2606                                 .addReg(X86::NoRegister)
2607                                 .addImm(MI->getOperand(0).getImm())
2608                                 .addReg(X86::NoRegister));
2609     return;
2610   }
2611 
2612   MCInst TmpInst;
2613   MCInstLowering.Lower(MI, TmpInst);
2614 
2615   // Stackmap shadows cannot include branch targets, so we can count the bytes
2616   // in a call towards the shadow, but must ensure that the no thread returns
2617   // in to the stackmap shadow.  The only way to achieve this is if the call
2618   // is at the end of the shadow.
2619   if (MI->isCall()) {
2620     // Count then size of the call towards the shadow
2621     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2622     // Then flush the shadow so that we fill with nops before the call, not
2623     // after it.
2624     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2625     // Then emit the call
2626     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2627     return;
2628   }
2629 
2630   EmitAndCountInstruction(TmpInst);
2631 }
2632