xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86InstComments.h"
17 #include "MCTargetDesc/X86ShuffleDecode.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "X86AsmPrinter.h"
20 #include "X86RegisterInfo.h"
21 #include "X86ShuffleDecodeConstantPool.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/MC/TargetRegistry.h"
47 #include "llvm/Target/TargetLoweringObjectFile.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
50 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
51 #include <string>
52 
53 using namespace llvm;
54 
55 namespace {
56 
57 /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
58 class X86MCInstLower {
59   MCContext &Ctx;
60   const MachineFunction &MF;
61   const TargetMachine &TM;
62   const MCAsmInfo &MAI;
63   X86AsmPrinter &AsmPrinter;
64 
65 public:
66   X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
67 
68   Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
69                                           const MachineOperand &MO) const;
70   void Lower(const MachineInstr *MI, MCInst &OutMI) const;
71 
72   MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
73   MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
74 
75 private:
76   MachineModuleInfoMachO &getMachOMMI() const;
77 };
78 
79 } // end anonymous namespace
80 
81 /// A RAII helper which defines a region of instructions which can't have
82 /// padding added between them for correctness.
83 struct NoAutoPaddingScope {
84   MCStreamer &OS;
85   const bool OldAllowAutoPadding;
86   NoAutoPaddingScope(MCStreamer &OS)
87       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
88     changeAndComment(false);
89   }
90   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
91   void changeAndComment(bool b) {
92     if (b == OS.getAllowAutoPadding())
93       return;
94     OS.setAllowAutoPadding(b);
95     if (b)
96       OS.emitRawComment("autopadding");
97     else
98       OS.emitRawComment("noautopadding");
99   }
100 };
101 
102 // Emit a minimal sequence of nops spanning NumBytes bytes.
103 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
104                         const X86Subtarget *Subtarget);
105 
106 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
107                                                  const MCSubtargetInfo &STI,
108                                                  MCCodeEmitter *CodeEmitter) {
109   if (InShadow) {
110     SmallString<256> Code;
111     SmallVector<MCFixup, 4> Fixups;
112     raw_svector_ostream VecOS(Code);
113     CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
114     CurrentShadowSize += Code.size();
115     if (CurrentShadowSize >= RequiredShadowSize)
116       InShadow = false; // The shadow is big enough. Stop counting.
117   }
118 }
119 
120 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
121     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
122   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
123     InShadow = false;
124     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
125                 &MF->getSubtarget<X86Subtarget>());
126   }
127 }
128 
129 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
130   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
131   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
132 }
133 
134 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
135                                X86AsmPrinter &asmprinter)
136     : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
137       AsmPrinter(asmprinter) {}
138 
139 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
140   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
141 }
142 
143 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
144 /// operand to an MCSymbol.
145 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
146   const Triple &TT = TM.getTargetTriple();
147   if (MO.isGlobal() && TT.isOSBinFormatELF())
148     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
149 
150   const DataLayout &DL = MF.getDataLayout();
151   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
152          "Isn't a symbol reference");
153 
154   MCSymbol *Sym = nullptr;
155   SmallString<128> Name;
156   StringRef Suffix;
157 
158   switch (MO.getTargetFlags()) {
159   case X86II::MO_DLLIMPORT:
160     // Handle dllimport linkage.
161     Name += "__imp_";
162     break;
163   case X86II::MO_COFFSTUB:
164     Name += ".refptr.";
165     break;
166   case X86II::MO_DARWIN_NONLAZY:
167   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
168     Suffix = "$non_lazy_ptr";
169     break;
170   }
171 
172   if (!Suffix.empty())
173     Name += DL.getPrivateGlobalPrefix();
174 
175   if (MO.isGlobal()) {
176     const GlobalValue *GV = MO.getGlobal();
177     AsmPrinter.getNameWithPrefix(Name, GV);
178   } else if (MO.isSymbol()) {
179     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
180   } else if (MO.isMBB()) {
181     assert(Suffix.empty());
182     Sym = MO.getMBB()->getSymbol();
183   }
184 
185   Name += Suffix;
186   if (!Sym)
187     Sym = Ctx.getOrCreateSymbol(Name);
188 
189   // If the target flags on the operand changes the name of the symbol, do that
190   // before we return the symbol.
191   switch (MO.getTargetFlags()) {
192   default:
193     break;
194   case X86II::MO_COFFSTUB: {
195     MachineModuleInfoCOFF &MMICOFF =
196         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
197     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
198     if (!StubSym.getPointer()) {
199       assert(MO.isGlobal() && "Extern symbol not handled yet");
200       StubSym = MachineModuleInfoImpl::StubValueTy(
201           AsmPrinter.getSymbol(MO.getGlobal()), true);
202     }
203     break;
204   }
205   case X86II::MO_DARWIN_NONLAZY:
206   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
207     MachineModuleInfoImpl::StubValueTy &StubSym =
208         getMachOMMI().getGVStubEntry(Sym);
209     if (!StubSym.getPointer()) {
210       assert(MO.isGlobal() && "Extern symbol not handled yet");
211       StubSym = MachineModuleInfoImpl::StubValueTy(
212           AsmPrinter.getSymbol(MO.getGlobal()),
213           !MO.getGlobal()->hasInternalLinkage());
214     }
215     break;
216   }
217   }
218 
219   return Sym;
220 }
221 
222 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
223                                              MCSymbol *Sym) const {
224   // FIXME: We would like an efficient form for this, so we don't have to do a
225   // lot of extra uniquing.
226   const MCExpr *Expr = nullptr;
227   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
228 
229   switch (MO.getTargetFlags()) {
230   default:
231     llvm_unreachable("Unknown target flag on GV operand");
232   case X86II::MO_NO_FLAG: // No flag.
233   // These affect the name of the symbol, not any suffix.
234   case X86II::MO_DARWIN_NONLAZY:
235   case X86II::MO_DLLIMPORT:
236   case X86II::MO_COFFSTUB:
237     break;
238 
239   case X86II::MO_TLVP:
240     RefKind = MCSymbolRefExpr::VK_TLVP;
241     break;
242   case X86II::MO_TLVP_PIC_BASE:
243     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
244     // Subtract the pic base.
245     Expr = MCBinaryExpr::createSub(
246         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
247     break;
248   case X86II::MO_SECREL:
249     RefKind = MCSymbolRefExpr::VK_SECREL;
250     break;
251   case X86II::MO_TLSGD:
252     RefKind = MCSymbolRefExpr::VK_TLSGD;
253     break;
254   case X86II::MO_TLSLD:
255     RefKind = MCSymbolRefExpr::VK_TLSLD;
256     break;
257   case X86II::MO_TLSLDM:
258     RefKind = MCSymbolRefExpr::VK_TLSLDM;
259     break;
260   case X86II::MO_GOTTPOFF:
261     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
262     break;
263   case X86II::MO_INDNTPOFF:
264     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
265     break;
266   case X86II::MO_TPOFF:
267     RefKind = MCSymbolRefExpr::VK_TPOFF;
268     break;
269   case X86II::MO_DTPOFF:
270     RefKind = MCSymbolRefExpr::VK_DTPOFF;
271     break;
272   case X86II::MO_NTPOFF:
273     RefKind = MCSymbolRefExpr::VK_NTPOFF;
274     break;
275   case X86II::MO_GOTNTPOFF:
276     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
277     break;
278   case X86II::MO_GOTPCREL:
279     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
280     break;
281   case X86II::MO_GOTPCREL_NORELAX:
282     RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
283     break;
284   case X86II::MO_GOT:
285     RefKind = MCSymbolRefExpr::VK_GOT;
286     break;
287   case X86II::MO_GOTOFF:
288     RefKind = MCSymbolRefExpr::VK_GOTOFF;
289     break;
290   case X86II::MO_PLT:
291     RefKind = MCSymbolRefExpr::VK_PLT;
292     break;
293   case X86II::MO_ABS8:
294     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
295     break;
296   case X86II::MO_PIC_BASE_OFFSET:
297   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
298     Expr = MCSymbolRefExpr::create(Sym, Ctx);
299     // Subtract the pic base.
300     Expr = MCBinaryExpr::createSub(
301         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
302     if (MO.isJTI()) {
303       assert(MAI.doesSetDirectiveSuppressReloc());
304       // If .set directive is supported, use it to reduce the number of
305       // relocations the assembler will generate for differences between
306       // local labels. This is only safe when the symbols are in the same
307       // section so we are restricting it to jumptable references.
308       MCSymbol *Label = Ctx.createTempSymbol();
309       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
310       Expr = MCSymbolRefExpr::create(Label, Ctx);
311     }
312     break;
313   }
314 
315   if (!Expr)
316     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
317 
318   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
319     Expr = MCBinaryExpr::createAdd(
320         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
321   return MCOperand::createExpr(Expr);
322 }
323 
324 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
325 /// a short fixed-register form.
326 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
327   unsigned ImmOp = Inst.getNumOperands() - 1;
328   assert(Inst.getOperand(0).isReg() &&
329          (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
330          ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
331            Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
332           Inst.getNumOperands() == 2) &&
333          "Unexpected instruction!");
334 
335   // Check whether the destination register can be fixed.
336   unsigned Reg = Inst.getOperand(0).getReg();
337   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
338     return;
339 
340   // If so, rewrite the instruction.
341   MCOperand Saved = Inst.getOperand(ImmOp);
342   Inst = MCInst();
343   Inst.setOpcode(Opcode);
344   Inst.addOperand(Saved);
345 }
346 
347 /// If a movsx instruction has a shorter encoding for the used register
348 /// simplify the instruction to use it instead.
349 static void SimplifyMOVSX(MCInst &Inst) {
350   unsigned NewOpcode = 0;
351   unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
352   switch (Inst.getOpcode()) {
353   default:
354     llvm_unreachable("Unexpected instruction!");
355   case X86::MOVSX16rr8: // movsbw %al, %ax   --> cbtw
356     if (Op0 == X86::AX && Op1 == X86::AL)
357       NewOpcode = X86::CBW;
358     break;
359   case X86::MOVSX32rr16: // movswl %ax, %eax  --> cwtl
360     if (Op0 == X86::EAX && Op1 == X86::AX)
361       NewOpcode = X86::CWDE;
362     break;
363   case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
364     if (Op0 == X86::RAX && Op1 == X86::EAX)
365       NewOpcode = X86::CDQE;
366     break;
367   }
368 
369   if (NewOpcode != 0) {
370     Inst = MCInst();
371     Inst.setOpcode(NewOpcode);
372   }
373 }
374 
375 /// Simplify things like MOV32rm to MOV32o32a.
376 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
377                                   unsigned Opcode) {
378   // Don't make these simplifications in 64-bit mode; other assemblers don't
379   // perform them because they make the code larger.
380   if (Printer.getSubtarget().is64Bit())
381     return;
382 
383   bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
384   unsigned AddrBase = IsStore;
385   unsigned RegOp = IsStore ? 0 : 5;
386   unsigned AddrOp = AddrBase + 3;
387   assert(
388       Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
389       Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
390       Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
391       Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
392       Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
393       (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
394       "Unexpected instruction!");
395 
396   // Check whether the destination register can be fixed.
397   unsigned Reg = Inst.getOperand(RegOp).getReg();
398   if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
399     return;
400 
401   // Check whether this is an absolute address.
402   // FIXME: We know TLVP symbol refs aren't, but there should be a better way
403   // to do this here.
404   bool Absolute = true;
405   if (Inst.getOperand(AddrOp).isExpr()) {
406     const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
407     if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
408       if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
409         Absolute = false;
410   }
411 
412   if (Absolute &&
413       (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
414        Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
415        Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
416     return;
417 
418   // If so, rewrite the instruction.
419   MCOperand Saved = Inst.getOperand(AddrOp);
420   MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
421   Inst = MCInst();
422   Inst.setOpcode(Opcode);
423   Inst.addOperand(Saved);
424   Inst.addOperand(Seg);
425 }
426 
427 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
428   return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
429 }
430 
431 Optional<MCOperand>
432 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
433                                     const MachineOperand &MO) const {
434   switch (MO.getType()) {
435   default:
436     MI->print(errs());
437     llvm_unreachable("unknown operand type");
438   case MachineOperand::MO_Register:
439     // Ignore all implicit register operands.
440     if (MO.isImplicit())
441       return None;
442     return MCOperand::createReg(MO.getReg());
443   case MachineOperand::MO_Immediate:
444     return MCOperand::createImm(MO.getImm());
445   case MachineOperand::MO_MachineBasicBlock:
446   case MachineOperand::MO_GlobalAddress:
447   case MachineOperand::MO_ExternalSymbol:
448     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
449   case MachineOperand::MO_MCSymbol:
450     return LowerSymbolOperand(MO, MO.getMCSymbol());
451   case MachineOperand::MO_JumpTableIndex:
452     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
453   case MachineOperand::MO_ConstantPoolIndex:
454     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
455   case MachineOperand::MO_BlockAddress:
456     return LowerSymbolOperand(
457         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
458   case MachineOperand::MO_RegisterMask:
459     // Ignore call clobbers.
460     return None;
461   }
462 }
463 
464 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
465 // information.
466 static unsigned convertTailJumpOpcode(unsigned Opcode) {
467   switch (Opcode) {
468   case X86::TAILJMPr:
469     Opcode = X86::JMP32r;
470     break;
471   case X86::TAILJMPm:
472     Opcode = X86::JMP32m;
473     break;
474   case X86::TAILJMPr64:
475     Opcode = X86::JMP64r;
476     break;
477   case X86::TAILJMPm64:
478     Opcode = X86::JMP64m;
479     break;
480   case X86::TAILJMPr64_REX:
481     Opcode = X86::JMP64r_REX;
482     break;
483   case X86::TAILJMPm64_REX:
484     Opcode = X86::JMP64m_REX;
485     break;
486   case X86::TAILJMPd:
487   case X86::TAILJMPd64:
488     Opcode = X86::JMP_1;
489     break;
490   case X86::TAILJMPd_CC:
491   case X86::TAILJMPd64_CC:
492     Opcode = X86::JCC_1;
493     break;
494   }
495 
496   return Opcode;
497 }
498 
499 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
500   OutMI.setOpcode(MI->getOpcode());
501 
502   for (const MachineOperand &MO : MI->operands())
503     if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
504       OutMI.addOperand(MaybeMCOp.getValue());
505 
506   // Handle a few special cases to eliminate operand modifiers.
507   switch (OutMI.getOpcode()) {
508   case X86::LEA64_32r:
509   case X86::LEA64r:
510   case X86::LEA16r:
511   case X86::LEA32r:
512     // LEA should have a segment register, but it must be empty.
513     assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
514            "Unexpected # of LEA operands");
515     assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
516            "LEA has segment specified!");
517     break;
518 
519   case X86::MULX32Hrr:
520   case X86::MULX32Hrm:
521   case X86::MULX64Hrr:
522   case X86::MULX64Hrm: {
523     // Turn into regular MULX by duplicating the destination.
524     unsigned NewOpc;
525     switch (OutMI.getOpcode()) {
526     default: llvm_unreachable("Invalid opcode");
527     case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
528     case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
529     case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
530     case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
531     }
532     OutMI.setOpcode(NewOpc);
533     // Duplicate the destination.
534     unsigned DestReg = OutMI.getOperand(0).getReg();
535     OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
536     break;
537   }
538 
539   // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
540   // if one of the registers is extended, but other isn't.
541   case X86::VMOVZPQILo2PQIrr:
542   case X86::VMOVAPDrr:
543   case X86::VMOVAPDYrr:
544   case X86::VMOVAPSrr:
545   case X86::VMOVAPSYrr:
546   case X86::VMOVDQArr:
547   case X86::VMOVDQAYrr:
548   case X86::VMOVDQUrr:
549   case X86::VMOVDQUYrr:
550   case X86::VMOVUPDrr:
551   case X86::VMOVUPDYrr:
552   case X86::VMOVUPSrr:
553   case X86::VMOVUPSYrr: {
554     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
555         X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
556       unsigned NewOpc;
557       switch (OutMI.getOpcode()) {
558       default: llvm_unreachable("Invalid opcode");
559       case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
560       case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
561       case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
562       case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
563       case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
564       case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
565       case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
566       case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
567       case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
568       case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
569       case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
570       case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
571       case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
572       }
573       OutMI.setOpcode(NewOpc);
574     }
575     break;
576   }
577   case X86::VMOVSDrr:
578   case X86::VMOVSSrr: {
579     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
580         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
581       unsigned NewOpc;
582       switch (OutMI.getOpcode()) {
583       default: llvm_unreachable("Invalid opcode");
584       case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
585       case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
586       }
587       OutMI.setOpcode(NewOpc);
588     }
589     break;
590   }
591 
592   case X86::VPCMPBZ128rmi:  case X86::VPCMPBZ128rmik:
593   case X86::VPCMPBZ128rri:  case X86::VPCMPBZ128rrik:
594   case X86::VPCMPBZ256rmi:  case X86::VPCMPBZ256rmik:
595   case X86::VPCMPBZ256rri:  case X86::VPCMPBZ256rrik:
596   case X86::VPCMPBZrmi:     case X86::VPCMPBZrmik:
597   case X86::VPCMPBZrri:     case X86::VPCMPBZrrik:
598   case X86::VPCMPDZ128rmi:  case X86::VPCMPDZ128rmik:
599   case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
600   case X86::VPCMPDZ128rri:  case X86::VPCMPDZ128rrik:
601   case X86::VPCMPDZ256rmi:  case X86::VPCMPDZ256rmik:
602   case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
603   case X86::VPCMPDZ256rri:  case X86::VPCMPDZ256rrik:
604   case X86::VPCMPDZrmi:     case X86::VPCMPDZrmik:
605   case X86::VPCMPDZrmib:    case X86::VPCMPDZrmibk:
606   case X86::VPCMPDZrri:     case X86::VPCMPDZrrik:
607   case X86::VPCMPQZ128rmi:  case X86::VPCMPQZ128rmik:
608   case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
609   case X86::VPCMPQZ128rri:  case X86::VPCMPQZ128rrik:
610   case X86::VPCMPQZ256rmi:  case X86::VPCMPQZ256rmik:
611   case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
612   case X86::VPCMPQZ256rri:  case X86::VPCMPQZ256rrik:
613   case X86::VPCMPQZrmi:     case X86::VPCMPQZrmik:
614   case X86::VPCMPQZrmib:    case X86::VPCMPQZrmibk:
615   case X86::VPCMPQZrri:     case X86::VPCMPQZrrik:
616   case X86::VPCMPWZ128rmi:  case X86::VPCMPWZ128rmik:
617   case X86::VPCMPWZ128rri:  case X86::VPCMPWZ128rrik:
618   case X86::VPCMPWZ256rmi:  case X86::VPCMPWZ256rmik:
619   case X86::VPCMPWZ256rri:  case X86::VPCMPWZ256rrik:
620   case X86::VPCMPWZrmi:     case X86::VPCMPWZrmik:
621   case X86::VPCMPWZrri:     case X86::VPCMPWZrrik: {
622     // Turn immediate 0 into the VPCMPEQ instruction.
623     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
624       unsigned NewOpc;
625       switch (OutMI.getOpcode()) {
626       default: llvm_unreachable("Invalid opcode");
627       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPEQBZ128rm;   break;
628       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPEQBZ128rmk;  break;
629       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPEQBZ128rr;   break;
630       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPEQBZ128rrk;  break;
631       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPEQBZ256rm;   break;
632       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPEQBZ256rmk;  break;
633       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPEQBZ256rr;   break;
634       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPEQBZ256rrk;  break;
635       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPEQBZrm;      break;
636       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPEQBZrmk;     break;
637       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPEQBZrr;      break;
638       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPEQBZrrk;     break;
639       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPEQDZ128rm;   break;
640       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPEQDZ128rmb;  break;
641       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
642       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPEQDZ128rmk;  break;
643       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPEQDZ128rr;   break;
644       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPEQDZ128rrk;  break;
645       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPEQDZ256rm;   break;
646       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPEQDZ256rmb;  break;
647       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
648       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPEQDZ256rmk;  break;
649       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPEQDZ256rr;   break;
650       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPEQDZ256rrk;  break;
651       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPEQDZrm;      break;
652       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPEQDZrmb;     break;
653       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPEQDZrmbk;    break;
654       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPEQDZrmk;     break;
655       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPEQDZrr;      break;
656       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPEQDZrrk;     break;
657       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPEQQZ128rm;   break;
658       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPEQQZ128rmb;  break;
659       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
660       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPEQQZ128rmk;  break;
661       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPEQQZ128rr;   break;
662       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPEQQZ128rrk;  break;
663       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPEQQZ256rm;   break;
664       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPEQQZ256rmb;  break;
665       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
666       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPEQQZ256rmk;  break;
667       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPEQQZ256rr;   break;
668       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPEQQZ256rrk;  break;
669       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPEQQZrm;      break;
670       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPEQQZrmb;     break;
671       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPEQQZrmbk;    break;
672       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPEQQZrmk;     break;
673       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPEQQZrr;      break;
674       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPEQQZrrk;     break;
675       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPEQWZ128rm;   break;
676       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPEQWZ128rmk;  break;
677       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPEQWZ128rr;   break;
678       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPEQWZ128rrk;  break;
679       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPEQWZ256rm;   break;
680       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPEQWZ256rmk;  break;
681       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPEQWZ256rr;   break;
682       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPEQWZ256rrk;  break;
683       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPEQWZrm;      break;
684       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPEQWZrmk;     break;
685       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPEQWZrr;      break;
686       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPEQWZrrk;     break;
687       }
688 
689       OutMI.setOpcode(NewOpc);
690       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
691       break;
692     }
693 
694     // Turn immediate 6 into the VPCMPGT instruction.
695     if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
696       unsigned NewOpc;
697       switch (OutMI.getOpcode()) {
698       default: llvm_unreachable("Invalid opcode");
699       case X86::VPCMPBZ128rmi:   NewOpc = X86::VPCMPGTBZ128rm;   break;
700       case X86::VPCMPBZ128rmik:  NewOpc = X86::VPCMPGTBZ128rmk;  break;
701       case X86::VPCMPBZ128rri:   NewOpc = X86::VPCMPGTBZ128rr;   break;
702       case X86::VPCMPBZ128rrik:  NewOpc = X86::VPCMPGTBZ128rrk;  break;
703       case X86::VPCMPBZ256rmi:   NewOpc = X86::VPCMPGTBZ256rm;   break;
704       case X86::VPCMPBZ256rmik:  NewOpc = X86::VPCMPGTBZ256rmk;  break;
705       case X86::VPCMPBZ256rri:   NewOpc = X86::VPCMPGTBZ256rr;   break;
706       case X86::VPCMPBZ256rrik:  NewOpc = X86::VPCMPGTBZ256rrk;  break;
707       case X86::VPCMPBZrmi:      NewOpc = X86::VPCMPGTBZrm;      break;
708       case X86::VPCMPBZrmik:     NewOpc = X86::VPCMPGTBZrmk;     break;
709       case X86::VPCMPBZrri:      NewOpc = X86::VPCMPGTBZrr;      break;
710       case X86::VPCMPBZrrik:     NewOpc = X86::VPCMPGTBZrrk;     break;
711       case X86::VPCMPDZ128rmi:   NewOpc = X86::VPCMPGTDZ128rm;   break;
712       case X86::VPCMPDZ128rmib:  NewOpc = X86::VPCMPGTDZ128rmb;  break;
713       case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
714       case X86::VPCMPDZ128rmik:  NewOpc = X86::VPCMPGTDZ128rmk;  break;
715       case X86::VPCMPDZ128rri:   NewOpc = X86::VPCMPGTDZ128rr;   break;
716       case X86::VPCMPDZ128rrik:  NewOpc = X86::VPCMPGTDZ128rrk;  break;
717       case X86::VPCMPDZ256rmi:   NewOpc = X86::VPCMPGTDZ256rm;   break;
718       case X86::VPCMPDZ256rmib:  NewOpc = X86::VPCMPGTDZ256rmb;  break;
719       case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
720       case X86::VPCMPDZ256rmik:  NewOpc = X86::VPCMPGTDZ256rmk;  break;
721       case X86::VPCMPDZ256rri:   NewOpc = X86::VPCMPGTDZ256rr;   break;
722       case X86::VPCMPDZ256rrik:  NewOpc = X86::VPCMPGTDZ256rrk;  break;
723       case X86::VPCMPDZrmi:      NewOpc = X86::VPCMPGTDZrm;      break;
724       case X86::VPCMPDZrmib:     NewOpc = X86::VPCMPGTDZrmb;     break;
725       case X86::VPCMPDZrmibk:    NewOpc = X86::VPCMPGTDZrmbk;    break;
726       case X86::VPCMPDZrmik:     NewOpc = X86::VPCMPGTDZrmk;     break;
727       case X86::VPCMPDZrri:      NewOpc = X86::VPCMPGTDZrr;      break;
728       case X86::VPCMPDZrrik:     NewOpc = X86::VPCMPGTDZrrk;     break;
729       case X86::VPCMPQZ128rmi:   NewOpc = X86::VPCMPGTQZ128rm;   break;
730       case X86::VPCMPQZ128rmib:  NewOpc = X86::VPCMPGTQZ128rmb;  break;
731       case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
732       case X86::VPCMPQZ128rmik:  NewOpc = X86::VPCMPGTQZ128rmk;  break;
733       case X86::VPCMPQZ128rri:   NewOpc = X86::VPCMPGTQZ128rr;   break;
734       case X86::VPCMPQZ128rrik:  NewOpc = X86::VPCMPGTQZ128rrk;  break;
735       case X86::VPCMPQZ256rmi:   NewOpc = X86::VPCMPGTQZ256rm;   break;
736       case X86::VPCMPQZ256rmib:  NewOpc = X86::VPCMPGTQZ256rmb;  break;
737       case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
738       case X86::VPCMPQZ256rmik:  NewOpc = X86::VPCMPGTQZ256rmk;  break;
739       case X86::VPCMPQZ256rri:   NewOpc = X86::VPCMPGTQZ256rr;   break;
740       case X86::VPCMPQZ256rrik:  NewOpc = X86::VPCMPGTQZ256rrk;  break;
741       case X86::VPCMPQZrmi:      NewOpc = X86::VPCMPGTQZrm;      break;
742       case X86::VPCMPQZrmib:     NewOpc = X86::VPCMPGTQZrmb;     break;
743       case X86::VPCMPQZrmibk:    NewOpc = X86::VPCMPGTQZrmbk;    break;
744       case X86::VPCMPQZrmik:     NewOpc = X86::VPCMPGTQZrmk;     break;
745       case X86::VPCMPQZrri:      NewOpc = X86::VPCMPGTQZrr;      break;
746       case X86::VPCMPQZrrik:     NewOpc = X86::VPCMPGTQZrrk;     break;
747       case X86::VPCMPWZ128rmi:   NewOpc = X86::VPCMPGTWZ128rm;   break;
748       case X86::VPCMPWZ128rmik:  NewOpc = X86::VPCMPGTWZ128rmk;  break;
749       case X86::VPCMPWZ128rri:   NewOpc = X86::VPCMPGTWZ128rr;   break;
750       case X86::VPCMPWZ128rrik:  NewOpc = X86::VPCMPGTWZ128rrk;  break;
751       case X86::VPCMPWZ256rmi:   NewOpc = X86::VPCMPGTWZ256rm;   break;
752       case X86::VPCMPWZ256rmik:  NewOpc = X86::VPCMPGTWZ256rmk;  break;
753       case X86::VPCMPWZ256rri:   NewOpc = X86::VPCMPGTWZ256rr;   break;
754       case X86::VPCMPWZ256rrik:  NewOpc = X86::VPCMPGTWZ256rrk;  break;
755       case X86::VPCMPWZrmi:      NewOpc = X86::VPCMPGTWZrm;      break;
756       case X86::VPCMPWZrmik:     NewOpc = X86::VPCMPGTWZrmk;     break;
757       case X86::VPCMPWZrri:      NewOpc = X86::VPCMPGTWZrr;      break;
758       case X86::VPCMPWZrrik:     NewOpc = X86::VPCMPGTWZrrk;     break;
759       }
760 
761       OutMI.setOpcode(NewOpc);
762       OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
763       break;
764     }
765 
766     break;
767   }
768 
769   // CALL64r, CALL64pcrel32 - These instructions used to have
770   // register inputs modeled as normal uses instead of implicit uses.  As such,
771   // they we used to truncate off all but the first operand (the callee). This
772   // issue seems to have been fixed at some point. This assert verifies that.
773   case X86::CALL64r:
774   case X86::CALL64pcrel32:
775     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
776     break;
777 
778   case X86::EH_RETURN:
779   case X86::EH_RETURN64: {
780     OutMI = MCInst();
781     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
782     break;
783   }
784 
785   case X86::CLEANUPRET: {
786     // Replace CLEANUPRET with the appropriate RET.
787     OutMI = MCInst();
788     OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
789     break;
790   }
791 
792   case X86::CATCHRET: {
793     // Replace CATCHRET with the appropriate RET.
794     const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
795     unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
796     OutMI = MCInst();
797     OutMI.setOpcode(getRetOpcode(Subtarget));
798     OutMI.addOperand(MCOperand::createReg(ReturnReg));
799     break;
800   }
801 
802   // TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
803   // instruction.
804   case X86::TAILJMPr:
805   case X86::TAILJMPr64:
806   case X86::TAILJMPr64_REX:
807   case X86::TAILJMPd:
808   case X86::TAILJMPd64:
809     assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
810     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
811     break;
812 
813   case X86::TAILJMPd_CC:
814   case X86::TAILJMPd64_CC:
815     assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
816     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
817     break;
818 
819   case X86::TAILJMPm:
820   case X86::TAILJMPm64:
821   case X86::TAILJMPm64_REX:
822     assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
823            "Unexpected number of operands!");
824     OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
825     break;
826 
827   case X86::DEC16r:
828   case X86::DEC32r:
829   case X86::INC16r:
830   case X86::INC32r:
831     // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
832     if (!AsmPrinter.getSubtarget().is64Bit()) {
833       unsigned Opcode;
834       switch (OutMI.getOpcode()) {
835       default: llvm_unreachable("Invalid opcode");
836       case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
837       case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
838       case X86::INC16r: Opcode = X86::INC16r_alt; break;
839       case X86::INC32r: Opcode = X86::INC32r_alt; break;
840       }
841       OutMI.setOpcode(Opcode);
842     }
843     break;
844 
845   // We don't currently select the correct instruction form for instructions
846   // which have a short %eax, etc. form. Handle this by custom lowering, for
847   // now.
848   //
849   // Note, we are currently not handling the following instructions:
850   // MOV64ao8, MOV64o8a
851   // XCHG16ar, XCHG32ar, XCHG64ar
852   case X86::MOV8mr_NOREX:
853   case X86::MOV8mr:
854   case X86::MOV8rm_NOREX:
855   case X86::MOV8rm:
856   case X86::MOV16mr:
857   case X86::MOV16rm:
858   case X86::MOV32mr:
859   case X86::MOV32rm: {
860     unsigned NewOpc;
861     switch (OutMI.getOpcode()) {
862     default: llvm_unreachable("Invalid opcode");
863     case X86::MOV8mr_NOREX:
864     case X86::MOV8mr:  NewOpc = X86::MOV8o32a; break;
865     case X86::MOV8rm_NOREX:
866     case X86::MOV8rm:  NewOpc = X86::MOV8ao32; break;
867     case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
868     case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
869     case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
870     case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
871     }
872     SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
873     break;
874   }
875 
876   case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
877   case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
878   case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
879   case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
880   case X86::OR8ri:  case X86::OR16ri:  case X86::OR32ri:  case X86::OR64ri32:
881   case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
882   case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
883   case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
884   case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
885     unsigned NewOpc;
886     switch (OutMI.getOpcode()) {
887     default: llvm_unreachable("Invalid opcode");
888     case X86::ADC8ri:     NewOpc = X86::ADC8i8;    break;
889     case X86::ADC16ri:    NewOpc = X86::ADC16i16;  break;
890     case X86::ADC32ri:    NewOpc = X86::ADC32i32;  break;
891     case X86::ADC64ri32:  NewOpc = X86::ADC64i32;  break;
892     case X86::ADD8ri:     NewOpc = X86::ADD8i8;    break;
893     case X86::ADD16ri:    NewOpc = X86::ADD16i16;  break;
894     case X86::ADD32ri:    NewOpc = X86::ADD32i32;  break;
895     case X86::ADD64ri32:  NewOpc = X86::ADD64i32;  break;
896     case X86::AND8ri:     NewOpc = X86::AND8i8;    break;
897     case X86::AND16ri:    NewOpc = X86::AND16i16;  break;
898     case X86::AND32ri:    NewOpc = X86::AND32i32;  break;
899     case X86::AND64ri32:  NewOpc = X86::AND64i32;  break;
900     case X86::CMP8ri:     NewOpc = X86::CMP8i8;    break;
901     case X86::CMP16ri:    NewOpc = X86::CMP16i16;  break;
902     case X86::CMP32ri:    NewOpc = X86::CMP32i32;  break;
903     case X86::CMP64ri32:  NewOpc = X86::CMP64i32;  break;
904     case X86::OR8ri:      NewOpc = X86::OR8i8;     break;
905     case X86::OR16ri:     NewOpc = X86::OR16i16;   break;
906     case X86::OR32ri:     NewOpc = X86::OR32i32;   break;
907     case X86::OR64ri32:   NewOpc = X86::OR64i32;   break;
908     case X86::SBB8ri:     NewOpc = X86::SBB8i8;    break;
909     case X86::SBB16ri:    NewOpc = X86::SBB16i16;  break;
910     case X86::SBB32ri:    NewOpc = X86::SBB32i32;  break;
911     case X86::SBB64ri32:  NewOpc = X86::SBB64i32;  break;
912     case X86::SUB8ri:     NewOpc = X86::SUB8i8;    break;
913     case X86::SUB16ri:    NewOpc = X86::SUB16i16;  break;
914     case X86::SUB32ri:    NewOpc = X86::SUB32i32;  break;
915     case X86::SUB64ri32:  NewOpc = X86::SUB64i32;  break;
916     case X86::TEST8ri:    NewOpc = X86::TEST8i8;   break;
917     case X86::TEST16ri:   NewOpc = X86::TEST16i16; break;
918     case X86::TEST32ri:   NewOpc = X86::TEST32i32; break;
919     case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
920     case X86::XOR8ri:     NewOpc = X86::XOR8i8;    break;
921     case X86::XOR16ri:    NewOpc = X86::XOR16i16;  break;
922     case X86::XOR32ri:    NewOpc = X86::XOR32i32;  break;
923     case X86::XOR64ri32:  NewOpc = X86::XOR64i32;  break;
924     }
925     SimplifyShortImmForm(OutMI, NewOpc);
926     break;
927   }
928 
929   // Try to shrink some forms of movsx.
930   case X86::MOVSX16rr8:
931   case X86::MOVSX32rr16:
932   case X86::MOVSX64rr32:
933     SimplifyMOVSX(OutMI);
934     break;
935 
936   case X86::VCMPPDrri:
937   case X86::VCMPPDYrri:
938   case X86::VCMPPSrri:
939   case X86::VCMPPSYrri:
940   case X86::VCMPSDrr:
941   case X86::VCMPSSrr: {
942     // Swap the operands if it will enable a 2 byte VEX encoding.
943     // FIXME: Change the immediate to improve opportunities?
944     if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
945         X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
946       unsigned Imm = MI->getOperand(3).getImm() & 0x7;
947       switch (Imm) {
948       default: break;
949       case 0x00: // EQUAL
950       case 0x03: // UNORDERED
951       case 0x04: // NOT EQUAL
952       case 0x07: // ORDERED
953         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
954         break;
955       }
956     }
957     break;
958   }
959 
960   case X86::VMOVHLPSrr:
961   case X86::VUNPCKHPDrr:
962     // These are not truly commutable so hide them from the default case.
963     break;
964 
965   default: {
966     // If the instruction is a commutable arithmetic instruction we might be
967     // able to commute the operands to get a 2 byte VEX prefix.
968     uint64_t TSFlags = MI->getDesc().TSFlags;
969     if (MI->getDesc().isCommutable() &&
970         (TSFlags & X86II::EncodingMask) == X86II::VEX &&
971         (TSFlags & X86II::OpMapMask) == X86II::TB &&
972         (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
973         !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
974         OutMI.getNumOperands() == 3) {
975       if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
976           X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
977         std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
978     }
979     break;
980   }
981   }
982 }
983 
984 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
985                                  const MachineInstr &MI) {
986   NoAutoPaddingScope NoPadScope(*OutStreamer);
987   bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
988                   MI.getOpcode() != X86::TLS_base_addr32;
989   bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
990                       MI.getOpcode() == X86::TLS_base_addr64;
991   MCContext &Ctx = OutStreamer->getContext();
992 
993   MCSymbolRefExpr::VariantKind SRVK;
994   switch (MI.getOpcode()) {
995   case X86::TLS_addr32:
996   case X86::TLS_addr64:
997   case X86::TLS_addrX32:
998     SRVK = MCSymbolRefExpr::VK_TLSGD;
999     break;
1000   case X86::TLS_base_addr32:
1001     SRVK = MCSymbolRefExpr::VK_TLSLDM;
1002     break;
1003   case X86::TLS_base_addr64:
1004   case X86::TLS_base_addrX32:
1005     SRVK = MCSymbolRefExpr::VK_TLSLD;
1006     break;
1007   default:
1008     llvm_unreachable("unexpected opcode");
1009   }
1010 
1011   const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
1012       MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);
1013 
1014   // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
1015   // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
1016   // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
1017   // only using GOT when GOTPCRELX is enabled.
1018   // TODO Delete the workaround when GOTPCRELX becomes commonplace.
1019   bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
1020                 Ctx.getAsmInfo()->canRelaxRelocations();
1021 
1022   if (Is64Bits) {
1023     bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
1024     if (NeedsPadding && Is64BitsLP64)
1025       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1026     EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
1027                                 .addReg(X86::RDI)
1028                                 .addReg(X86::RIP)
1029                                 .addImm(1)
1030                                 .addReg(0)
1031                                 .addExpr(Sym)
1032                                 .addReg(0));
1033     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
1034     if (NeedsPadding) {
1035       if (!UseGot)
1036         EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1037       EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
1038       EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
1039     }
1040     if (UseGot) {
1041       const MCExpr *Expr = MCSymbolRefExpr::create(
1042           TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
1043       EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
1044                                   .addReg(X86::RIP)
1045                                   .addImm(1)
1046                                   .addReg(0)
1047                                   .addExpr(Expr)
1048                                   .addReg(0));
1049     } else {
1050       EmitAndCountInstruction(
1051           MCInstBuilder(X86::CALL64pcrel32)
1052               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1053                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1054     }
1055   } else {
1056     if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
1057       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1058                                   .addReg(X86::EAX)
1059                                   .addReg(0)
1060                                   .addImm(1)
1061                                   .addReg(X86::EBX)
1062                                   .addExpr(Sym)
1063                                   .addReg(0));
1064     } else {
1065       EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
1066                                   .addReg(X86::EAX)
1067                                   .addReg(X86::EBX)
1068                                   .addImm(1)
1069                                   .addReg(0)
1070                                   .addExpr(Sym)
1071                                   .addReg(0));
1072     }
1073 
1074     const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
1075     if (UseGot) {
1076       const MCExpr *Expr =
1077           MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
1078       EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
1079                                   .addReg(X86::EBX)
1080                                   .addImm(1)
1081                                   .addReg(0)
1082                                   .addExpr(Expr)
1083                                   .addReg(0));
1084     } else {
1085       EmitAndCountInstruction(
1086           MCInstBuilder(X86::CALLpcrel32)
1087               .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
1088                                                MCSymbolRefExpr::VK_PLT, Ctx)));
1089     }
1090   }
1091 }
1092 
1093 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
1094 /// bytes.  Return the size of nop emitted.
1095 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
1096                         const X86Subtarget *Subtarget) {
1097   // Determine the longest nop which can be efficiently decoded for the given
1098   // target cpu.  15-bytes is the longest single NOP instruction, but some
1099   // platforms can't decode the longest forms efficiently.
1100   unsigned MaxNopLength = 1;
1101   if (Subtarget->is64Bit()) {
1102     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
1103     // IndexReg/BaseReg below need to be updated.
1104     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
1105       MaxNopLength = 7;
1106     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
1107       MaxNopLength = 15;
1108     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
1109       MaxNopLength = 11;
1110     else
1111       MaxNopLength = 10;
1112   } if (Subtarget->is32Bit())
1113     MaxNopLength = 2;
1114 
1115   // Cap a single nop emission at the profitable value for the target
1116   NumBytes = std::min(NumBytes, MaxNopLength);
1117 
1118   unsigned NopSize;
1119   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
1120   IndexReg = Displacement = SegmentReg = 0;
1121   BaseReg = X86::RAX;
1122   ScaleVal = 1;
1123   switch (NumBytes) {
1124   case 0:
1125     llvm_unreachable("Zero nops?");
1126     break;
1127   case 1:
1128     NopSize = 1;
1129     Opc = X86::NOOP;
1130     break;
1131   case 2:
1132     NopSize = 2;
1133     Opc = X86::XCHG16ar;
1134     break;
1135   case 3:
1136     NopSize = 3;
1137     Opc = X86::NOOPL;
1138     break;
1139   case 4:
1140     NopSize = 4;
1141     Opc = X86::NOOPL;
1142     Displacement = 8;
1143     break;
1144   case 5:
1145     NopSize = 5;
1146     Opc = X86::NOOPL;
1147     Displacement = 8;
1148     IndexReg = X86::RAX;
1149     break;
1150   case 6:
1151     NopSize = 6;
1152     Opc = X86::NOOPW;
1153     Displacement = 8;
1154     IndexReg = X86::RAX;
1155     break;
1156   case 7:
1157     NopSize = 7;
1158     Opc = X86::NOOPL;
1159     Displacement = 512;
1160     break;
1161   case 8:
1162     NopSize = 8;
1163     Opc = X86::NOOPL;
1164     Displacement = 512;
1165     IndexReg = X86::RAX;
1166     break;
1167   case 9:
1168     NopSize = 9;
1169     Opc = X86::NOOPW;
1170     Displacement = 512;
1171     IndexReg = X86::RAX;
1172     break;
1173   default:
1174     NopSize = 10;
1175     Opc = X86::NOOPW;
1176     Displacement = 512;
1177     IndexReg = X86::RAX;
1178     SegmentReg = X86::CS;
1179     break;
1180   }
1181 
1182   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
1183   NopSize += NumPrefixes;
1184   for (unsigned i = 0; i != NumPrefixes; ++i)
1185     OS.emitBytes("\x66");
1186 
1187   switch (Opc) {
1188   default: llvm_unreachable("Unexpected opcode");
1189   case X86::NOOP:
1190     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
1191     break;
1192   case X86::XCHG16ar:
1193     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
1194                        *Subtarget);
1195     break;
1196   case X86::NOOPL:
1197   case X86::NOOPW:
1198     OS.emitInstruction(MCInstBuilder(Opc)
1199                            .addReg(BaseReg)
1200                            .addImm(ScaleVal)
1201                            .addReg(IndexReg)
1202                            .addImm(Displacement)
1203                            .addReg(SegmentReg),
1204                        *Subtarget);
1205     break;
1206   }
1207   assert(NopSize <= NumBytes && "We overemitted?");
1208   return NopSize;
1209 }
1210 
1211 /// Emit the optimal amount of multi-byte nops on X86.
1212 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
1213                         const X86Subtarget *Subtarget) {
1214   unsigned NopsToEmit = NumBytes;
1215   (void)NopsToEmit;
1216   while (NumBytes) {
1217     NumBytes -= emitNop(OS, NumBytes, Subtarget);
1218     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
1219   }
1220 }
1221 
1222 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
1223                                     X86MCInstLower &MCIL) {
1224   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
1225 
1226   NoAutoPaddingScope NoPadScope(*OutStreamer);
1227 
1228   StatepointOpers SOpers(&MI);
1229   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
1230     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
1231   } else {
1232     // Lower call target and choose correct opcode
1233     const MachineOperand &CallTarget = SOpers.getCallTarget();
1234     MCOperand CallTargetMCOp;
1235     unsigned CallOpcode;
1236     switch (CallTarget.getType()) {
1237     case MachineOperand::MO_GlobalAddress:
1238     case MachineOperand::MO_ExternalSymbol:
1239       CallTargetMCOp = MCIL.LowerSymbolOperand(
1240           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
1241       CallOpcode = X86::CALL64pcrel32;
1242       // Currently, we only support relative addressing with statepoints.
1243       // Otherwise, we'll need a scratch register to hold the target
1244       // address.  You'll fail asserts during load & relocation if this
1245       // symbol is to far away. (TODO: support non-relative addressing)
1246       break;
1247     case MachineOperand::MO_Immediate:
1248       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
1249       CallOpcode = X86::CALL64pcrel32;
1250       // Currently, we only support relative addressing with statepoints.
1251       // Otherwise, we'll need a scratch register to hold the target
1252       // immediate.  You'll fail asserts during load & relocation if this
1253       // address is to far away. (TODO: support non-relative addressing)
1254       break;
1255     case MachineOperand::MO_Register:
1256       // FIXME: Add retpoline support and remove this.
1257       if (Subtarget->useIndirectThunkCalls())
1258         report_fatal_error("Lowering register statepoints with thunks not "
1259                            "yet implemented.");
1260       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
1261       CallOpcode = X86::CALL64r;
1262       break;
1263     default:
1264       llvm_unreachable("Unsupported operand type in statepoint call target");
1265       break;
1266     }
1267 
1268     // Emit call
1269     MCInst CallInst;
1270     CallInst.setOpcode(CallOpcode);
1271     CallInst.addOperand(CallTargetMCOp);
1272     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
1273   }
1274 
1275   // Record our statepoint node in the same section used by STACKMAP
1276   // and PATCHPOINT
1277   auto &Ctx = OutStreamer->getContext();
1278   MCSymbol *MILabel = Ctx.createTempSymbol();
1279   OutStreamer->emitLabel(MILabel);
1280   SM.recordStatepoint(*MILabel, MI);
1281 }
1282 
1283 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
1284                                      X86MCInstLower &MCIL) {
1285   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
1286   //                  <opcode>, <operands>
1287 
1288   NoAutoPaddingScope NoPadScope(*OutStreamer);
1289 
1290   Register DefRegister = FaultingMI.getOperand(0).getReg();
1291   FaultMaps::FaultKind FK =
1292       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
1293   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
1294   unsigned Opcode = FaultingMI.getOperand(3).getImm();
1295   unsigned OperandsBeginIdx = 4;
1296 
1297   auto &Ctx = OutStreamer->getContext();
1298   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
1299   OutStreamer->emitLabel(FaultingLabel);
1300 
1301   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
1302   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
1303 
1304   MCInst MI;
1305   MI.setOpcode(Opcode);
1306 
1307   if (DefRegister != X86::NoRegister)
1308     MI.addOperand(MCOperand::createReg(DefRegister));
1309 
1310   for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
1311             E = FaultingMI.operands_end();
1312        I != E; ++I)
1313     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
1314       MI.addOperand(MaybeOperand.getValue());
1315 
1316   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
1317   OutStreamer->emitInstruction(MI, getSubtargetInfo());
1318 }
1319 
1320 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
1321                                      X86MCInstLower &MCIL) {
1322   bool Is64Bits = Subtarget->is64Bit();
1323   MCContext &Ctx = OutStreamer->getContext();
1324   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
1325   const MCSymbolRefExpr *Op =
1326       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
1327 
1328   EmitAndCountInstruction(
1329       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
1330           .addExpr(Op));
1331 }
1332 
1333 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
1334   // FIXME: Make this work on non-ELF.
1335   if (!TM.getTargetTriple().isOSBinFormatELF()) {
1336     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
1337     return;
1338   }
1339 
1340   const auto &Reg = MI.getOperand(0).getReg();
1341   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
1342 
1343   uint64_t ShadowBase;
1344   int MappingScale;
1345   bool OrShadowOffset;
1346   getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
1347                             AccessInfo.CompileKernel, &ShadowBase,
1348                             &MappingScale, &OrShadowOffset);
1349 
1350   std::string Name = AccessInfo.IsWrite ? "store" : "load";
1351   std::string Op = OrShadowOffset ? "or" : "add";
1352   std::string SymName = "__asan_check_" + Name + "_" + Op + "_" +
1353                         utostr(1ULL << AccessInfo.AccessSizeIndex) + "_" +
1354                         TM.getMCRegisterInfo()->getName(Reg.asMCReg());
1355   if (OrShadowOffset)
1356     report_fatal_error(
1357         "OrShadowOffset is not supported with optimized callbacks");
1358 
1359   EmitAndCountInstruction(
1360       MCInstBuilder(X86::CALL64pcrel32)
1361           .addExpr(MCSymbolRefExpr::create(
1362               OutContext.getOrCreateSymbol(SymName), OutContext)));
1363 }
1364 
1365 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
1366                                       X86MCInstLower &MCIL) {
1367   // PATCHABLE_OP minsize, opcode, operands
1368 
1369   NoAutoPaddingScope NoPadScope(*OutStreamer);
1370 
1371   unsigned MinSize = MI.getOperand(0).getImm();
1372   unsigned Opcode = MI.getOperand(1).getImm();
1373 
1374   MCInst MCI;
1375   MCI.setOpcode(Opcode);
1376   for (auto &MO : drop_begin(MI.operands(), 2))
1377     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1378       MCI.addOperand(MaybeOperand.getValue());
1379 
1380   SmallString<256> Code;
1381   SmallVector<MCFixup, 4> Fixups;
1382   raw_svector_ostream VecOS(Code);
1383   CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1384 
1385   if (Code.size() < MinSize) {
1386     if (MinSize == 2 && Subtarget->is32Bit() &&
1387         Subtarget->isTargetWindowsMSVC() &&
1388         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
1389       // For compatibilty reasons, when targetting MSVC, is is important to
1390       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
1391       // rely specifically on this pattern to be able to patch a function.
1392       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
1393       OutStreamer->emitInstruction(
1394           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
1395           *Subtarget);
1396     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
1397       // This is an optimization that lets us get away without emitting a nop in
1398       // many cases.
1399       //
1400       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1401       // bytes too, so the check on MinSize is important.
1402       MCI.setOpcode(X86::PUSH64rmr);
1403     } else {
1404       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
1405       assert(NopSize == MinSize && "Could not implement MinSize!");
1406       (void)NopSize;
1407     }
1408   }
1409 
1410   OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1411 }
1412 
1413 // Lower a stackmap of the form:
1414 // <id>, <shadowBytes>, ...
1415 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1416   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1417 
1418   auto &Ctx = OutStreamer->getContext();
1419   MCSymbol *MILabel = Ctx.createTempSymbol();
1420   OutStreamer->emitLabel(MILabel);
1421 
1422   SM.recordStackMap(*MILabel, MI);
1423   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1424   SMShadowTracker.reset(NumShadowBytes);
1425 }
1426 
1427 // Lower a patchpoint of the form:
1428 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1429 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1430                                     X86MCInstLower &MCIL) {
1431   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1432 
1433   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1434 
1435   NoAutoPaddingScope NoPadScope(*OutStreamer);
1436 
1437   auto &Ctx = OutStreamer->getContext();
1438   MCSymbol *MILabel = Ctx.createTempSymbol();
1439   OutStreamer->emitLabel(MILabel);
1440   SM.recordPatchPoint(*MILabel, MI);
1441 
1442   PatchPointOpers opers(&MI);
1443   unsigned ScratchIdx = opers.getNextScratchIdx();
1444   unsigned EncodedBytes = 0;
1445   const MachineOperand &CalleeMO = opers.getCallTarget();
1446 
1447   // Check for null target. If target is non-null (i.e. is non-zero or is
1448   // symbolic) then emit a call.
1449   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1450     MCOperand CalleeMCOp;
1451     switch (CalleeMO.getType()) {
1452     default:
1453       /// FIXME: Add a verifier check for bad callee types.
1454       llvm_unreachable("Unrecognized callee operand type.");
1455     case MachineOperand::MO_Immediate:
1456       if (CalleeMO.getImm())
1457         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1458       break;
1459     case MachineOperand::MO_ExternalSymbol:
1460     case MachineOperand::MO_GlobalAddress:
1461       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1462                                            MCIL.GetSymbolFromOperand(CalleeMO));
1463       break;
1464     }
1465 
1466     // Emit MOV to materialize the target address and the CALL to target.
1467     // This is encoded with 12-13 bytes, depending on which register is used.
1468     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1469     if (X86II::isX86_64ExtendedReg(ScratchReg))
1470       EncodedBytes = 13;
1471     else
1472       EncodedBytes = 12;
1473 
1474     EmitAndCountInstruction(
1475         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1476     // FIXME: Add retpoline support and remove this.
1477     if (Subtarget->useIndirectThunkCalls())
1478       report_fatal_error(
1479           "Lowering patchpoint with thunks not yet implemented.");
1480     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1481   }
1482 
1483   // Emit padding.
1484   unsigned NumBytes = opers.getNumPatchBytes();
1485   assert(NumBytes >= EncodedBytes &&
1486          "Patchpoint can't request size less than the length of a call.");
1487 
1488   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1489 }
1490 
1491 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1492                                               X86MCInstLower &MCIL) {
1493   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1494 
1495   NoAutoPaddingScope NoPadScope(*OutStreamer);
1496 
1497   // We want to emit the following pattern, which follows the x86 calling
1498   // convention to prepare for the trampoline call to be patched in.
1499   //
1500   //   .p2align 1, ...
1501   // .Lxray_event_sled_N:
1502   //   jmp +N                        // jump across the instrumentation sled
1503   //   ...                           // set up arguments in register
1504   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1505   //   ...
1506   //   <jump here>
1507   //
1508   // After patching, it would look something like:
1509   //
1510   //   nopw (2-byte nop)
1511   //   ...
1512   //   callq __xrayCustomEvent  // already lowered
1513   //   ...
1514   //
1515   // ---
1516   // First we emit the label and the jump.
1517   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1518   OutStreamer->AddComment("# XRay Custom Event Log");
1519   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1520   OutStreamer->emitLabel(CurSled);
1521 
1522   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1523   // an operand (computed as an offset from the jmp instruction).
1524   // FIXME: Find another less hacky way do force the relative jump.
1525   OutStreamer->emitBinaryData("\xeb\x0f");
1526 
1527   // The default C calling convention will place two arguments into %rcx and
1528   // %rdx -- so we only work with those.
1529   const Register DestRegs[] = {X86::RDI, X86::RSI};
1530   bool UsedMask[] = {false, false};
1531   // Filled out in loop.
1532   Register SrcRegs[] = {0, 0};
1533 
1534   // Then we put the operands in the %rdi and %rsi registers. We spill the
1535   // values in the register before we clobber them, and mark them as used in
1536   // UsedMask. In case the arguments are already in the correct register, we use
1537   // emit nops appropriately sized to keep the sled the same size in every
1538   // situation.
1539   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1540     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1541       assert(Op->isReg() && "Only support arguments in registers");
1542       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1543       if (SrcRegs[I] != DestRegs[I]) {
1544         UsedMask[I] = true;
1545         EmitAndCountInstruction(
1546             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1547       } else {
1548         emitX86Nops(*OutStreamer, 4, Subtarget);
1549       }
1550     }
1551 
1552   // Now that the register values are stashed, mov arguments into place.
1553   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1554   // earlier DestReg. We will have already overwritten over the register before
1555   // we can copy from it.
1556   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1557     if (SrcRegs[I] != DestRegs[I])
1558       EmitAndCountInstruction(
1559           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1560 
1561   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1562   // name of the trampoline to be implemented by the XRay runtime.
1563   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1564   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1565   if (isPositionIndependent())
1566     TOp.setTargetFlags(X86II::MO_PLT);
1567 
1568   // Emit the call instruction.
1569   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1570                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1571 
1572   // Restore caller-saved and used registers.
1573   for (unsigned I = sizeof UsedMask; I-- > 0;)
1574     if (UsedMask[I])
1575       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1576     else
1577       emitX86Nops(*OutStreamer, 1, Subtarget);
1578 
1579   OutStreamer->AddComment("xray custom event end.");
1580 
1581   // Record the sled version. Version 0 of this sled was spelled differently, so
1582   // we let the runtime handle the different offsets we're using. Version 2
1583   // changed the absolute address to a PC-relative address.
1584   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1585 }
1586 
1587 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1588                                                     X86MCInstLower &MCIL) {
1589   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1590 
1591   NoAutoPaddingScope NoPadScope(*OutStreamer);
1592 
1593   // We want to emit the following pattern, which follows the x86 calling
1594   // convention to prepare for the trampoline call to be patched in.
1595   //
1596   //   .p2align 1, ...
1597   // .Lxray_event_sled_N:
1598   //   jmp +N                        // jump across the instrumentation sled
1599   //   ...                           // set up arguments in register
1600   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1601   //   ...
1602   //   <jump here>
1603   //
1604   // After patching, it would look something like:
1605   //
1606   //   nopw (2-byte nop)
1607   //   ...
1608   //   callq __xrayTypedEvent  // already lowered
1609   //   ...
1610   //
1611   // ---
1612   // First we emit the label and the jump.
1613   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1614   OutStreamer->AddComment("# XRay Typed Event Log");
1615   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1616   OutStreamer->emitLabel(CurSled);
1617 
1618   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1619   // an operand (computed as an offset from the jmp instruction).
1620   // FIXME: Find another less hacky way do force the relative jump.
1621   OutStreamer->emitBinaryData("\xeb\x14");
1622 
1623   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1624   // so we'll work with those. Or we may be called via SystemV, in which case
1625   // we don't have to do any translation.
1626   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1627   bool UsedMask[] = {false, false, false};
1628 
1629   // Will fill out src regs in the loop.
1630   Register SrcRegs[] = {0, 0, 0};
1631 
1632   // Then we put the operands in the SystemV registers. We spill the values in
1633   // the registers before we clobber them, and mark them as used in UsedMask.
1634   // In case the arguments are already in the correct register, we emit nops
1635   // appropriately sized to keep the sled the same size in every situation.
1636   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1637     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1638       // TODO: Is register only support adequate?
1639       assert(Op->isReg() && "Only supports arguments in registers");
1640       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1641       if (SrcRegs[I] != DestRegs[I]) {
1642         UsedMask[I] = true;
1643         EmitAndCountInstruction(
1644             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1645       } else {
1646         emitX86Nops(*OutStreamer, 4, Subtarget);
1647       }
1648     }
1649 
1650   // In the above loop we only stash all of the destination registers or emit
1651   // nops if the arguments are already in the right place. Doing the actually
1652   // moving is postponed until after all the registers are stashed so nothing
1653   // is clobbers. We've already added nops to account for the size of mov and
1654   // push if the register is in the right place, so we only have to worry about
1655   // emitting movs.
1656   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1657   // earlier DestReg. We will have already overwritten over the register before
1658   // we can copy from it.
1659   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1660     if (UsedMask[I])
1661       EmitAndCountInstruction(
1662           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1663 
1664   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1665   // name of the trampoline to be implemented by the XRay runtime.
1666   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1667   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1668   if (isPositionIndependent())
1669     TOp.setTargetFlags(X86II::MO_PLT);
1670 
1671   // Emit the call instruction.
1672   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1673                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1674 
1675   // Restore caller-saved and used registers.
1676   for (unsigned I = sizeof UsedMask; I-- > 0;)
1677     if (UsedMask[I])
1678       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1679     else
1680       emitX86Nops(*OutStreamer, 1, Subtarget);
1681 
1682   OutStreamer->AddComment("xray typed event end.");
1683 
1684   // Record the sled version.
1685   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1686 }
1687 
1688 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1689                                                   X86MCInstLower &MCIL) {
1690 
1691   NoAutoPaddingScope NoPadScope(*OutStreamer);
1692 
1693   const Function &F = MF->getFunction();
1694   if (F.hasFnAttribute("patchable-function-entry")) {
1695     unsigned Num;
1696     if (F.getFnAttribute("patchable-function-entry")
1697             .getValueAsString()
1698             .getAsInteger(10, Num))
1699       return;
1700     emitX86Nops(*OutStreamer, Num, Subtarget);
1701     return;
1702   }
1703   // We want to emit the following pattern:
1704   //
1705   //   .p2align 1, ...
1706   // .Lxray_sled_N:
1707   //   jmp .tmpN
1708   //   # 9 bytes worth of noops
1709   //
1710   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1711   // bytes with the following pattern:
1712   //
1713   //   mov %r10, <function id, 32-bit>   // 6 bytes
1714   //   call <relative offset, 32-bits>   // 5 bytes
1715   //
1716   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1717   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1718   OutStreamer->emitLabel(CurSled);
1719 
1720   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1721   // an operand (computed as an offset from the jmp instruction).
1722   // FIXME: Find another less hacky way do force the relative jump.
1723   OutStreamer->emitBytes("\xeb\x09");
1724   emitX86Nops(*OutStreamer, 9, Subtarget);
1725   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1726 }
1727 
1728 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1729                                        X86MCInstLower &MCIL) {
1730   NoAutoPaddingScope NoPadScope(*OutStreamer);
1731 
1732   // Since PATCHABLE_RET takes the opcode of the return statement as an
1733   // argument, we use that to emit the correct form of the RET that we want.
1734   // i.e. when we see this:
1735   //
1736   //   PATCHABLE_RET X86::RET ...
1737   //
1738   // We should emit the RET followed by sleds.
1739   //
1740   //   .p2align 1, ...
1741   // .Lxray_sled_N:
1742   //   ret  # or equivalent instruction
1743   //   # 10 bytes worth of noops
1744   //
1745   // This just makes sure that the alignment for the next instruction is 2.
1746   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1747   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1748   OutStreamer->emitLabel(CurSled);
1749   unsigned OpCode = MI.getOperand(0).getImm();
1750   MCInst Ret;
1751   Ret.setOpcode(OpCode);
1752   for (auto &MO : drop_begin(MI.operands()))
1753     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1754       Ret.addOperand(MaybeOperand.getValue());
1755   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1756   emitX86Nops(*OutStreamer, 10, Subtarget);
1757   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1758 }
1759 
1760 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1761                                              X86MCInstLower &MCIL) {
1762   NoAutoPaddingScope NoPadScope(*OutStreamer);
1763 
1764   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1765   // instruction so we lower that particular instruction and its operands.
1766   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1767   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1768   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1769   // tail call much like how we have it in PATCHABLE_RET.
1770   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1771   OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
1772   OutStreamer->emitLabel(CurSled);
1773   auto Target = OutContext.createTempSymbol();
1774 
1775   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1776   // an operand (computed as an offset from the jmp instruction).
1777   // FIXME: Find another less hacky way do force the relative jump.
1778   OutStreamer->emitBytes("\xeb\x09");
1779   emitX86Nops(*OutStreamer, 9, Subtarget);
1780   OutStreamer->emitLabel(Target);
1781   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1782 
1783   unsigned OpCode = MI.getOperand(0).getImm();
1784   OpCode = convertTailJumpOpcode(OpCode);
1785   MCInst TC;
1786   TC.setOpcode(OpCode);
1787 
1788   // Before emitting the instruction, add a comment to indicate that this is
1789   // indeed a tail call.
1790   OutStreamer->AddComment("TAILCALL");
1791   for (auto &MO : drop_begin(MI.operands()))
1792     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1793       TC.addOperand(MaybeOperand.getValue());
1794   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1795 }
1796 
1797 // Returns instruction preceding MBBI in MachineFunction.
1798 // If MBBI is the first instruction of the first basic block, returns null.
1799 static MachineBasicBlock::const_iterator
1800 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1801   const MachineBasicBlock *MBB = MBBI->getParent();
1802   while (MBBI == MBB->begin()) {
1803     if (MBB == &MBB->getParent()->front())
1804       return MachineBasicBlock::const_iterator();
1805     MBB = MBB->getPrevNode();
1806     MBBI = MBB->end();
1807   }
1808   --MBBI;
1809   return MBBI;
1810 }
1811 
1812 static const Constant *getConstantFromPool(const MachineInstr &MI,
1813                                            const MachineOperand &Op) {
1814   if (!Op.isCPI() || Op.getOffset() != 0)
1815     return nullptr;
1816 
1817   ArrayRef<MachineConstantPoolEntry> Constants =
1818       MI.getParent()->getParent()->getConstantPool()->getConstants();
1819   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1820 
1821   // Bail if this is a machine constant pool entry, we won't be able to dig out
1822   // anything useful.
1823   if (ConstantEntry.isMachineConstantPoolEntry())
1824     return nullptr;
1825 
1826   return ConstantEntry.Val.ConstVal;
1827 }
1828 
1829 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1830                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1831   std::string Comment;
1832 
1833   // Compute the name for a register. This is really goofy because we have
1834   // multiple instruction printers that could (in theory) use different
1835   // names. Fortunately most people use the ATT style (outside of Windows)
1836   // and they actually agree on register naming here. Ultimately, this is
1837   // a comment, and so its OK if it isn't perfect.
1838   auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1839     return X86ATTInstPrinter::getRegisterName(RegNum);
1840   };
1841 
1842   const MachineOperand &DstOp = MI->getOperand(0);
1843   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1844   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1845 
1846   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1847   StringRef Src1Name =
1848       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1849   StringRef Src2Name =
1850       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1851 
1852   // One source operand, fix the mask to print all elements in one span.
1853   SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1854   if (Src1Name == Src2Name)
1855     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1856       if (ShuffleMask[i] >= e)
1857         ShuffleMask[i] -= e;
1858 
1859   raw_string_ostream CS(Comment);
1860   CS << DstName;
1861 
1862   // Handle AVX512 MASK/MASXZ write mask comments.
1863   // MASK: zmmX {%kY}
1864   // MASKZ: zmmX {%kY} {z}
1865   if (SrcOp1Idx > 1) {
1866     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1867 
1868     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1869     if (WriteMaskOp.isReg()) {
1870       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1871 
1872       if (SrcOp1Idx == 2) {
1873         CS << " {z}";
1874       }
1875     }
1876   }
1877 
1878   CS << " = ";
1879 
1880   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1881     if (i != 0)
1882       CS << ",";
1883     if (ShuffleMask[i] == SM_SentinelZero) {
1884       CS << "zero";
1885       continue;
1886     }
1887 
1888     // Otherwise, it must come from src1 or src2.  Print the span of elements
1889     // that comes from this src.
1890     bool isSrc1 = ShuffleMask[i] < (int)e;
1891     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1892 
1893     bool IsFirst = true;
1894     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1895            (ShuffleMask[i] < (int)e) == isSrc1) {
1896       if (!IsFirst)
1897         CS << ',';
1898       else
1899         IsFirst = false;
1900       if (ShuffleMask[i] == SM_SentinelUndef)
1901         CS << "u";
1902       else
1903         CS << ShuffleMask[i] % (int)e;
1904       ++i;
1905     }
1906     CS << ']';
1907     --i; // For loop increments element #.
1908   }
1909   CS.flush();
1910 
1911   return Comment;
1912 }
1913 
1914 static void printConstant(const APInt &Val, raw_ostream &CS) {
1915   if (Val.getBitWidth() <= 64) {
1916     CS << Val.getZExtValue();
1917   } else {
1918     // print multi-word constant as (w0,w1)
1919     CS << "(";
1920     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1921       if (i > 0)
1922         CS << ",";
1923       CS << Val.getRawData()[i];
1924     }
1925     CS << ")";
1926   }
1927 }
1928 
1929 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1930   SmallString<32> Str;
1931   // Force scientific notation to distinquish from integers.
1932   Flt.toString(Str, 0, 0);
1933   CS << Str;
1934 }
1935 
1936 static void printConstant(const Constant *COp, raw_ostream &CS) {
1937   if (isa<UndefValue>(COp)) {
1938     CS << "u";
1939   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1940     printConstant(CI->getValue(), CS);
1941   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1942     printConstant(CF->getValueAPF(), CS);
1943   } else {
1944     CS << "?";
1945   }
1946 }
1947 
1948 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1949   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1950   assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1951 
1952   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1953   if (EmitFPOData) {
1954     X86TargetStreamer *XTS =
1955         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1956     switch (MI->getOpcode()) {
1957     case X86::SEH_PushReg:
1958       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1959       break;
1960     case X86::SEH_StackAlloc:
1961       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1962       break;
1963     case X86::SEH_StackAlign:
1964       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1965       break;
1966     case X86::SEH_SetFrame:
1967       assert(MI->getOperand(1).getImm() == 0 &&
1968              ".cv_fpo_setframe takes no offset");
1969       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1970       break;
1971     case X86::SEH_EndPrologue:
1972       XTS->emitFPOEndPrologue();
1973       break;
1974     case X86::SEH_SaveReg:
1975     case X86::SEH_SaveXMM:
1976     case X86::SEH_PushFrame:
1977       llvm_unreachable("SEH_ directive incompatible with FPO");
1978       break;
1979     default:
1980       llvm_unreachable("expected SEH_ instruction");
1981     }
1982     return;
1983   }
1984 
1985   // Otherwise, use the .seh_ directives for all other Windows platforms.
1986   switch (MI->getOpcode()) {
1987   case X86::SEH_PushReg:
1988     OutStreamer->EmitWinCFIPushReg(MI->getOperand(0).getImm());
1989     break;
1990 
1991   case X86::SEH_SaveReg:
1992     OutStreamer->EmitWinCFISaveReg(MI->getOperand(0).getImm(),
1993                                    MI->getOperand(1).getImm());
1994     break;
1995 
1996   case X86::SEH_SaveXMM:
1997     OutStreamer->EmitWinCFISaveXMM(MI->getOperand(0).getImm(),
1998                                    MI->getOperand(1).getImm());
1999     break;
2000 
2001   case X86::SEH_StackAlloc:
2002     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
2003     break;
2004 
2005   case X86::SEH_SetFrame:
2006     OutStreamer->EmitWinCFISetFrame(MI->getOperand(0).getImm(),
2007                                     MI->getOperand(1).getImm());
2008     break;
2009 
2010   case X86::SEH_PushFrame:
2011     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
2012     break;
2013 
2014   case X86::SEH_EndPrologue:
2015     OutStreamer->EmitWinCFIEndProlog();
2016     break;
2017 
2018   default:
2019     llvm_unreachable("expected SEH_ instruction");
2020   }
2021 }
2022 
2023 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
2024   if (Info.RegClass == X86::VR128RegClassID ||
2025       Info.RegClass == X86::VR128XRegClassID)
2026     return 128;
2027   if (Info.RegClass == X86::VR256RegClassID ||
2028       Info.RegClass == X86::VR256XRegClassID)
2029     return 256;
2030   if (Info.RegClass == X86::VR512RegClassID)
2031     return 512;
2032   llvm_unreachable("Unknown register class!");
2033 }
2034 
2035 static void addConstantComments(const MachineInstr *MI,
2036                                 MCStreamer &OutStreamer) {
2037   switch (MI->getOpcode()) {
2038   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
2039   // a constant shuffle mask. We won't be able to do this at the MC layer
2040   // because the mask isn't an immediate.
2041   case X86::PSHUFBrm:
2042   case X86::VPSHUFBrm:
2043   case X86::VPSHUFBYrm:
2044   case X86::VPSHUFBZ128rm:
2045   case X86::VPSHUFBZ128rmk:
2046   case X86::VPSHUFBZ128rmkz:
2047   case X86::VPSHUFBZ256rm:
2048   case X86::VPSHUFBZ256rmk:
2049   case X86::VPSHUFBZ256rmkz:
2050   case X86::VPSHUFBZrm:
2051   case X86::VPSHUFBZrmk:
2052   case X86::VPSHUFBZrmkz: {
2053     unsigned SrcIdx = 1;
2054     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2055       // Skip mask operand.
2056       ++SrcIdx;
2057       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2058         // Skip passthru operand.
2059         ++SrcIdx;
2060       }
2061     }
2062     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2063 
2064     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2065            "Unexpected number of operands!");
2066 
2067     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2068     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2069       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2070       SmallVector<int, 64> Mask;
2071       DecodePSHUFBMask(C, Width, Mask);
2072       if (!Mask.empty())
2073         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2074     }
2075     break;
2076   }
2077 
2078   case X86::VPERMILPSrm:
2079   case X86::VPERMILPSYrm:
2080   case X86::VPERMILPSZ128rm:
2081   case X86::VPERMILPSZ128rmk:
2082   case X86::VPERMILPSZ128rmkz:
2083   case X86::VPERMILPSZ256rm:
2084   case X86::VPERMILPSZ256rmk:
2085   case X86::VPERMILPSZ256rmkz:
2086   case X86::VPERMILPSZrm:
2087   case X86::VPERMILPSZrmk:
2088   case X86::VPERMILPSZrmkz:
2089   case X86::VPERMILPDrm:
2090   case X86::VPERMILPDYrm:
2091   case X86::VPERMILPDZ128rm:
2092   case X86::VPERMILPDZ128rmk:
2093   case X86::VPERMILPDZ128rmkz:
2094   case X86::VPERMILPDZ256rm:
2095   case X86::VPERMILPDZ256rmk:
2096   case X86::VPERMILPDZ256rmkz:
2097   case X86::VPERMILPDZrm:
2098   case X86::VPERMILPDZrmk:
2099   case X86::VPERMILPDZrmkz: {
2100     unsigned ElSize;
2101     switch (MI->getOpcode()) {
2102     default: llvm_unreachable("Invalid opcode");
2103     case X86::VPERMILPSrm:
2104     case X86::VPERMILPSYrm:
2105     case X86::VPERMILPSZ128rm:
2106     case X86::VPERMILPSZ256rm:
2107     case X86::VPERMILPSZrm:
2108     case X86::VPERMILPSZ128rmkz:
2109     case X86::VPERMILPSZ256rmkz:
2110     case X86::VPERMILPSZrmkz:
2111     case X86::VPERMILPSZ128rmk:
2112     case X86::VPERMILPSZ256rmk:
2113     case X86::VPERMILPSZrmk:
2114       ElSize = 32;
2115       break;
2116     case X86::VPERMILPDrm:
2117     case X86::VPERMILPDYrm:
2118     case X86::VPERMILPDZ128rm:
2119     case X86::VPERMILPDZ256rm:
2120     case X86::VPERMILPDZrm:
2121     case X86::VPERMILPDZ128rmkz:
2122     case X86::VPERMILPDZ256rmkz:
2123     case X86::VPERMILPDZrmkz:
2124     case X86::VPERMILPDZ128rmk:
2125     case X86::VPERMILPDZ256rmk:
2126     case X86::VPERMILPDZrmk:
2127       ElSize = 64;
2128       break;
2129     }
2130 
2131     unsigned SrcIdx = 1;
2132     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
2133       // Skip mask operand.
2134       ++SrcIdx;
2135       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
2136         // Skip passthru operand.
2137         ++SrcIdx;
2138       }
2139     }
2140     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
2141 
2142     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
2143            "Unexpected number of operands!");
2144 
2145     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
2146     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2147       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2148       SmallVector<int, 16> Mask;
2149       DecodeVPERMILPMask(C, ElSize, Width, Mask);
2150       if (!Mask.empty())
2151         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
2152     }
2153     break;
2154   }
2155 
2156   case X86::VPERMIL2PDrm:
2157   case X86::VPERMIL2PSrm:
2158   case X86::VPERMIL2PDYrm:
2159   case X86::VPERMIL2PSYrm: {
2160     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
2161            "Unexpected number of operands!");
2162 
2163     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
2164     if (!CtrlOp.isImm())
2165       break;
2166 
2167     unsigned ElSize;
2168     switch (MI->getOpcode()) {
2169     default: llvm_unreachable("Invalid opcode");
2170     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
2171     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
2172     }
2173 
2174     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2175     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2176       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2177       SmallVector<int, 16> Mask;
2178       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
2179       if (!Mask.empty())
2180         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2181     }
2182     break;
2183   }
2184 
2185   case X86::VPPERMrrm: {
2186     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
2187            "Unexpected number of operands!");
2188 
2189     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
2190     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
2191       unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
2192       SmallVector<int, 16> Mask;
2193       DecodeVPPERMMask(C, Width, Mask);
2194       if (!Mask.empty())
2195         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
2196     }
2197     break;
2198   }
2199 
2200   case X86::MMX_MOVQ64rm: {
2201     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
2202            "Unexpected number of operands!");
2203     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2204       std::string Comment;
2205       raw_string_ostream CS(Comment);
2206       const MachineOperand &DstOp = MI->getOperand(0);
2207       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2208       if (auto *CF = dyn_cast<ConstantFP>(C)) {
2209         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
2210         OutStreamer.AddComment(CS.str());
2211       }
2212     }
2213     break;
2214   }
2215 
2216 #define MOV_CASE(Prefix, Suffix)                                               \
2217   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
2218   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
2219   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
2220   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
2221   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
2222   case X86::Prefix##MOVDQU##Suffix##rm:
2223 
2224 #define MOV_AVX512_CASE(Suffix)                                                \
2225   case X86::VMOVDQA64##Suffix##rm:                                             \
2226   case X86::VMOVDQA32##Suffix##rm:                                             \
2227   case X86::VMOVDQU64##Suffix##rm:                                             \
2228   case X86::VMOVDQU32##Suffix##rm:                                             \
2229   case X86::VMOVDQU16##Suffix##rm:                                             \
2230   case X86::VMOVDQU8##Suffix##rm:                                              \
2231   case X86::VMOVAPS##Suffix##rm:                                               \
2232   case X86::VMOVAPD##Suffix##rm:                                               \
2233   case X86::VMOVUPS##Suffix##rm:                                               \
2234   case X86::VMOVUPD##Suffix##rm:
2235 
2236 #define CASE_ALL_MOV_RM()                                                      \
2237   MOV_CASE(, )   /* SSE */                                                     \
2238   MOV_CASE(V, )  /* AVX-128 */                                                 \
2239   MOV_CASE(V, Y) /* AVX-256 */                                                 \
2240   MOV_AVX512_CASE(Z)                                                           \
2241   MOV_AVX512_CASE(Z256)                                                        \
2242   MOV_AVX512_CASE(Z128)
2243 
2244     // For loads from a constant pool to a vector register, print the constant
2245     // loaded.
2246     CASE_ALL_MOV_RM()
2247   case X86::VBROADCASTF128:
2248   case X86::VBROADCASTI128:
2249   case X86::VBROADCASTF32X4Z256rm:
2250   case X86::VBROADCASTF32X4rm:
2251   case X86::VBROADCASTF32X8rm:
2252   case X86::VBROADCASTF64X2Z128rm:
2253   case X86::VBROADCASTF64X2rm:
2254   case X86::VBROADCASTF64X4rm:
2255   case X86::VBROADCASTI32X4Z256rm:
2256   case X86::VBROADCASTI32X4rm:
2257   case X86::VBROADCASTI32X8rm:
2258   case X86::VBROADCASTI64X2Z128rm:
2259   case X86::VBROADCASTI64X2rm:
2260   case X86::VBROADCASTI64X4rm:
2261     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2262            "Unexpected number of operands!");
2263     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2264       int NumLanes = 1;
2265       // Override NumLanes for the broadcast instructions.
2266       switch (MI->getOpcode()) {
2267       case X86::VBROADCASTF128:        NumLanes = 2; break;
2268       case X86::VBROADCASTI128:        NumLanes = 2; break;
2269       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2270       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
2271       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
2272       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2273       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
2274       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
2275       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2276       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
2277       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
2278       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2279       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
2280       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
2281       }
2282 
2283       std::string Comment;
2284       raw_string_ostream CS(Comment);
2285       const MachineOperand &DstOp = MI->getOperand(0);
2286       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2287       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2288         CS << "[";
2289         for (int l = 0; l != NumLanes; ++l) {
2290           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2291                ++i) {
2292             if (i != 0 || l != 0)
2293               CS << ",";
2294             if (CDS->getElementType()->isIntegerTy())
2295               printConstant(CDS->getElementAsAPInt(i), CS);
2296             else if (CDS->getElementType()->isHalfTy() ||
2297                      CDS->getElementType()->isFloatTy() ||
2298                      CDS->getElementType()->isDoubleTy())
2299               printConstant(CDS->getElementAsAPFloat(i), CS);
2300             else
2301               CS << "?";
2302           }
2303         }
2304         CS << "]";
2305         OutStreamer.AddComment(CS.str());
2306       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2307         CS << "<";
2308         for (int l = 0; l != NumLanes; ++l) {
2309           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2310                ++i) {
2311             if (i != 0 || l != 0)
2312               CS << ",";
2313             printConstant(CV->getOperand(i), CS);
2314           }
2315         }
2316         CS << ">";
2317         OutStreamer.AddComment(CS.str());
2318       }
2319     }
2320     break;
2321 
2322   case X86::MOVDDUPrm:
2323   case X86::VMOVDDUPrm:
2324   case X86::VMOVDDUPZ128rm:
2325   case X86::VBROADCASTSSrm:
2326   case X86::VBROADCASTSSYrm:
2327   case X86::VBROADCASTSSZ128rm:
2328   case X86::VBROADCASTSSZ256rm:
2329   case X86::VBROADCASTSSZrm:
2330   case X86::VBROADCASTSDYrm:
2331   case X86::VBROADCASTSDZ256rm:
2332   case X86::VBROADCASTSDZrm:
2333   case X86::VPBROADCASTBrm:
2334   case X86::VPBROADCASTBYrm:
2335   case X86::VPBROADCASTBZ128rm:
2336   case X86::VPBROADCASTBZ256rm:
2337   case X86::VPBROADCASTBZrm:
2338   case X86::VPBROADCASTDrm:
2339   case X86::VPBROADCASTDYrm:
2340   case X86::VPBROADCASTDZ128rm:
2341   case X86::VPBROADCASTDZ256rm:
2342   case X86::VPBROADCASTDZrm:
2343   case X86::VPBROADCASTQrm:
2344   case X86::VPBROADCASTQYrm:
2345   case X86::VPBROADCASTQZ128rm:
2346   case X86::VPBROADCASTQZ256rm:
2347   case X86::VPBROADCASTQZrm:
2348   case X86::VPBROADCASTWrm:
2349   case X86::VPBROADCASTWYrm:
2350   case X86::VPBROADCASTWZ128rm:
2351   case X86::VPBROADCASTWZ256rm:
2352   case X86::VPBROADCASTWZrm:
2353     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
2354            "Unexpected number of operands!");
2355     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
2356       int NumElts;
2357       switch (MI->getOpcode()) {
2358       default: llvm_unreachable("Invalid opcode");
2359       case X86::MOVDDUPrm:          NumElts = 2;  break;
2360       case X86::VMOVDDUPrm:         NumElts = 2;  break;
2361       case X86::VMOVDDUPZ128rm:     NumElts = 2;  break;
2362       case X86::VBROADCASTSSrm:     NumElts = 4;  break;
2363       case X86::VBROADCASTSSYrm:    NumElts = 8;  break;
2364       case X86::VBROADCASTSSZ128rm: NumElts = 4;  break;
2365       case X86::VBROADCASTSSZ256rm: NumElts = 8;  break;
2366       case X86::VBROADCASTSSZrm:    NumElts = 16; break;
2367       case X86::VBROADCASTSDYrm:    NumElts = 4;  break;
2368       case X86::VBROADCASTSDZ256rm: NumElts = 4;  break;
2369       case X86::VBROADCASTSDZrm:    NumElts = 8;  break;
2370       case X86::VPBROADCASTBrm:     NumElts = 16; break;
2371       case X86::VPBROADCASTBYrm:    NumElts = 32; break;
2372       case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
2373       case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
2374       case X86::VPBROADCASTBZrm:    NumElts = 64; break;
2375       case X86::VPBROADCASTDrm:     NumElts = 4;  break;
2376       case X86::VPBROADCASTDYrm:    NumElts = 8;  break;
2377       case X86::VPBROADCASTDZ128rm: NumElts = 4;  break;
2378       case X86::VPBROADCASTDZ256rm: NumElts = 8;  break;
2379       case X86::VPBROADCASTDZrm:    NumElts = 16; break;
2380       case X86::VPBROADCASTQrm:     NumElts = 2;  break;
2381       case X86::VPBROADCASTQYrm:    NumElts = 4;  break;
2382       case X86::VPBROADCASTQZ128rm: NumElts = 2;  break;
2383       case X86::VPBROADCASTQZ256rm: NumElts = 4;  break;
2384       case X86::VPBROADCASTQZrm:    NumElts = 8;  break;
2385       case X86::VPBROADCASTWrm:     NumElts = 8;  break;
2386       case X86::VPBROADCASTWYrm:    NumElts = 16; break;
2387       case X86::VPBROADCASTWZ128rm: NumElts = 8;  break;
2388       case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
2389       case X86::VPBROADCASTWZrm:    NumElts = 32; break;
2390       }
2391 
2392       std::string Comment;
2393       raw_string_ostream CS(Comment);
2394       const MachineOperand &DstOp = MI->getOperand(0);
2395       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2396       CS << "[";
2397       for (int i = 0; i != NumElts; ++i) {
2398         if (i != 0)
2399           CS << ",";
2400         printConstant(C, CS);
2401       }
2402       CS << "]";
2403       OutStreamer.AddComment(CS.str());
2404     }
2405   }
2406 }
2407 
2408 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2409   X86MCInstLower MCInstLowering(*MF, *this);
2410   const X86RegisterInfo *RI =
2411       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2412 
2413   if (MI->getOpcode() == X86::OR64rm) {
2414     for (auto &Opd : MI->operands()) {
2415       if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2416                                 "swift_async_extendedFramePointerFlags") {
2417         ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2418       }
2419     }
2420   }
2421 
2422   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2423   // are compressed from EVEX encoding to VEX encoding.
2424   if (TM.Options.MCOptions.ShowMCEncoding) {
2425     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2426       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2427   }
2428 
2429   // Add comments for values loaded from constant pool.
2430   if (OutStreamer->isVerboseAsm())
2431     addConstantComments(MI, *OutStreamer);
2432 
2433   switch (MI->getOpcode()) {
2434   case TargetOpcode::DBG_VALUE:
2435     llvm_unreachable("Should be handled target independently");
2436 
2437   // Emit nothing here but a comment if we can.
2438   case X86::Int_MemBarrier:
2439     OutStreamer->emitRawComment("MEMBARRIER");
2440     return;
2441 
2442   case X86::EH_RETURN:
2443   case X86::EH_RETURN64: {
2444     // Lower these as normal, but add some comments.
2445     Register Reg = MI->getOperand(0).getReg();
2446     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2447                             X86ATTInstPrinter::getRegisterName(Reg));
2448     break;
2449   }
2450   case X86::CLEANUPRET: {
2451     // Lower these as normal, but add some comments.
2452     OutStreamer->AddComment("CLEANUPRET");
2453     break;
2454   }
2455 
2456   case X86::CATCHRET: {
2457     // Lower these as normal, but add some comments.
2458     OutStreamer->AddComment("CATCHRET");
2459     break;
2460   }
2461 
2462   case X86::ENDBR32:
2463   case X86::ENDBR64: {
2464     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2465     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2466     // non-empty. If MI is the initial ENDBR, place the
2467     // __patchable_function_entries label after ENDBR.
2468     if (CurrentPatchableFunctionEntrySym &&
2469         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2470         MI == &MF->front().front()) {
2471       MCInst Inst;
2472       MCInstLowering.Lower(MI, Inst);
2473       EmitAndCountInstruction(Inst);
2474       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2475       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2476       return;
2477     }
2478     break;
2479   }
2480 
2481   case X86::TAILJMPr:
2482   case X86::TAILJMPm:
2483   case X86::TAILJMPd:
2484   case X86::TAILJMPd_CC:
2485   case X86::TAILJMPr64:
2486   case X86::TAILJMPm64:
2487   case X86::TAILJMPd64:
2488   case X86::TAILJMPd64_CC:
2489   case X86::TAILJMPr64_REX:
2490   case X86::TAILJMPm64_REX:
2491     // Lower these as normal, but add some comments.
2492     OutStreamer->AddComment("TAILCALL");
2493     break;
2494 
2495   case X86::TLS_addr32:
2496   case X86::TLS_addr64:
2497   case X86::TLS_addrX32:
2498   case X86::TLS_base_addr32:
2499   case X86::TLS_base_addr64:
2500   case X86::TLS_base_addrX32:
2501     return LowerTlsAddr(MCInstLowering, *MI);
2502 
2503   case X86::MOVPC32r: {
2504     // This is a pseudo op for a two instruction sequence with a label, which
2505     // looks like:
2506     //     call "L1$pb"
2507     // "L1$pb":
2508     //     popl %esi
2509 
2510     // Emit the call.
2511     MCSymbol *PICBase = MF->getPICBaseSymbol();
2512     // FIXME: We would like an efficient form for this, so we don't have to do a
2513     // lot of extra uniquing.
2514     EmitAndCountInstruction(
2515         MCInstBuilder(X86::CALLpcrel32)
2516             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2517 
2518     const X86FrameLowering *FrameLowering =
2519         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2520     bool hasFP = FrameLowering->hasFP(*MF);
2521 
2522     // TODO: This is needed only if we require precise CFA.
2523     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2524                                !OutStreamer->getDwarfFrameInfos().back().End;
2525 
2526     int stackGrowth = -RI->getSlotSize();
2527 
2528     if (HasActiveDwarfFrame && !hasFP) {
2529       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2530     }
2531 
2532     // Emit the label.
2533     OutStreamer->emitLabel(PICBase);
2534 
2535     // popl $reg
2536     EmitAndCountInstruction(
2537         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2538 
2539     if (HasActiveDwarfFrame && !hasFP) {
2540       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2541     }
2542     return;
2543   }
2544 
2545   case X86::ADD32ri: {
2546     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2547     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2548       break;
2549 
2550     // Okay, we have something like:
2551     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2552 
2553     // For this, we want to print something like:
2554     //   MYGLOBAL + (. - PICBASE)
2555     // However, we can't generate a ".", so just emit a new label here and refer
2556     // to it.
2557     MCSymbol *DotSym = OutContext.createTempSymbol();
2558     OutStreamer->emitLabel(DotSym);
2559 
2560     // Now that we have emitted the label, lower the complex operand expression.
2561     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2562 
2563     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2564     const MCExpr *PICBase =
2565         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2566     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2567 
2568     DotExpr = MCBinaryExpr::createAdd(
2569         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2570 
2571     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2572                                 .addReg(MI->getOperand(0).getReg())
2573                                 .addReg(MI->getOperand(1).getReg())
2574                                 .addExpr(DotExpr));
2575     return;
2576   }
2577   case TargetOpcode::STATEPOINT:
2578     return LowerSTATEPOINT(*MI, MCInstLowering);
2579 
2580   case TargetOpcode::FAULTING_OP:
2581     return LowerFAULTING_OP(*MI, MCInstLowering);
2582 
2583   case TargetOpcode::FENTRY_CALL:
2584     return LowerFENTRY_CALL(*MI, MCInstLowering);
2585 
2586   case TargetOpcode::PATCHABLE_OP:
2587     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2588 
2589   case TargetOpcode::STACKMAP:
2590     return LowerSTACKMAP(*MI);
2591 
2592   case TargetOpcode::PATCHPOINT:
2593     return LowerPATCHPOINT(*MI, MCInstLowering);
2594 
2595   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2596     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2597 
2598   case TargetOpcode::PATCHABLE_RET:
2599     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2600 
2601   case TargetOpcode::PATCHABLE_TAIL_CALL:
2602     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2603 
2604   case TargetOpcode::PATCHABLE_EVENT_CALL:
2605     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2606 
2607   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2608     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2609 
2610   case X86::MORESTACK_RET:
2611     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2612     return;
2613 
2614   case X86::ASAN_CHECK_MEMACCESS:
2615     return LowerASAN_CHECK_MEMACCESS(*MI);
2616 
2617   case X86::MORESTACK_RET_RESTORE_R10:
2618     // Return, then restore R10.
2619     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2620     EmitAndCountInstruction(
2621         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2622     return;
2623 
2624   case X86::SEH_PushReg:
2625   case X86::SEH_SaveReg:
2626   case X86::SEH_SaveXMM:
2627   case X86::SEH_StackAlloc:
2628   case X86::SEH_StackAlign:
2629   case X86::SEH_SetFrame:
2630   case X86::SEH_PushFrame:
2631   case X86::SEH_EndPrologue:
2632     EmitSEHInstruction(MI);
2633     return;
2634 
2635   case X86::SEH_Epilogue: {
2636     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2637     MachineBasicBlock::const_iterator MBBI(MI);
2638     // Check if preceded by a call and emit nop if so.
2639     for (MBBI = PrevCrossBBInst(MBBI);
2640          MBBI != MachineBasicBlock::const_iterator();
2641          MBBI = PrevCrossBBInst(MBBI)) {
2642       // Conservatively assume that pseudo instructions don't emit code and keep
2643       // looking for a call. We may emit an unnecessary nop in some cases.
2644       if (!MBBI->isPseudo()) {
2645         if (MBBI->isCall())
2646           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2647         break;
2648       }
2649     }
2650     return;
2651   }
2652   case X86::UBSAN_UD1:
2653     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2654                                 .addReg(X86::EAX)
2655                                 .addReg(X86::EAX)
2656                                 .addImm(1)
2657                                 .addReg(X86::NoRegister)
2658                                 .addImm(MI->getOperand(0).getImm())
2659                                 .addReg(X86::NoRegister));
2660     return;
2661   }
2662 
2663   MCInst TmpInst;
2664   MCInstLowering.Lower(MI, TmpInst);
2665 
2666   // Stackmap shadows cannot include branch targets, so we can count the bytes
2667   // in a call towards the shadow, but must ensure that the no thread returns
2668   // in to the stackmap shadow.  The only way to achieve this is if the call
2669   // is at the end of the shadow.
2670   if (MI->isCall()) {
2671     // Count then size of the call towards the shadow
2672     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2673     // Then flush the shadow so that we fill with nops before the call, not
2674     // after it.
2675     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2676     // Then emit the call
2677     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2678     return;
2679   }
2680 
2681   EmitAndCountInstruction(TmpInst);
2682 }
2683