xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86MCInstLower.cpp (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains code to lower X86 MachineInstrs to their corresponding
10 // MCInst records.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/X86ATTInstPrinter.h"
15 #include "MCTargetDesc/X86BaseInfo.h"
16 #include "MCTargetDesc/X86EncodingOptimization.h"
17 #include "MCTargetDesc/X86InstComments.h"
18 #include "MCTargetDesc/X86ShuffleDecode.h"
19 #include "MCTargetDesc/X86TargetStreamer.h"
20 #include "X86AsmPrinter.h"
21 #include "X86MachineFunctionInfo.h"
22 #include "X86RegisterInfo.h"
23 #include "X86ShuffleDecodeConstantPool.h"
24 #include "X86Subtarget.h"
25 #include "llvm/ADT/SmallString.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/ADT/iterator_range.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/StackMaps.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/IR/Mangler.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCCodeEmitter.h"
38 #include "llvm/MC/MCContext.h"
39 #include "llvm/MC/MCExpr.h"
40 #include "llvm/MC/MCFixup.h"
41 #include "llvm/MC/MCInst.h"
42 #include "llvm/MC/MCInstBuilder.h"
43 #include "llvm/MC/MCSection.h"
44 #include "llvm/MC/MCSectionELF.h"
45 #include "llvm/MC/MCStreamer.h"
46 #include "llvm/MC/MCSymbol.h"
47 #include "llvm/MC/MCSymbolELF.h"
48 #include "llvm/MC/TargetRegistry.h"
49 #include "llvm/Target/TargetLoweringObjectFile.h"
50 #include "llvm/Target/TargetMachine.h"
51 #include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
52 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
53 #include <string>
54 
55 using namespace llvm;
56 
57 namespace {
58 
/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
///
/// An instance is bound to one MachineFunction and to the X86AsmPrinter that
/// emits it; all lowering entry points are const.
class X86MCInstLower {
  // NOTE: MAI is initialized from TM in the constructor, so TM must stay
  // declared before MAI.
  MCContext &Ctx;
  const MachineFunction &MF;
  const TargetMachine &TM;
  const MCAsmInfo &MAI;
  X86AsmPrinter &AsmPrinter;

public:
  X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);

  /// Lower a single operand \p MO of \p MI. Returns std::nullopt for operands
  /// that have no MCInst counterpart (implicit registers and register masks).
  std::optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
                                               const MachineOperand &MO) const;
  /// Lower the whole instruction \p MI into \p OutMI.
  void Lower(const MachineInstr *MI, MCInst &OutMI) const;

  /// Return the MCSymbol referenced by a global, external-symbol or basic
  /// block operand, taking the operand's target flags into account.
  MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
  /// Wrap \p Sym in the expression selected by the target flags (and offset)
  /// of \p MO.
  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;

private:
  /// Shorthand for the MachO-specific module info of the current function.
  MachineModuleInfoMachO &getMachOMMI() const;
};
80 
81 } // end anonymous namespace
82 
83 /// A RAII helper which defines a region of instructions which can't have
84 /// padding added between them for correctness.
85 struct NoAutoPaddingScope {
86   MCStreamer &OS;
87   const bool OldAllowAutoPadding;
88   NoAutoPaddingScope(MCStreamer &OS)
89       : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
90     changeAndComment(false);
91   }
92   ~NoAutoPaddingScope() { changeAndComment(OldAllowAutoPadding); }
93   void changeAndComment(bool b) {
94     if (b == OS.getAllowAutoPadding())
95       return;
96     OS.setAllowAutoPadding(b);
97     if (b)
98       OS.emitRawComment("autopadding");
99     else
100       OS.emitRawComment("noautopadding");
101   }
102 };
103 
// Emit a minimal sequence of nops spanning NumBytes bytes.
// Forward declaration only: the definition comes later in this file, but
// StackMapShadowTracker::emitShadowPadding() below already needs to call it.
static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
                        const X86Subtarget *Subtarget);
107 
108 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
109                                                  const MCSubtargetInfo &STI,
110                                                  MCCodeEmitter *CodeEmitter) {
111   if (InShadow) {
112     SmallString<256> Code;
113     SmallVector<MCFixup, 4> Fixups;
114     CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
115     CurrentShadowSize += Code.size();
116     if (CurrentShadowSize >= RequiredShadowSize)
117       InShadow = false; // The shadow is big enough. Stop counting.
118   }
119 }
120 
121 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
122     MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
123   if (InShadow && CurrentShadowSize < RequiredShadowSize) {
124     InShadow = false;
125     emitX86Nops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
126                 &MF->getSubtarget<X86Subtarget>());
127   }
128 }
129 
130 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
131   OutStreamer->emitInstruction(Inst, getSubtargetInfo());
132   SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
133 }
134 
/// Bind the lowering helper to the function \p mf and to the printer
/// \p asmprinter. The MC context, target machine and asm info are all taken
/// from \p mf.
X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
                               X86AsmPrinter &asmprinter)
    // MAI is read through the TM member, which relies on TM being declared
    // (and therefore initialized) before MAI in the class.
    : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
      AsmPrinter(asmprinter) {}
139 
140 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
141   return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
142 }
143 
144 /// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
145 /// operand to an MCSymbol.
146 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
147   const Triple &TT = TM.getTargetTriple();
148   if (MO.isGlobal() && TT.isOSBinFormatELF())
149     return AsmPrinter.getSymbolPreferLocal(*MO.getGlobal());
150 
151   const DataLayout &DL = MF.getDataLayout();
152   assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
153          "Isn't a symbol reference");
154 
155   MCSymbol *Sym = nullptr;
156   SmallString<128> Name;
157   StringRef Suffix;
158 
159   switch (MO.getTargetFlags()) {
160   case X86II::MO_DLLIMPORT:
161     // Handle dllimport linkage.
162     Name += "__imp_";
163     break;
164   case X86II::MO_COFFSTUB:
165     Name += ".refptr.";
166     break;
167   case X86II::MO_DARWIN_NONLAZY:
168   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
169     Suffix = "$non_lazy_ptr";
170     break;
171   }
172 
173   if (!Suffix.empty())
174     Name += DL.getPrivateGlobalPrefix();
175 
176   if (MO.isGlobal()) {
177     const GlobalValue *GV = MO.getGlobal();
178     AsmPrinter.getNameWithPrefix(Name, GV);
179   } else if (MO.isSymbol()) {
180     Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
181   } else if (MO.isMBB()) {
182     assert(Suffix.empty());
183     Sym = MO.getMBB()->getSymbol();
184   }
185 
186   Name += Suffix;
187   if (!Sym)
188     Sym = Ctx.getOrCreateSymbol(Name);
189 
190   // If the target flags on the operand changes the name of the symbol, do that
191   // before we return the symbol.
192   switch (MO.getTargetFlags()) {
193   default:
194     break;
195   case X86II::MO_COFFSTUB: {
196     MachineModuleInfoCOFF &MMICOFF =
197         MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
198     MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
199     if (!StubSym.getPointer()) {
200       assert(MO.isGlobal() && "Extern symbol not handled yet");
201       StubSym = MachineModuleInfoImpl::StubValueTy(
202           AsmPrinter.getSymbol(MO.getGlobal()), true);
203     }
204     break;
205   }
206   case X86II::MO_DARWIN_NONLAZY:
207   case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
208     MachineModuleInfoImpl::StubValueTy &StubSym =
209         getMachOMMI().getGVStubEntry(Sym);
210     if (!StubSym.getPointer()) {
211       assert(MO.isGlobal() && "Extern symbol not handled yet");
212       StubSym = MachineModuleInfoImpl::StubValueTy(
213           AsmPrinter.getSymbol(MO.getGlobal()),
214           !MO.getGlobal()->hasInternalLinkage());
215     }
216     break;
217   }
218   }
219 
220   return Sym;
221 }
222 
223 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
224                                              MCSymbol *Sym) const {
225   // FIXME: We would like an efficient form for this, so we don't have to do a
226   // lot of extra uniquing.
227   const MCExpr *Expr = nullptr;
228   MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
229 
230   switch (MO.getTargetFlags()) {
231   default:
232     llvm_unreachable("Unknown target flag on GV operand");
233   case X86II::MO_NO_FLAG: // No flag.
234   // These affect the name of the symbol, not any suffix.
235   case X86II::MO_DARWIN_NONLAZY:
236   case X86II::MO_DLLIMPORT:
237   case X86II::MO_COFFSTUB:
238     break;
239 
240   case X86II::MO_TLVP:
241     RefKind = MCSymbolRefExpr::VK_TLVP;
242     break;
243   case X86II::MO_TLVP_PIC_BASE:
244     Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
245     // Subtract the pic base.
246     Expr = MCBinaryExpr::createSub(
247         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
248     break;
249   case X86II::MO_SECREL:
250     RefKind = MCSymbolRefExpr::VK_SECREL;
251     break;
252   case X86II::MO_TLSGD:
253     RefKind = MCSymbolRefExpr::VK_TLSGD;
254     break;
255   case X86II::MO_TLSLD:
256     RefKind = MCSymbolRefExpr::VK_TLSLD;
257     break;
258   case X86II::MO_TLSLDM:
259     RefKind = MCSymbolRefExpr::VK_TLSLDM;
260     break;
261   case X86II::MO_GOTTPOFF:
262     RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
263     break;
264   case X86II::MO_INDNTPOFF:
265     RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
266     break;
267   case X86II::MO_TPOFF:
268     RefKind = MCSymbolRefExpr::VK_TPOFF;
269     break;
270   case X86II::MO_DTPOFF:
271     RefKind = MCSymbolRefExpr::VK_DTPOFF;
272     break;
273   case X86II::MO_NTPOFF:
274     RefKind = MCSymbolRefExpr::VK_NTPOFF;
275     break;
276   case X86II::MO_GOTNTPOFF:
277     RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
278     break;
279   case X86II::MO_GOTPCREL:
280     RefKind = MCSymbolRefExpr::VK_GOTPCREL;
281     break;
282   case X86II::MO_GOTPCREL_NORELAX:
283     RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
284     break;
285   case X86II::MO_GOT:
286     RefKind = MCSymbolRefExpr::VK_GOT;
287     break;
288   case X86II::MO_GOTOFF:
289     RefKind = MCSymbolRefExpr::VK_GOTOFF;
290     break;
291   case X86II::MO_PLT:
292     RefKind = MCSymbolRefExpr::VK_PLT;
293     break;
294   case X86II::MO_ABS8:
295     RefKind = MCSymbolRefExpr::VK_X86_ABS8;
296     break;
297   case X86II::MO_PIC_BASE_OFFSET:
298   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
299     Expr = MCSymbolRefExpr::create(Sym, Ctx);
300     // Subtract the pic base.
301     Expr = MCBinaryExpr::createSub(
302         Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
303     if (MO.isJTI()) {
304       assert(MAI.doesSetDirectiveSuppressReloc());
305       // If .set directive is supported, use it to reduce the number of
306       // relocations the assembler will generate for differences between
307       // local labels. This is only safe when the symbols are in the same
308       // section so we are restricting it to jumptable references.
309       MCSymbol *Label = Ctx.createTempSymbol();
310       AsmPrinter.OutStreamer->emitAssignment(Label, Expr);
311       Expr = MCSymbolRefExpr::create(Label, Ctx);
312     }
313     break;
314   }
315 
316   if (!Expr)
317     Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
318 
319   if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
320     Expr = MCBinaryExpr::createAdd(
321         Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
322   return MCOperand::createExpr(Expr);
323 }
324 
325 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
326   return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
327 }
328 
329 std::optional<MCOperand>
330 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
331                                     const MachineOperand &MO) const {
332   switch (MO.getType()) {
333   default:
334     MI->print(errs());
335     llvm_unreachable("unknown operand type");
336   case MachineOperand::MO_Register:
337     // Ignore all implicit register operands.
338     if (MO.isImplicit())
339       return std::nullopt;
340     return MCOperand::createReg(MO.getReg());
341   case MachineOperand::MO_Immediate:
342     return MCOperand::createImm(MO.getImm());
343   case MachineOperand::MO_MachineBasicBlock:
344   case MachineOperand::MO_GlobalAddress:
345   case MachineOperand::MO_ExternalSymbol:
346     return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
347   case MachineOperand::MO_MCSymbol:
348     return LowerSymbolOperand(MO, MO.getMCSymbol());
349   case MachineOperand::MO_JumpTableIndex:
350     return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
351   case MachineOperand::MO_ConstantPoolIndex:
352     return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
353   case MachineOperand::MO_BlockAddress:
354     return LowerSymbolOperand(
355         MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
356   case MachineOperand::MO_RegisterMask:
357     // Ignore call clobbers.
358     return std::nullopt;
359   }
360 }
361 
362 // Replace TAILJMP opcodes with their equivalent opcodes that have encoding
363 // information.
364 static unsigned convertTailJumpOpcode(unsigned Opcode) {
365   switch (Opcode) {
366   case X86::TAILJMPr:
367     Opcode = X86::JMP32r;
368     break;
369   case X86::TAILJMPm:
370     Opcode = X86::JMP32m;
371     break;
372   case X86::TAILJMPr64:
373     Opcode = X86::JMP64r;
374     break;
375   case X86::TAILJMPm64:
376     Opcode = X86::JMP64m;
377     break;
378   case X86::TAILJMPr64_REX:
379     Opcode = X86::JMP64r_REX;
380     break;
381   case X86::TAILJMPm64_REX:
382     Opcode = X86::JMP64m_REX;
383     break;
384   case X86::TAILJMPd:
385   case X86::TAILJMPd64:
386     Opcode = X86::JMP_1;
387     break;
388   case X86::TAILJMPd_CC:
389   case X86::TAILJMPd64_CC:
390     Opcode = X86::JCC_1;
391     break;
392   }
393 
394   return Opcode;
395 }
396 
/// Lower the machine instruction \p MI into \p OutMI.
///
/// The opcode and every operand that LowerMachineOperand() keeps are copied
/// first. Then a chain of X86:: encoding optimizations is tried in order; as
/// soon as one of them returns true the function returns. Otherwise a few
/// opcodes get dedicated fix-ups in the switch below (pseudo returns, tail
/// jumps, MULX high forms, prefix flags).
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
  OutMI.setOpcode(MI->getOpcode());

  // Operands for which lowering yields std::nullopt are simply skipped.
  for (const MachineOperand &MO : MI->operands())
    if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
      OutMI.addOperand(*MaybeMCOp);

  bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
  // The || chain short-circuits: the first helper that returns true ends the
  // lowering, and the special cases below are not considered.
  if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
      X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
      X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
      X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
      X86::optimizeMOV(OutMI, In64BitMode) ||
      X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
    return;

  // Handle a few special cases to eliminate operand modifiers.
  switch (OutMI.getOpcode()) {
  case X86::LEA64_32r:
  case X86::LEA64r:
  case X86::LEA16r:
  case X86::LEA32r:
    // LEA should have a segment register, but it must be empty.
    assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
           "Unexpected # of LEA operands");
    assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
           "LEA has segment specified!");
    break;
  case X86::MULX32Hrr:
  case X86::MULX32Hrm:
  case X86::MULX64Hrr:
  case X86::MULX64Hrm: {
    // Turn into regular MULX by duplicating the destination.
    unsigned NewOpc;
    switch (OutMI.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::MULX32Hrr: NewOpc = X86::MULX32rr; break;
    case X86::MULX32Hrm: NewOpc = X86::MULX32rm; break;
    case X86::MULX64Hrr: NewOpc = X86::MULX64rr; break;
    case X86::MULX64Hrm: NewOpc = X86::MULX64rm; break;
    }
    OutMI.setOpcode(NewOpc);
    // Duplicate the destination: operand 0 is inserted once more at the
    // front of the operand list.
    unsigned DestReg = OutMI.getOperand(0).getReg();
    OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
    break;
  }
  // CALL64r, CALL64pcrel32 - These instructions used to have
  // register inputs modeled as normal uses instead of implicit uses.  As such,
  // we used to truncate off all but the first operand (the callee). This
  // issue seems to have been fixed at some point. This assert verifies that.
  case X86::CALL64r:
  case X86::CALL64pcrel32:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    break;
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    // Replace EH_RETURN with the appropriate RET; all lowered operands are
    // discarded by starting from a fresh MCInst.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CLEANUPRET: {
    // Replace CLEANUPRET with the appropriate RET.
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
    break;
  }
  case X86::CATCHRET: {
    // Replace CATCHRET with the appropriate RET. Unlike the two cases above,
    // the RET carries RAX/EAX (depending on the mode) as a register operand.
    const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
    unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
    OutMI = MCInst();
    OutMI.setOpcode(getRetOpcode(Subtarget));
    OutMI.addOperand(MCOperand::createReg(ReturnReg));
    break;
  }
  // TAILJMP* pseudos - Lower to the correct jump instruction. The cases only
  // differ in the operand count they assert on.
  case X86::TAILJMPr:
  case X86::TAILJMPr64:
  case X86::TAILJMPr64_REX:
  case X86::TAILJMPd:
  case X86::TAILJMPd64:
    assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPd_CC:
  case X86::TAILJMPd64_CC:
    assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::TAILJMPm:
  case X86::TAILJMPm64:
  case X86::TAILJMPm64_REX:
    assert(OutMI.getNumOperands() == X86::AddrNumOperands &&
           "Unexpected number of operands!");
    OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
    break;
  case X86::MASKMOVDQU:
  case X86::VMASKMOVDQU:
    // In 64-bit mode these get the IP_HAS_AD_SIZE flag set on the MCInst.
    if (In64BitMode)
      OutMI.setFlags(X86::IP_HAS_AD_SIZE);
    break;
  case X86::BSF16rm:
  case X86::BSF16rr:
  case X86::BSF32rm:
  case X86::BSF32rr:
  case X86::BSF64rm:
  case X86::BSF64rr: {
    // Add an REP prefix to BSF instructions so that new processors can
    // recognize as TZCNT, which has better performance than BSF.
    // BSF and TZCNT have different interpretations on ZF bit. So make sure
    // it won't be used later.
    // The prefix is skipped for functions optimized for size.
    const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
    if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
      OutMI.setFlags(X86::IP_HAS_REPEAT);
    break;
  }
  default:
    break;
  }
}
519 
/// Expand a TLS_addr* / TLS_base_addr* pseudo into an LEA that materializes
/// the TLS symbol reference followed by a call to the TLS runtime helper
/// ("__tls_get_addr" on the 64-bit paths, "___tls_get_addr" on the 32-bit
/// path).
///
/// The whole sequence is emitted inside a NoAutoPaddingScope, and every
/// instruction goes through EmitAndCountInstruction(). The symbol is taken
/// from operand 3 of \p MI.
void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
                                 const MachineInstr &MI) {
  NoAutoPaddingScope NoPadScope(*OutStreamer);
  // Everything except the two *32 opcodes takes the 64-bit code path; only
  // the *64 opcodes (not the X32 ones) count as LP64.
  bool Is64Bits = MI.getOpcode() != X86::TLS_addr32 &&
                  MI.getOpcode() != X86::TLS_base_addr32;
  bool Is64BitsLP64 = MI.getOpcode() == X86::TLS_addr64 ||
                      MI.getOpcode() == X86::TLS_base_addr64;
  MCContext &Ctx = OutStreamer->getContext();

  // Pick the symbol reference variant from the pseudo opcode.
  MCSymbolRefExpr::VariantKind SRVK;
  switch (MI.getOpcode()) {
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSGD;
    break;
  case X86::TLS_base_addr32:
    SRVK = MCSymbolRefExpr::VK_TLSLDM;
    break;
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    SRVK = MCSymbolRefExpr::VK_TLSLD;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  const MCSymbolRefExpr *Sym = MCSymbolRefExpr::create(
      MCInstLowering.GetSymbolFromOperand(MI.getOperand(3)), SRVK, Ctx);

  // As of binutils 2.32, ld has a bogus TLS relaxation error when the GD/LD
  // code sequence using R_X86_64_GOTPCREL (instead of R_X86_64_GOTPCRELX) is
  // attempted to be relaxed to IE/LE (binutils PR24784). Work around the bug by
  // only using GOT when GOTPCRELX is enabled.
  // TODO Delete the workaround when GOTPCRELX becomes commonplace.
  bool UseGot = MMI->getModule()->getRtLibUseGOT() &&
                Ctx.getAsmInfo()->canRelaxRelocations();

  if (Is64Bits) {
    // Only the general-dynamic (VK_TLSGD) sequence gets prefix padding.
    bool NeedsPadding = SRVK == MCSymbolRefExpr::VK_TLSGD;
    if (NeedsPadding && Is64BitsLP64)
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
    // LEA64r RDI <- [RIP + Sym]; the operands after the destination are
    // base, scale, index, displacement, segment.
    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
                                .addReg(X86::RDI)
                                .addReg(X86::RIP)
                                .addImm(1)
                                .addReg(0)
                                .addExpr(Sym)
                                .addReg(0));
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("__tls_get_addr");
    if (NeedsPadding) {
      // One extra DATA16 prefix is emitted when the direct (non-GOT) call
      // form is used.
      if (!UseGot)
        EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
      EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
    }
    if (UseGot) {
      // Indirect call through the RIP-relative GOT slot of the helper.
      const MCExpr *Expr = MCSymbolRefExpr::create(
          TlsGetAddr, MCSymbolRefExpr::VK_GOTPCREL, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL64m)
                                  .addReg(X86::RIP)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      // Direct call with a PLT reference.
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALL64pcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  } else {
    // 32-bit: the two LEA32r forms differ only in whether EBX is placed in
    // the index slot (VK_TLSGD without GOT) or in the base slot (all other
    // cases) of the address.
    if (SRVK == MCSymbolRefExpr::VK_TLSGD && !UseGot) {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(0)
                                  .addImm(1)
                                  .addReg(X86::EBX)
                                  .addExpr(Sym)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(MCInstBuilder(X86::LEA32r)
                                  .addReg(X86::EAX)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Sym)
                                  .addReg(0));
    }

    // Note the three leading underscores in the 32-bit helper name.
    const MCSymbol *TlsGetAddr = Ctx.getOrCreateSymbol("___tls_get_addr");
    if (UseGot) {
      // Indirect call through the helper's GOT slot, addressed off EBX.
      const MCExpr *Expr =
          MCSymbolRefExpr::create(TlsGetAddr, MCSymbolRefExpr::VK_GOT, Ctx);
      EmitAndCountInstruction(MCInstBuilder(X86::CALL32m)
                                  .addReg(X86::EBX)
                                  .addImm(1)
                                  .addReg(0)
                                  .addExpr(Expr)
                                  .addReg(0));
    } else {
      EmitAndCountInstruction(
          MCInstBuilder(X86::CALLpcrel32)
              .addExpr(MCSymbolRefExpr::create(TlsGetAddr,
                                               MCSymbolRefExpr::VK_PLT, Ctx)));
    }
  }
}
628 
629 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
630 /// bytes.  Return the size of nop emitted.
631 static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
632                         const X86Subtarget *Subtarget) {
633   // Determine the longest nop which can be efficiently decoded for the given
634   // target cpu.  15-bytes is the longest single NOP instruction, but some
635   // platforms can't decode the longest forms efficiently.
636   unsigned MaxNopLength = 1;
637   if (Subtarget->is64Bit()) {
638     // FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
639     // IndexReg/BaseReg below need to be updated.
640     if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
641       MaxNopLength = 7;
642     else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
643       MaxNopLength = 15;
644     else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
645       MaxNopLength = 11;
646     else
647       MaxNopLength = 10;
648   } if (Subtarget->is32Bit())
649     MaxNopLength = 2;
650 
651   // Cap a single nop emission at the profitable value for the target
652   NumBytes = std::min(NumBytes, MaxNopLength);
653 
654   unsigned NopSize;
655   unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
656   IndexReg = Displacement = SegmentReg = 0;
657   BaseReg = X86::RAX;
658   ScaleVal = 1;
659   switch (NumBytes) {
660   case 0:
661     llvm_unreachable("Zero nops?");
662     break;
663   case 1:
664     NopSize = 1;
665     Opc = X86::NOOP;
666     break;
667   case 2:
668     NopSize = 2;
669     Opc = X86::XCHG16ar;
670     break;
671   case 3:
672     NopSize = 3;
673     Opc = X86::NOOPL;
674     break;
675   case 4:
676     NopSize = 4;
677     Opc = X86::NOOPL;
678     Displacement = 8;
679     break;
680   case 5:
681     NopSize = 5;
682     Opc = X86::NOOPL;
683     Displacement = 8;
684     IndexReg = X86::RAX;
685     break;
686   case 6:
687     NopSize = 6;
688     Opc = X86::NOOPW;
689     Displacement = 8;
690     IndexReg = X86::RAX;
691     break;
692   case 7:
693     NopSize = 7;
694     Opc = X86::NOOPL;
695     Displacement = 512;
696     break;
697   case 8:
698     NopSize = 8;
699     Opc = X86::NOOPL;
700     Displacement = 512;
701     IndexReg = X86::RAX;
702     break;
703   case 9:
704     NopSize = 9;
705     Opc = X86::NOOPW;
706     Displacement = 512;
707     IndexReg = X86::RAX;
708     break;
709   default:
710     NopSize = 10;
711     Opc = X86::NOOPW;
712     Displacement = 512;
713     IndexReg = X86::RAX;
714     SegmentReg = X86::CS;
715     break;
716   }
717 
718   unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
719   NopSize += NumPrefixes;
720   for (unsigned i = 0; i != NumPrefixes; ++i)
721     OS.emitBytes("\x66");
722 
723   switch (Opc) {
724   default: llvm_unreachable("Unexpected opcode");
725   case X86::NOOP:
726     OS.emitInstruction(MCInstBuilder(Opc), *Subtarget);
727     break;
728   case X86::XCHG16ar:
729     OS.emitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX),
730                        *Subtarget);
731     break;
732   case X86::NOOPL:
733   case X86::NOOPW:
734     OS.emitInstruction(MCInstBuilder(Opc)
735                            .addReg(BaseReg)
736                            .addImm(ScaleVal)
737                            .addReg(IndexReg)
738                            .addImm(Displacement)
739                            .addReg(SegmentReg),
740                        *Subtarget);
741     break;
742   }
743   assert(NopSize <= NumBytes && "We overemitted?");
744   return NopSize;
745 }
746 
747 /// Emit the optimal amount of multi-byte nops on X86.
748 static void emitX86Nops(MCStreamer &OS, unsigned NumBytes,
749                         const X86Subtarget *Subtarget) {
750   unsigned NopsToEmit = NumBytes;
751   (void)NopsToEmit;
752   while (NumBytes) {
753     NumBytes -= emitNop(OS, NumBytes, Subtarget);
754     assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
755   }
756 }
757 
758 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
759                                     X86MCInstLower &MCIL) {
760   assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
761 
762   NoAutoPaddingScope NoPadScope(*OutStreamer);
763 
764   StatepointOpers SOpers(&MI);
765   if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
766     emitX86Nops(*OutStreamer, PatchBytes, Subtarget);
767   } else {
768     // Lower call target and choose correct opcode
769     const MachineOperand &CallTarget = SOpers.getCallTarget();
770     MCOperand CallTargetMCOp;
771     unsigned CallOpcode;
772     switch (CallTarget.getType()) {
773     case MachineOperand::MO_GlobalAddress:
774     case MachineOperand::MO_ExternalSymbol:
775       CallTargetMCOp = MCIL.LowerSymbolOperand(
776           CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
777       CallOpcode = X86::CALL64pcrel32;
778       // Currently, we only support relative addressing with statepoints.
779       // Otherwise, we'll need a scratch register to hold the target
780       // address.  You'll fail asserts during load & relocation if this
781       // symbol is to far away. (TODO: support non-relative addressing)
782       break;
783     case MachineOperand::MO_Immediate:
784       CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
785       CallOpcode = X86::CALL64pcrel32;
786       // Currently, we only support relative addressing with statepoints.
787       // Otherwise, we'll need a scratch register to hold the target
788       // immediate.  You'll fail asserts during load & relocation if this
789       // address is to far away. (TODO: support non-relative addressing)
790       break;
791     case MachineOperand::MO_Register:
792       // FIXME: Add retpoline support and remove this.
793       if (Subtarget->useIndirectThunkCalls())
794         report_fatal_error("Lowering register statepoints with thunks not "
795                            "yet implemented.");
796       CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
797       CallOpcode = X86::CALL64r;
798       break;
799     default:
800       llvm_unreachable("Unsupported operand type in statepoint call target");
801       break;
802     }
803 
804     // Emit call
805     MCInst CallInst;
806     CallInst.setOpcode(CallOpcode);
807     CallInst.addOperand(CallTargetMCOp);
808     OutStreamer->emitInstruction(CallInst, getSubtargetInfo());
809   }
810 
811   // Record our statepoint node in the same section used by STACKMAP
812   // and PATCHPOINT
813   auto &Ctx = OutStreamer->getContext();
814   MCSymbol *MILabel = Ctx.createTempSymbol();
815   OutStreamer->emitLabel(MILabel);
816   SM.recordStatepoint(*MILabel, MI);
817 }
818 
819 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
820                                      X86MCInstLower &MCIL) {
821   // FAULTING_LOAD_OP <def>, <faltinf type>, <MBB handler>,
822   //                  <opcode>, <operands>
823 
824   NoAutoPaddingScope NoPadScope(*OutStreamer);
825 
826   Register DefRegister = FaultingMI.getOperand(0).getReg();
827   FaultMaps::FaultKind FK =
828       static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
829   MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
830   unsigned Opcode = FaultingMI.getOperand(3).getImm();
831   unsigned OperandsBeginIdx = 4;
832 
833   auto &Ctx = OutStreamer->getContext();
834   MCSymbol *FaultingLabel = Ctx.createTempSymbol();
835   OutStreamer->emitLabel(FaultingLabel);
836 
837   assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
838   FM.recordFaultingOp(FK, FaultingLabel, HandlerLabel);
839 
840   MCInst MI;
841   MI.setOpcode(Opcode);
842 
843   if (DefRegister != X86::NoRegister)
844     MI.addOperand(MCOperand::createReg(DefRegister));
845 
846   for (const MachineOperand &MO :
847        llvm::drop_begin(FaultingMI.operands(), OperandsBeginIdx))
848     if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, MO))
849       MI.addOperand(*MaybeOperand);
850 
851   OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
852   OutStreamer->emitInstruction(MI, getSubtargetInfo());
853 }
854 
855 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
856                                      X86MCInstLower &MCIL) {
857   bool Is64Bits = Subtarget->is64Bit();
858   MCContext &Ctx = OutStreamer->getContext();
859   MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
860   const MCSymbolRefExpr *Op =
861       MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
862 
863   EmitAndCountInstruction(
864       MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
865           .addExpr(Op));
866 }
867 
868 void X86AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
869   assert(std::next(MI.getIterator())->isCall() &&
870          "KCFI_CHECK not followed by a call instruction");
871 
872   // Adjust the offset for patchable-function-prefix. X86InstrInfo::getNop()
873   // returns a 1-byte X86::NOOP, which means the offset is the same in
874   // bytes.  This assumes that patchable-function-prefix is the same for all
875   // functions.
876   const MachineFunction &MF = *MI.getMF();
877   int64_t PrefixNops = 0;
878   (void)MF.getFunction()
879       .getFnAttribute("patchable-function-prefix")
880       .getValueAsString()
881       .getAsInteger(10, PrefixNops);
882 
883   // KCFI allows indirect calls to any location that's preceded by a valid
884   // type identifier. To avoid encoding the full constant into an instruction,
885   // and thus emitting potential call target gadgets at each indirect call
886   // site, load a negated constant to a register and compare that to the
887   // expected value at the call target.
888   const Register AddrReg = MI.getOperand(0).getReg();
889   const uint32_t Type = MI.getOperand(1).getImm();
890   // The check is immediately before the call. If the call target is in R10,
891   // we can clobber R11 for the check instead.
892   unsigned TempReg = AddrReg == X86::R10 ? X86::R11D : X86::R10D;
893   EmitAndCountInstruction(
894       MCInstBuilder(X86::MOV32ri).addReg(TempReg).addImm(-MaskKCFIType(Type)));
895   EmitAndCountInstruction(MCInstBuilder(X86::ADD32rm)
896                               .addReg(X86::NoRegister)
897                               .addReg(TempReg)
898                               .addReg(AddrReg)
899                               .addImm(1)
900                               .addReg(X86::NoRegister)
901                               .addImm(-(PrefixNops + 4))
902                               .addReg(X86::NoRegister));
903 
904   MCSymbol *Pass = OutContext.createTempSymbol();
905   EmitAndCountInstruction(
906       MCInstBuilder(X86::JCC_1)
907           .addExpr(MCSymbolRefExpr::create(Pass, OutContext))
908           .addImm(X86::COND_E));
909 
910   MCSymbol *Trap = OutContext.createTempSymbol();
911   OutStreamer->emitLabel(Trap);
912   EmitAndCountInstruction(MCInstBuilder(X86::TRAP));
913   emitKCFITrapEntry(MF, Trap);
914   OutStreamer->emitLabel(Pass);
915 }
916 
917 void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
918   // FIXME: Make this work on non-ELF.
919   if (!TM.getTargetTriple().isOSBinFormatELF()) {
920     report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
921     return;
922   }
923 
924   const auto &Reg = MI.getOperand(0).getReg();
925   ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
926 
927   uint64_t ShadowBase;
928   int MappingScale;
929   bool OrShadowOffset;
930   getAddressSanitizerParams(Triple(TM.getTargetTriple()), 64,
931                             AccessInfo.CompileKernel, &ShadowBase,
932                             &MappingScale, &OrShadowOffset);
933 
934   StringRef Name = AccessInfo.IsWrite ? "store" : "load";
935   StringRef Op = OrShadowOffset ? "or" : "add";
936   std::string SymName = ("__asan_check_" + Name + "_" + Op + "_" +
937                          Twine(1ULL << AccessInfo.AccessSizeIndex) + "_" +
938                          TM.getMCRegisterInfo()->getName(Reg.asMCReg()))
939                             .str();
940   if (OrShadowOffset)
941     report_fatal_error(
942         "OrShadowOffset is not supported with optimized callbacks");
943 
944   EmitAndCountInstruction(
945       MCInstBuilder(X86::CALL64pcrel32)
946           .addExpr(MCSymbolRefExpr::create(
947               OutContext.getOrCreateSymbol(SymName), OutContext)));
948 }
949 
950 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
951                                       X86MCInstLower &MCIL) {
952   // PATCHABLE_OP minsize, opcode, operands
953 
954   NoAutoPaddingScope NoPadScope(*OutStreamer);
955 
956   unsigned MinSize = MI.getOperand(0).getImm();
957   unsigned Opcode = MI.getOperand(1).getImm();
958   // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap,
959   // simply emit a nop of size MinSize.
960   bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
961 
962   MCInst MCI;
963   MCI.setOpcode(Opcode);
964   for (auto &MO : drop_begin(MI.operands(), 2))
965     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
966       MCI.addOperand(*MaybeOperand);
967 
968   SmallString<256> Code;
969   if (!EmptyInst) {
970     SmallVector<MCFixup, 4> Fixups;
971     CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
972   }
973 
974   if (Code.size() < MinSize) {
975     if (MinSize == 2 && Subtarget->is32Bit() &&
976         Subtarget->isTargetWindowsMSVC() &&
977         (Subtarget->getCPU().empty() || Subtarget->getCPU() == "pentium3")) {
978       // For compatibility reasons, when targetting MSVC, is is important to
979       // generate a 'legacy' NOP in the form of a 8B FF MOV EDI, EDI. Some tools
980       // rely specifically on this pattern to be able to patch a function.
981       // This is only for 32-bit targets, when using /arch:IA32 or /arch:SSE.
982       OutStreamer->emitInstruction(
983           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
984           *Subtarget);
985     } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
986       // This is an optimization that lets us get away without emitting a nop in
987       // many cases.
988       //
989       // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
990       // bytes too, so the check on MinSize is important.
991       MCI.setOpcode(X86::PUSH64rmr);
992     } else {
993       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
994       assert(NopSize == MinSize && "Could not implement MinSize!");
995       (void)NopSize;
996     }
997   }
998   if (!EmptyInst)
999     OutStreamer->emitInstruction(MCI, getSubtargetInfo());
1000 }
1001 
1002 // Lower a stackmap of the form:
1003 // <id>, <shadowBytes>, ...
1004 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1005   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1006 
1007   auto &Ctx = OutStreamer->getContext();
1008   MCSymbol *MILabel = Ctx.createTempSymbol();
1009   OutStreamer->emitLabel(MILabel);
1010 
1011   SM.recordStackMap(*MILabel, MI);
1012   unsigned NumShadowBytes = MI.getOperand(1).getImm();
1013   SMShadowTracker.reset(NumShadowBytes);
1014 }
1015 
1016 // Lower a patchpoint of the form:
1017 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
1018 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1019                                     X86MCInstLower &MCIL) {
1020   assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1021 
1022   SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1023 
1024   NoAutoPaddingScope NoPadScope(*OutStreamer);
1025 
1026   auto &Ctx = OutStreamer->getContext();
1027   MCSymbol *MILabel = Ctx.createTempSymbol();
1028   OutStreamer->emitLabel(MILabel);
1029   SM.recordPatchPoint(*MILabel, MI);
1030 
1031   PatchPointOpers opers(&MI);
1032   unsigned ScratchIdx = opers.getNextScratchIdx();
1033   unsigned EncodedBytes = 0;
1034   const MachineOperand &CalleeMO = opers.getCallTarget();
1035 
1036   // Check for null target. If target is non-null (i.e. is non-zero or is
1037   // symbolic) then emit a call.
1038   if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1039     MCOperand CalleeMCOp;
1040     switch (CalleeMO.getType()) {
1041     default:
1042       /// FIXME: Add a verifier check for bad callee types.
1043       llvm_unreachable("Unrecognized callee operand type.");
1044     case MachineOperand::MO_Immediate:
1045       if (CalleeMO.getImm())
1046         CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1047       break;
1048     case MachineOperand::MO_ExternalSymbol:
1049     case MachineOperand::MO_GlobalAddress:
1050       CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1051                                            MCIL.GetSymbolFromOperand(CalleeMO));
1052       break;
1053     }
1054 
1055     // Emit MOV to materialize the target address and the CALL to target.
1056     // This is encoded with 12-13 bytes, depending on which register is used.
1057     Register ScratchReg = MI.getOperand(ScratchIdx).getReg();
1058     if (X86II::isX86_64ExtendedReg(ScratchReg))
1059       EncodedBytes = 13;
1060     else
1061       EncodedBytes = 12;
1062 
1063     EmitAndCountInstruction(
1064         MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1065     // FIXME: Add retpoline support and remove this.
1066     if (Subtarget->useIndirectThunkCalls())
1067       report_fatal_error(
1068           "Lowering patchpoint with thunks not yet implemented.");
1069     EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
1070   }
1071 
1072   // Emit padding.
1073   unsigned NumBytes = opers.getNumPatchBytes();
1074   assert(NumBytes >= EncodedBytes &&
1075          "Patchpoint can't request size less than the length of a call.");
1076 
1077   emitX86Nops(*OutStreamer, NumBytes - EncodedBytes, Subtarget);
1078 }
1079 
1080 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1081                                               X86MCInstLower &MCIL) {
1082   assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1083 
1084   NoAutoPaddingScope NoPadScope(*OutStreamer);
1085 
1086   // We want to emit the following pattern, which follows the x86 calling
1087   // convention to prepare for the trampoline call to be patched in.
1088   //
1089   //   .p2align 1, ...
1090   // .Lxray_event_sled_N:
1091   //   jmp +N                        // jump across the instrumentation sled
1092   //   ...                           // set up arguments in register
1093   //   callq __xray_CustomEvent@plt  // force dependency to symbol
1094   //   ...
1095   //   <jump here>
1096   //
1097   // After patching, it would look something like:
1098   //
1099   //   nopw (2-byte nop)
1100   //   ...
1101   //   callq __xrayCustomEvent  // already lowered
1102   //   ...
1103   //
1104   // ---
1105   // First we emit the label and the jump.
1106   auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1107   OutStreamer->AddComment("# XRay Custom Event Log");
1108   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1109   OutStreamer->emitLabel(CurSled);
1110 
1111   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1112   // an operand (computed as an offset from the jmp instruction).
1113   // FIXME: Find another less hacky way do force the relative jump.
1114   OutStreamer->emitBinaryData("\xeb\x0f");
1115 
1116   // The default C calling convention will place two arguments into %rcx and
1117   // %rdx -- so we only work with those.
1118   const Register DestRegs[] = {X86::RDI, X86::RSI};
1119   bool UsedMask[] = {false, false};
1120   // Filled out in loop.
1121   Register SrcRegs[] = {0, 0};
1122 
1123   // Then we put the operands in the %rdi and %rsi registers. We spill the
1124   // values in the register before we clobber them, and mark them as used in
1125   // UsedMask. In case the arguments are already in the correct register, we use
1126   // emit nops appropriately sized to keep the sled the same size in every
1127   // situation.
1128   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1129     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1130       assert(Op->isReg() && "Only support arguments in registers");
1131       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1132       assert(SrcRegs[I].isValid() && "Invalid operand");
1133       if (SrcRegs[I] != DestRegs[I]) {
1134         UsedMask[I] = true;
1135         EmitAndCountInstruction(
1136             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1137       } else {
1138         emitX86Nops(*OutStreamer, 4, Subtarget);
1139       }
1140     }
1141 
1142   // Now that the register values are stashed, mov arguments into place.
1143   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1144   // earlier DestReg. We will have already overwritten over the register before
1145   // we can copy from it.
1146   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1147     if (SrcRegs[I] != DestRegs[I])
1148       EmitAndCountInstruction(
1149           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1150 
1151   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1152   // name of the trampoline to be implemented by the XRay runtime.
1153   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1154   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1155   if (isPositionIndependent())
1156     TOp.setTargetFlags(X86II::MO_PLT);
1157 
1158   // Emit the call instruction.
1159   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1160                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1161 
1162   // Restore caller-saved and used registers.
1163   for (unsigned I = sizeof UsedMask; I-- > 0;)
1164     if (UsedMask[I])
1165       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1166     else
1167       emitX86Nops(*OutStreamer, 1, Subtarget);
1168 
1169   OutStreamer->AddComment("xray custom event end.");
1170 
1171   // Record the sled version. Version 0 of this sled was spelled differently, so
1172   // we let the runtime handle the different offsets we're using. Version 2
1173   // changed the absolute address to a PC-relative address.
1174   recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
1175 }
1176 
1177 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1178                                                     X86MCInstLower &MCIL) {
1179   assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1180 
1181   NoAutoPaddingScope NoPadScope(*OutStreamer);
1182 
1183   // We want to emit the following pattern, which follows the x86 calling
1184   // convention to prepare for the trampoline call to be patched in.
1185   //
1186   //   .p2align 1, ...
1187   // .Lxray_event_sled_N:
1188   //   jmp +N                        // jump across the instrumentation sled
1189   //   ...                           // set up arguments in register
1190   //   callq __xray_TypedEvent@plt  // force dependency to symbol
1191   //   ...
1192   //   <jump here>
1193   //
1194   // After patching, it would look something like:
1195   //
1196   //   nopw (2-byte nop)
1197   //   ...
1198   //   callq __xrayTypedEvent  // already lowered
1199   //   ...
1200   //
1201   // ---
1202   // First we emit the label and the jump.
1203   auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1204   OutStreamer->AddComment("# XRay Typed Event Log");
1205   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1206   OutStreamer->emitLabel(CurSled);
1207 
1208   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1209   // an operand (computed as an offset from the jmp instruction).
1210   // FIXME: Find another less hacky way do force the relative jump.
1211   OutStreamer->emitBinaryData("\xeb\x14");
1212 
1213   // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1214   // so we'll work with those. Or we may be called via SystemV, in which case
1215   // we don't have to do any translation.
1216   const Register DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1217   bool UsedMask[] = {false, false, false};
1218 
1219   // Will fill out src regs in the loop.
1220   Register SrcRegs[] = {0, 0, 0};
1221 
1222   // Then we put the operands in the SystemV registers. We spill the values in
1223   // the registers before we clobber them, and mark them as used in UsedMask.
1224   // In case the arguments are already in the correct register, we emit nops
1225   // appropriately sized to keep the sled the same size in every situation.
1226   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1227     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1228       // TODO: Is register only support adequate?
1229       assert(Op->isReg() && "Only supports arguments in registers");
1230       SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
1231       assert(SrcRegs[I].isValid() && "Invalid operand");
1232       if (SrcRegs[I] != DestRegs[I]) {
1233         UsedMask[I] = true;
1234         EmitAndCountInstruction(
1235             MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1236       } else {
1237         emitX86Nops(*OutStreamer, 4, Subtarget);
1238       }
1239     }
1240 
1241   // In the above loop we only stash all of the destination registers or emit
1242   // nops if the arguments are already in the right place. Doing the actually
1243   // moving is postponed until after all the registers are stashed so nothing
1244   // is clobbers. We've already added nops to account for the size of mov and
1245   // push if the register is in the right place, so we only have to worry about
1246   // emitting movs.
1247   // FIXME: This doesn't work if one of the later SrcRegs is equal to an
1248   // earlier DestReg. We will have already overwritten over the register before
1249   // we can copy from it.
1250   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1251     if (UsedMask[I])
1252       EmitAndCountInstruction(
1253           MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1254 
1255   // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1256   // name of the trampoline to be implemented by the XRay runtime.
1257   auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1258   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
1259   if (isPositionIndependent())
1260     TOp.setTargetFlags(X86II::MO_PLT);
1261 
1262   // Emit the call instruction.
1263   EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1264                               .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1265 
1266   // Restore caller-saved and used registers.
1267   for (unsigned I = sizeof UsedMask; I-- > 0;)
1268     if (UsedMask[I])
1269       EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1270     else
1271       emitX86Nops(*OutStreamer, 1, Subtarget);
1272 
1273   OutStreamer->AddComment("xray typed event end.");
1274 
1275   // Record the sled version.
1276   recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
1277 }
1278 
1279 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1280                                                   X86MCInstLower &MCIL) {
1281 
1282   NoAutoPaddingScope NoPadScope(*OutStreamer);
1283 
1284   const Function &F = MF->getFunction();
1285   if (F.hasFnAttribute("patchable-function-entry")) {
1286     unsigned Num;
1287     if (F.getFnAttribute("patchable-function-entry")
1288             .getValueAsString()
1289             .getAsInteger(10, Num))
1290       return;
1291     emitX86Nops(*OutStreamer, Num, Subtarget);
1292     return;
1293   }
1294   // We want to emit the following pattern:
1295   //
1296   //   .p2align 1, ...
1297   // .Lxray_sled_N:
1298   //   jmp .tmpN
1299   //   # 9 bytes worth of noops
1300   //
1301   // We need the 9 bytes because at runtime, we'd be patching over the full 11
1302   // bytes with the following pattern:
1303   //
1304   //   mov %r10, <function id, 32-bit>   // 6 bytes
1305   //   call <relative offset, 32-bits>   // 5 bytes
1306   //
1307   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1308   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1309   OutStreamer->emitLabel(CurSled);
1310 
1311   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1312   // an operand (computed as an offset from the jmp instruction).
1313   // FIXME: Find another less hacky way do force the relative jump.
1314   OutStreamer->emitBytes("\xeb\x09");
1315   emitX86Nops(*OutStreamer, 9, Subtarget);
1316   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER, 2);
1317 }
1318 
1319 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1320                                        X86MCInstLower &MCIL) {
1321   NoAutoPaddingScope NoPadScope(*OutStreamer);
1322 
1323   // Since PATCHABLE_RET takes the opcode of the return statement as an
1324   // argument, we use that to emit the correct form of the RET that we want.
1325   // i.e. when we see this:
1326   //
1327   //   PATCHABLE_RET X86::RET ...
1328   //
1329   // We should emit the RET followed by sleds.
1330   //
1331   //   .p2align 1, ...
1332   // .Lxray_sled_N:
1333   //   ret  # or equivalent instruction
1334   //   # 10 bytes worth of noops
1335   //
1336   // This just makes sure that the alignment for the next instruction is 2.
1337   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1338   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1339   OutStreamer->emitLabel(CurSled);
1340   unsigned OpCode = MI.getOperand(0).getImm();
1341   MCInst Ret;
1342   Ret.setOpcode(OpCode);
1343   for (auto &MO : drop_begin(MI.operands()))
1344     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1345       Ret.addOperand(*MaybeOperand);
1346   OutStreamer->emitInstruction(Ret, getSubtargetInfo());
1347   emitX86Nops(*OutStreamer, 10, Subtarget);
1348   recordSled(CurSled, MI, SledKind::FUNCTION_EXIT, 2);
1349 }
1350 
1351 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1352                                              X86MCInstLower &MCIL) {
1353   NoAutoPaddingScope NoPadScope(*OutStreamer);
1354 
1355   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1356   // instruction so we lower that particular instruction and its operands.
1357   // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1358   // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1359   // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1360   // tail call much like how we have it in PATCHABLE_RET.
1361   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1362   OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo());
1363   OutStreamer->emitLabel(CurSled);
1364   auto Target = OutContext.createTempSymbol();
1365 
1366   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1367   // an operand (computed as an offset from the jmp instruction).
1368   // FIXME: Find another less hacky way do force the relative jump.
1369   OutStreamer->emitBytes("\xeb\x09");
1370   emitX86Nops(*OutStreamer, 9, Subtarget);
1371   OutStreamer->emitLabel(Target);
1372   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
1373 
1374   unsigned OpCode = MI.getOperand(0).getImm();
1375   OpCode = convertTailJumpOpcode(OpCode);
1376   MCInst TC;
1377   TC.setOpcode(OpCode);
1378 
1379   // Before emitting the instruction, add a comment to indicate that this is
1380   // indeed a tail call.
1381   OutStreamer->AddComment("TAILCALL");
1382   for (auto &MO : drop_begin(MI.operands()))
1383     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1384       TC.addOperand(*MaybeOperand);
1385   OutStreamer->emitInstruction(TC, getSubtargetInfo());
1386 }
1387 
1388 // Returns instruction preceding MBBI in MachineFunction.
1389 // If MBBI is the first instruction of the first basic block, returns null.
1390 static MachineBasicBlock::const_iterator
1391 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1392   const MachineBasicBlock *MBB = MBBI->getParent();
1393   while (MBBI == MBB->begin()) {
1394     if (MBB == &MBB->getParent()->front())
1395       return MachineBasicBlock::const_iterator();
1396     MBB = MBB->getPrevNode();
1397     MBBI = MBB->end();
1398   }
1399   --MBBI;
1400   return MBBI;
1401 }
1402 
1403 static const Constant *getConstantFromPool(const MachineInstr &MI,
1404                                            const MachineOperand &Op) {
1405   if (!Op.isCPI() || Op.getOffset() != 0)
1406     return nullptr;
1407 
1408   ArrayRef<MachineConstantPoolEntry> Constants =
1409       MI.getParent()->getParent()->getConstantPool()->getConstants();
1410   const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1411 
1412   // Bail if this is a machine constant pool entry, we won't be able to dig out
1413   // anything useful.
1414   if (ConstantEntry.isMachineConstantPoolEntry())
1415     return nullptr;
1416 
1417   return ConstantEntry.Val.ConstVal;
1418 }
1419 
1420 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1421                                      unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1422   std::string Comment;
1423 
1424   // Compute the name for a register. This is really goofy because we have
1425   // multiple instruction printers that could (in theory) use different
1426   // names. Fortunately most people use the ATT style (outside of Windows)
1427   // and they actually agree on register naming here. Ultimately, this is
1428   // a comment, and so its OK if it isn't perfect.
1429   auto GetRegisterName = [](MCRegister Reg) -> StringRef {
1430     return X86ATTInstPrinter::getRegisterName(Reg);
1431   };
1432 
1433   const MachineOperand &DstOp = MI->getOperand(0);
1434   const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1435   const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
1436 
1437   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1438   StringRef Src1Name =
1439       SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1440   StringRef Src2Name =
1441       SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1442 
1443   // One source operand, fix the mask to print all elements in one span.
1444   SmallVector<int, 8> ShuffleMask(Mask);
1445   if (Src1Name == Src2Name)
1446     for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1447       if (ShuffleMask[i] >= e)
1448         ShuffleMask[i] -= e;
1449 
1450   raw_string_ostream CS(Comment);
1451   CS << DstName;
1452 
1453   // Handle AVX512 MASK/MASXZ write mask comments.
1454   // MASK: zmmX {%kY}
1455   // MASKZ: zmmX {%kY} {z}
1456   if (SrcOp1Idx > 1) {
1457     assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1458 
1459     const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1460     if (WriteMaskOp.isReg()) {
1461       CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1462 
1463       if (SrcOp1Idx == 2) {
1464         CS << " {z}";
1465       }
1466     }
1467   }
1468 
1469   CS << " = ";
1470 
1471   for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1472     if (i != 0)
1473       CS << ",";
1474     if (ShuffleMask[i] == SM_SentinelZero) {
1475       CS << "zero";
1476       continue;
1477     }
1478 
1479     // Otherwise, it must come from src1 or src2.  Print the span of elements
1480     // that comes from this src.
1481     bool isSrc1 = ShuffleMask[i] < (int)e;
1482     CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1483 
1484     bool IsFirst = true;
1485     while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1486            (ShuffleMask[i] < (int)e) == isSrc1) {
1487       if (!IsFirst)
1488         CS << ',';
1489       else
1490         IsFirst = false;
1491       if (ShuffleMask[i] == SM_SentinelUndef)
1492         CS << "u";
1493       else
1494         CS << ShuffleMask[i] % (int)e;
1495       ++i;
1496     }
1497     CS << ']';
1498     --i; // For loop increments element #.
1499   }
1500   CS.flush();
1501 
1502   return Comment;
1503 }
1504 
1505 static void printConstant(const APInt &Val, raw_ostream &CS) {
1506   if (Val.getBitWidth() <= 64) {
1507     CS << Val.getZExtValue();
1508   } else {
1509     // print multi-word constant as (w0,w1)
1510     CS << "(";
1511     for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1512       if (i > 0)
1513         CS << ",";
1514       CS << Val.getRawData()[i];
1515     }
1516     CS << ")";
1517   }
1518 }
1519 
1520 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1521   SmallString<32> Str;
1522   // Force scientific notation to distinquish from integers.
1523   Flt.toString(Str, 0, 0);
1524   CS << Str;
1525 }
1526 
1527 static void printConstant(const Constant *COp, unsigned BitWidth,
1528                           raw_ostream &CS) {
1529   if (isa<UndefValue>(COp)) {
1530     CS << "u";
1531   } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1532     printConstant(CI->getValue(), CS);
1533   } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1534     printConstant(CF->getValueAPF(), CS);
1535   } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
1536     Type *EltTy = CDS->getElementType();
1537     bool IsInteger = EltTy->isIntegerTy();
1538     bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
1539     unsigned EltBits = EltTy->getPrimitiveSizeInBits();
1540     unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
1541     assert((BitWidth % EltBits) == 0 && "Broadcast element size mismatch");
1542     for (unsigned I = 0; I != E; ++I) {
1543       if (I != 0)
1544         CS << ",";
1545       if (IsInteger)
1546         printConstant(CDS->getElementAsAPInt(I), CS);
1547       else if (IsFP)
1548         printConstant(CDS->getElementAsAPFloat(I), CS);
1549       else
1550         CS << "?";
1551     }
1552   } else {
1553     CS << "?";
1554   }
1555 }
1556 
1557 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1558   assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1559   assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
1560          "SEH_ instruction Windows and UEFI only");
1561 
1562   // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1563   if (EmitFPOData) {
1564     X86TargetStreamer *XTS =
1565         static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
1566     switch (MI->getOpcode()) {
1567     case X86::SEH_PushReg:
1568       XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1569       break;
1570     case X86::SEH_StackAlloc:
1571       XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1572       break;
1573     case X86::SEH_StackAlign:
1574       XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1575       break;
1576     case X86::SEH_SetFrame:
1577       assert(MI->getOperand(1).getImm() == 0 &&
1578              ".cv_fpo_setframe takes no offset");
1579       XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1580       break;
1581     case X86::SEH_EndPrologue:
1582       XTS->emitFPOEndPrologue();
1583       break;
1584     case X86::SEH_SaveReg:
1585     case X86::SEH_SaveXMM:
1586     case X86::SEH_PushFrame:
1587       llvm_unreachable("SEH_ directive incompatible with FPO");
1588       break;
1589     default:
1590       llvm_unreachable("expected SEH_ instruction");
1591     }
1592     return;
1593   }
1594 
1595   // Otherwise, use the .seh_ directives for all other Windows platforms.
1596   switch (MI->getOpcode()) {
1597   case X86::SEH_PushReg:
1598     OutStreamer->emitWinCFIPushReg(MI->getOperand(0).getImm());
1599     break;
1600 
1601   case X86::SEH_SaveReg:
1602     OutStreamer->emitWinCFISaveReg(MI->getOperand(0).getImm(),
1603                                    MI->getOperand(1).getImm());
1604     break;
1605 
1606   case X86::SEH_SaveXMM:
1607     OutStreamer->emitWinCFISaveXMM(MI->getOperand(0).getImm(),
1608                                    MI->getOperand(1).getImm());
1609     break;
1610 
1611   case X86::SEH_StackAlloc:
1612     OutStreamer->emitWinCFIAllocStack(MI->getOperand(0).getImm());
1613     break;
1614 
1615   case X86::SEH_SetFrame:
1616     OutStreamer->emitWinCFISetFrame(MI->getOperand(0).getImm(),
1617                                     MI->getOperand(1).getImm());
1618     break;
1619 
1620   case X86::SEH_PushFrame:
1621     OutStreamer->emitWinCFIPushFrame(MI->getOperand(0).getImm());
1622     break;
1623 
1624   case X86::SEH_EndPrologue:
1625     OutStreamer->emitWinCFIEndProlog();
1626     break;
1627 
1628   default:
1629     llvm_unreachable("expected SEH_ instruction");
1630   }
1631 }
1632 
1633 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1634   if (Info.RegClass == X86::VR128RegClassID ||
1635       Info.RegClass == X86::VR128XRegClassID)
1636     return 128;
1637   if (Info.RegClass == X86::VR256RegClassID ||
1638       Info.RegClass == X86::VR256XRegClassID)
1639     return 256;
1640   if (Info.RegClass == X86::VR512RegClassID)
1641     return 512;
1642   llvm_unreachable("Unknown register class!");
1643 }
1644 
1645 static void addConstantComments(const MachineInstr *MI,
1646                                 MCStreamer &OutStreamer) {
1647   switch (MI->getOpcode()) {
1648   // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1649   // a constant shuffle mask. We won't be able to do this at the MC layer
1650   // because the mask isn't an immediate.
1651   case X86::PSHUFBrm:
1652   case X86::VPSHUFBrm:
1653   case X86::VPSHUFBYrm:
1654   case X86::VPSHUFBZ128rm:
1655   case X86::VPSHUFBZ128rmk:
1656   case X86::VPSHUFBZ128rmkz:
1657   case X86::VPSHUFBZ256rm:
1658   case X86::VPSHUFBZ256rmk:
1659   case X86::VPSHUFBZ256rmkz:
1660   case X86::VPSHUFBZrm:
1661   case X86::VPSHUFBZrmk:
1662   case X86::VPSHUFBZrmkz: {
1663     unsigned SrcIdx = 1;
1664     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
1665       // Skip mask operand.
1666       ++SrcIdx;
1667       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
1668         // Skip passthru operand.
1669         ++SrcIdx;
1670       }
1671     }
1672     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
1673 
1674     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
1675            "Unexpected number of operands!");
1676 
1677     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
1678     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1679       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
1680       SmallVector<int, 64> Mask;
1681       DecodePSHUFBMask(C, Width, Mask);
1682       if (!Mask.empty())
1683         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1684     }
1685     break;
1686   }
1687 
1688   case X86::VPERMILPSrm:
1689   case X86::VPERMILPSYrm:
1690   case X86::VPERMILPSZ128rm:
1691   case X86::VPERMILPSZ128rmk:
1692   case X86::VPERMILPSZ128rmkz:
1693   case X86::VPERMILPSZ256rm:
1694   case X86::VPERMILPSZ256rmk:
1695   case X86::VPERMILPSZ256rmkz:
1696   case X86::VPERMILPSZrm:
1697   case X86::VPERMILPSZrmk:
1698   case X86::VPERMILPSZrmkz:
1699   case X86::VPERMILPDrm:
1700   case X86::VPERMILPDYrm:
1701   case X86::VPERMILPDZ128rm:
1702   case X86::VPERMILPDZ128rmk:
1703   case X86::VPERMILPDZ128rmkz:
1704   case X86::VPERMILPDZ256rm:
1705   case X86::VPERMILPDZ256rmk:
1706   case X86::VPERMILPDZ256rmkz:
1707   case X86::VPERMILPDZrm:
1708   case X86::VPERMILPDZrmk:
1709   case X86::VPERMILPDZrmkz: {
1710     unsigned ElSize;
1711     switch (MI->getOpcode()) {
1712     default: llvm_unreachable("Invalid opcode");
1713     case X86::VPERMILPSrm:
1714     case X86::VPERMILPSYrm:
1715     case X86::VPERMILPSZ128rm:
1716     case X86::VPERMILPSZ256rm:
1717     case X86::VPERMILPSZrm:
1718     case X86::VPERMILPSZ128rmkz:
1719     case X86::VPERMILPSZ256rmkz:
1720     case X86::VPERMILPSZrmkz:
1721     case X86::VPERMILPSZ128rmk:
1722     case X86::VPERMILPSZ256rmk:
1723     case X86::VPERMILPSZrmk:
1724       ElSize = 32;
1725       break;
1726     case X86::VPERMILPDrm:
1727     case X86::VPERMILPDYrm:
1728     case X86::VPERMILPDZ128rm:
1729     case X86::VPERMILPDZ256rm:
1730     case X86::VPERMILPDZrm:
1731     case X86::VPERMILPDZ128rmkz:
1732     case X86::VPERMILPDZ256rmkz:
1733     case X86::VPERMILPDZrmkz:
1734     case X86::VPERMILPDZ128rmk:
1735     case X86::VPERMILPDZ256rmk:
1736     case X86::VPERMILPDZrmk:
1737       ElSize = 64;
1738       break;
1739     }
1740 
1741     unsigned SrcIdx = 1;
1742     if (X86II::isKMasked(MI->getDesc().TSFlags)) {
1743       // Skip mask operand.
1744       ++SrcIdx;
1745       if (X86II::isKMergeMasked(MI->getDesc().TSFlags)) {
1746         // Skip passthru operand.
1747         ++SrcIdx;
1748       }
1749     }
1750     unsigned MaskIdx = SrcIdx + 1 + X86::AddrDisp;
1751 
1752     assert(MI->getNumOperands() >= (SrcIdx + 1 + X86::AddrNumOperands) &&
1753            "Unexpected number of operands!");
1754 
1755     const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
1756     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1757       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
1758       SmallVector<int, 16> Mask;
1759       DecodeVPERMILPMask(C, ElSize, Width, Mask);
1760       if (!Mask.empty())
1761         OutStreamer.AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
1762     }
1763     break;
1764   }
1765 
1766   case X86::VPERMIL2PDrm:
1767   case X86::VPERMIL2PSrm:
1768   case X86::VPERMIL2PDYrm:
1769   case X86::VPERMIL2PSYrm: {
1770     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands + 1) &&
1771            "Unexpected number of operands!");
1772 
1773     const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1774     if (!CtrlOp.isImm())
1775       break;
1776 
1777     unsigned ElSize;
1778     switch (MI->getOpcode()) {
1779     default: llvm_unreachable("Invalid opcode");
1780     case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
1781     case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
1782     }
1783 
1784     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
1785     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1786       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
1787       SmallVector<int, 16> Mask;
1788       DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
1789       if (!Mask.empty())
1790         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1791     }
1792     break;
1793   }
1794 
1795   case X86::VPPERMrrm: {
1796     assert(MI->getNumOperands() >= (3 + X86::AddrNumOperands) &&
1797            "Unexpected number of operands!");
1798 
1799     const MachineOperand &MaskOp = MI->getOperand(3 + X86::AddrDisp);
1800     if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1801       unsigned Width = getRegisterWidth(MI->getDesc().operands()[0]);
1802       SmallVector<int, 16> Mask;
1803       DecodeVPPERMMask(C, Width, Mask);
1804       if (!Mask.empty())
1805         OutStreamer.AddComment(getShuffleComment(MI, 1, 2, Mask));
1806     }
1807     break;
1808   }
1809 
1810   case X86::MMX_MOVQ64rm: {
1811     assert(MI->getNumOperands() == (1 + X86::AddrNumOperands) &&
1812            "Unexpected number of operands!");
1813     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
1814       std::string Comment;
1815       raw_string_ostream CS(Comment);
1816       const MachineOperand &DstOp = MI->getOperand(0);
1817       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1818       if (auto *CF = dyn_cast<ConstantFP>(C)) {
1819         CS << "0x" << toString(CF->getValueAPF().bitcastToAPInt(), 16, false);
1820         OutStreamer.AddComment(CS.str());
1821       }
1822     }
1823     break;
1824   }
1825 
1826 #define MOV_CASE(Prefix, Suffix)                                               \
1827   case X86::Prefix##MOVAPD##Suffix##rm:                                        \
1828   case X86::Prefix##MOVAPS##Suffix##rm:                                        \
1829   case X86::Prefix##MOVUPD##Suffix##rm:                                        \
1830   case X86::Prefix##MOVUPS##Suffix##rm:                                        \
1831   case X86::Prefix##MOVDQA##Suffix##rm:                                        \
1832   case X86::Prefix##MOVDQU##Suffix##rm:
1833 
1834 #define MOV_AVX512_CASE(Suffix)                                                \
1835   case X86::VMOVDQA64##Suffix##rm:                                             \
1836   case X86::VMOVDQA32##Suffix##rm:                                             \
1837   case X86::VMOVDQU64##Suffix##rm:                                             \
1838   case X86::VMOVDQU32##Suffix##rm:                                             \
1839   case X86::VMOVDQU16##Suffix##rm:                                             \
1840   case X86::VMOVDQU8##Suffix##rm:                                              \
1841   case X86::VMOVAPS##Suffix##rm:                                               \
1842   case X86::VMOVAPD##Suffix##rm:                                               \
1843   case X86::VMOVUPS##Suffix##rm:                                               \
1844   case X86::VMOVUPD##Suffix##rm:
1845 
1846 #define CASE_ALL_MOV_RM()                                                      \
1847   MOV_CASE(, )   /* SSE */                                                     \
1848   MOV_CASE(V, )  /* AVX-128 */                                                 \
1849   MOV_CASE(V, Y) /* AVX-256 */                                                 \
1850   MOV_AVX512_CASE(Z)                                                           \
1851   MOV_AVX512_CASE(Z256)                                                        \
1852   MOV_AVX512_CASE(Z128)
1853 
1854     // For loads from a constant pool to a vector register, print the constant
1855     // loaded.
1856     CASE_ALL_MOV_RM()
1857   case X86::VBROADCASTF128:
1858   case X86::VBROADCASTI128:
1859   case X86::VBROADCASTF32X4Z256rm:
1860   case X86::VBROADCASTF32X4rm:
1861   case X86::VBROADCASTF32X8rm:
1862   case X86::VBROADCASTF64X2Z128rm:
1863   case X86::VBROADCASTF64X2rm:
1864   case X86::VBROADCASTF64X4rm:
1865   case X86::VBROADCASTI32X4Z256rm:
1866   case X86::VBROADCASTI32X4rm:
1867   case X86::VBROADCASTI32X8rm:
1868   case X86::VBROADCASTI64X2Z128rm:
1869   case X86::VBROADCASTI64X2rm:
1870   case X86::VBROADCASTI64X4rm:
1871     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
1872            "Unexpected number of operands!");
1873     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
1874       int NumLanes = 1;
1875       // Override NumLanes for the broadcast instructions.
1876       switch (MI->getOpcode()) {
1877       case X86::VBROADCASTF128:        NumLanes = 2; break;
1878       case X86::VBROADCASTI128:        NumLanes = 2; break;
1879       case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
1880       case X86::VBROADCASTF32X4rm:     NumLanes = 4; break;
1881       case X86::VBROADCASTF32X8rm:     NumLanes = 2; break;
1882       case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
1883       case X86::VBROADCASTF64X2rm:     NumLanes = 4; break;
1884       case X86::VBROADCASTF64X4rm:     NumLanes = 2; break;
1885       case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
1886       case X86::VBROADCASTI32X4rm:     NumLanes = 4; break;
1887       case X86::VBROADCASTI32X8rm:     NumLanes = 2; break;
1888       case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
1889       case X86::VBROADCASTI64X2rm:     NumLanes = 4; break;
1890       case X86::VBROADCASTI64X4rm:     NumLanes = 2; break;
1891       }
1892 
1893       std::string Comment;
1894       raw_string_ostream CS(Comment);
1895       const MachineOperand &DstOp = MI->getOperand(0);
1896       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
1897       if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
1898         CS << "[";
1899         for (int l = 0; l != NumLanes; ++l) {
1900           for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
1901                ++i) {
1902             if (i != 0 || l != 0)
1903               CS << ",";
1904             if (CDS->getElementType()->isIntegerTy())
1905               printConstant(CDS->getElementAsAPInt(i), CS);
1906             else if (CDS->getElementType()->isHalfTy() ||
1907                      CDS->getElementType()->isFloatTy() ||
1908                      CDS->getElementType()->isDoubleTy())
1909               printConstant(CDS->getElementAsAPFloat(i), CS);
1910             else
1911               CS << "?";
1912           }
1913         }
1914         CS << "]";
1915         OutStreamer.AddComment(CS.str());
1916       } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
1917         CS << "<";
1918         for (int l = 0; l != NumLanes; ++l) {
1919           for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
1920                ++i) {
1921             if (i != 0 || l != 0)
1922               CS << ",";
1923             printConstant(CV->getOperand(i),
1924                           CV->getType()->getPrimitiveSizeInBits(), CS);
1925           }
1926         }
1927         CS << ">";
1928         OutStreamer.AddComment(CS.str());
1929       }
1930     }
1931     break;
1932 
1933   case X86::MOVDDUPrm:
1934   case X86::VMOVDDUPrm:
1935   case X86::VMOVDDUPZ128rm:
1936   case X86::VBROADCASTSSrm:
1937   case X86::VBROADCASTSSYrm:
1938   case X86::VBROADCASTSSZ128rm:
1939   case X86::VBROADCASTSSZ256rm:
1940   case X86::VBROADCASTSSZrm:
1941   case X86::VBROADCASTSDYrm:
1942   case X86::VBROADCASTSDZ256rm:
1943   case X86::VBROADCASTSDZrm:
1944   case X86::VPBROADCASTBrm:
1945   case X86::VPBROADCASTBYrm:
1946   case X86::VPBROADCASTBZ128rm:
1947   case X86::VPBROADCASTBZ256rm:
1948   case X86::VPBROADCASTBZrm:
1949   case X86::VPBROADCASTDrm:
1950   case X86::VPBROADCASTDYrm:
1951   case X86::VPBROADCASTDZ128rm:
1952   case X86::VPBROADCASTDZ256rm:
1953   case X86::VPBROADCASTDZrm:
1954   case X86::VPBROADCASTQrm:
1955   case X86::VPBROADCASTQYrm:
1956   case X86::VPBROADCASTQZ128rm:
1957   case X86::VPBROADCASTQZ256rm:
1958   case X86::VPBROADCASTQZrm:
1959   case X86::VPBROADCASTWrm:
1960   case X86::VPBROADCASTWYrm:
1961   case X86::VPBROADCASTWZ128rm:
1962   case X86::VPBROADCASTWZ256rm:
1963   case X86::VPBROADCASTWZrm:
1964     assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
1965            "Unexpected number of operands!");
1966     if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
1967       int NumElts, EltBits;
1968       switch (MI->getOpcode()) {
1969       default: llvm_unreachable("Invalid opcode");
1970       case X86::MOVDDUPrm:          NumElts = 2;  EltBits = 64; break;
1971       case X86::VMOVDDUPrm:         NumElts = 2;  EltBits = 64; break;
1972       case X86::VMOVDDUPZ128rm:     NumElts = 2;  EltBits = 64; break;
1973       case X86::VBROADCASTSSrm:     NumElts = 4;  EltBits = 32; break;
1974       case X86::VBROADCASTSSYrm:    NumElts = 8;  EltBits = 32; break;
1975       case X86::VBROADCASTSSZ128rm: NumElts = 4;  EltBits = 32; break;
1976       case X86::VBROADCASTSSZ256rm: NumElts = 8;  EltBits = 32; break;
1977       case X86::VBROADCASTSSZrm:    NumElts = 16; EltBits = 32; break;
1978       case X86::VBROADCASTSDYrm:    NumElts = 4;  EltBits = 64; break;
1979       case X86::VBROADCASTSDZ256rm: NumElts = 4;  EltBits = 64; break;
1980       case X86::VBROADCASTSDZrm:    NumElts = 8;  EltBits = 64; break;
1981       case X86::VPBROADCASTBrm:     NumElts = 16; EltBits = 8; break;
1982       case X86::VPBROADCASTBYrm:    NumElts = 32; EltBits = 8; break;
1983       case X86::VPBROADCASTBZ128rm: NumElts = 16; EltBits = 8; break;
1984       case X86::VPBROADCASTBZ256rm: NumElts = 32; EltBits = 8; break;
1985       case X86::VPBROADCASTBZrm:    NumElts = 64; EltBits = 8; break;
1986       case X86::VPBROADCASTDrm:     NumElts = 4;  EltBits = 32; break;
1987       case X86::VPBROADCASTDYrm:    NumElts = 8;  EltBits = 32; break;
1988       case X86::VPBROADCASTDZ128rm: NumElts = 4;  EltBits = 32; break;
1989       case X86::VPBROADCASTDZ256rm: NumElts = 8;  EltBits = 32; break;
1990       case X86::VPBROADCASTDZrm:    NumElts = 16; EltBits = 32; break;
1991       case X86::VPBROADCASTQrm:     NumElts = 2;  EltBits = 64; break;
1992       case X86::VPBROADCASTQYrm:    NumElts = 4;  EltBits = 64; break;
1993       case X86::VPBROADCASTQZ128rm: NumElts = 2;  EltBits = 64; break;
1994       case X86::VPBROADCASTQZ256rm: NumElts = 4;  EltBits = 64; break;
1995       case X86::VPBROADCASTQZrm:    NumElts = 8;  EltBits = 64; break;
1996       case X86::VPBROADCASTWrm:     NumElts = 8;  EltBits = 16; break;
1997       case X86::VPBROADCASTWYrm:    NumElts = 16; EltBits = 16; break;
1998       case X86::VPBROADCASTWZ128rm: NumElts = 8;  EltBits = 16; break;
1999       case X86::VPBROADCASTWZ256rm: NumElts = 16; EltBits = 16; break;
2000       case X86::VPBROADCASTWZrm:    NumElts = 32; EltBits = 16; break;
2001       }
2002 
2003       std::string Comment;
2004       raw_string_ostream CS(Comment);
2005       const MachineOperand &DstOp = MI->getOperand(0);
2006       CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2007       CS << "[";
2008       for (int i = 0; i != NumElts; ++i) {
2009         if (i != 0)
2010           CS << ",";
2011         printConstant(C, EltBits, CS);
2012       }
2013       CS << "]";
2014       OutStreamer.AddComment(CS.str());
2015     }
2016   }
2017 }
2018 
2019 void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2020   // FIXME: Enable feature predicate checks once all the test pass.
2021   // X86_MC::verifyInstructionPredicates(MI->getOpcode(),
2022   //                                     Subtarget->getFeatureBits());
2023 
2024   X86MCInstLower MCInstLowering(*MF, *this);
2025   const X86RegisterInfo *RI =
2026       MF->getSubtarget<X86Subtarget>().getRegisterInfo();
2027 
2028   if (MI->getOpcode() == X86::OR64rm) {
2029     for (auto &Opd : MI->operands()) {
2030       if (Opd.isSymbol() && StringRef(Opd.getSymbolName()) ==
2031                                 "swift_async_extendedFramePointerFlags") {
2032         ShouldEmitWeakSwiftAsyncExtendedFramePointerFlags = true;
2033       }
2034     }
2035   }
2036 
2037   // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
2038   // are compressed from EVEX encoding to VEX encoding.
2039   if (TM.Options.MCOptions.ShowMCEncoding) {
2040     if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
2041       OutStreamer->AddComment("EVEX TO VEX Compression ", false);
2042   }
2043 
2044   // Add comments for values loaded from constant pool.
2045   if (OutStreamer->isVerboseAsm())
2046     addConstantComments(MI, *OutStreamer);
2047 
2048   switch (MI->getOpcode()) {
2049   case TargetOpcode::DBG_VALUE:
2050     llvm_unreachable("Should be handled target independently");
2051 
2052   case X86::EH_RETURN:
2053   case X86::EH_RETURN64: {
2054     // Lower these as normal, but add some comments.
2055     Register Reg = MI->getOperand(0).getReg();
2056     OutStreamer->AddComment(StringRef("eh_return, addr: %") +
2057                             X86ATTInstPrinter::getRegisterName(Reg));
2058     break;
2059   }
2060   case X86::CLEANUPRET: {
2061     // Lower these as normal, but add some comments.
2062     OutStreamer->AddComment("CLEANUPRET");
2063     break;
2064   }
2065 
2066   case X86::CATCHRET: {
2067     // Lower these as normal, but add some comments.
2068     OutStreamer->AddComment("CATCHRET");
2069     break;
2070   }
2071 
2072   case X86::ENDBR32:
2073   case X86::ENDBR64: {
2074     // CurrentPatchableFunctionEntrySym can be CurrentFnBegin only for
2075     // -fpatchable-function-entry=N,0. The entry MBB is guaranteed to be
2076     // non-empty. If MI is the initial ENDBR, place the
2077     // __patchable_function_entries label after ENDBR.
2078     if (CurrentPatchableFunctionEntrySym &&
2079         CurrentPatchableFunctionEntrySym == CurrentFnBegin &&
2080         MI == &MF->front().front()) {
2081       MCInst Inst;
2082       MCInstLowering.Lower(MI, Inst);
2083       EmitAndCountInstruction(Inst);
2084       CurrentPatchableFunctionEntrySym = createTempSymbol("patch");
2085       OutStreamer->emitLabel(CurrentPatchableFunctionEntrySym);
2086       return;
2087     }
2088     break;
2089   }
2090 
2091   case X86::TAILJMPd64:
2092     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2093       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2094     [[fallthrough]];
2095   case X86::TAILJMPr:
2096   case X86::TAILJMPm:
2097   case X86::TAILJMPd:
2098   case X86::TAILJMPd_CC:
2099   case X86::TAILJMPr64:
2100   case X86::TAILJMPm64:
2101   case X86::TAILJMPd64_CC:
2102   case X86::TAILJMPr64_REX:
2103   case X86::TAILJMPm64_REX:
2104     // Lower these as normal, but add some comments.
2105     OutStreamer->AddComment("TAILCALL");
2106     break;
2107 
2108   case X86::TLS_addr32:
2109   case X86::TLS_addr64:
2110   case X86::TLS_addrX32:
2111   case X86::TLS_base_addr32:
2112   case X86::TLS_base_addr64:
2113   case X86::TLS_base_addrX32:
2114     return LowerTlsAddr(MCInstLowering, *MI);
2115 
2116   case X86::MOVPC32r: {
2117     // This is a pseudo op for a two instruction sequence with a label, which
2118     // looks like:
2119     //     call "L1$pb"
2120     // "L1$pb":
2121     //     popl %esi
2122 
2123     // Emit the call.
2124     MCSymbol *PICBase = MF->getPICBaseSymbol();
2125     // FIXME: We would like an efficient form for this, so we don't have to do a
2126     // lot of extra uniquing.
2127     EmitAndCountInstruction(
2128         MCInstBuilder(X86::CALLpcrel32)
2129             .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
2130 
2131     const X86FrameLowering *FrameLowering =
2132         MF->getSubtarget<X86Subtarget>().getFrameLowering();
2133     bool hasFP = FrameLowering->hasFP(*MF);
2134 
2135     // TODO: This is needed only if we require precise CFA.
2136     bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
2137                                !OutStreamer->getDwarfFrameInfos().back().End;
2138 
2139     int stackGrowth = -RI->getSlotSize();
2140 
2141     if (HasActiveDwarfFrame && !hasFP) {
2142       OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
2143       MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
2144     }
2145 
2146     // Emit the label.
2147     OutStreamer->emitLabel(PICBase);
2148 
2149     // popl $reg
2150     EmitAndCountInstruction(
2151         MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
2152 
2153     if (HasActiveDwarfFrame && !hasFP) {
2154       OutStreamer->emitCFIAdjustCfaOffset(stackGrowth);
2155     }
2156     return;
2157   }
2158 
2159   case X86::ADD32ri: {
2160     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
2161     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
2162       break;
2163 
2164     // Okay, we have something like:
2165     //  EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
2166 
2167     // For this, we want to print something like:
2168     //   MYGLOBAL + (. - PICBASE)
2169     // However, we can't generate a ".", so just emit a new label here and refer
2170     // to it.
2171     MCSymbol *DotSym = OutContext.createTempSymbol();
2172     OutStreamer->emitLabel(DotSym);
2173 
2174     // Now that we have emitted the label, lower the complex operand expression.
2175     MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
2176 
2177     const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
2178     const MCExpr *PICBase =
2179         MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
2180     DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
2181 
2182     DotExpr = MCBinaryExpr::createAdd(
2183         MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
2184 
2185     EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
2186                                 .addReg(MI->getOperand(0).getReg())
2187                                 .addReg(MI->getOperand(1).getReg())
2188                                 .addExpr(DotExpr));
2189     return;
2190   }
2191   case TargetOpcode::STATEPOINT:
2192     return LowerSTATEPOINT(*MI, MCInstLowering);
2193 
2194   case TargetOpcode::FAULTING_OP:
2195     return LowerFAULTING_OP(*MI, MCInstLowering);
2196 
2197   case TargetOpcode::FENTRY_CALL:
2198     return LowerFENTRY_CALL(*MI, MCInstLowering);
2199 
2200   case TargetOpcode::PATCHABLE_OP:
2201     return LowerPATCHABLE_OP(*MI, MCInstLowering);
2202 
2203   case TargetOpcode::STACKMAP:
2204     return LowerSTACKMAP(*MI);
2205 
2206   case TargetOpcode::PATCHPOINT:
2207     return LowerPATCHPOINT(*MI, MCInstLowering);
2208 
2209   case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
2210     return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
2211 
2212   case TargetOpcode::PATCHABLE_RET:
2213     return LowerPATCHABLE_RET(*MI, MCInstLowering);
2214 
2215   case TargetOpcode::PATCHABLE_TAIL_CALL:
2216     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
2217 
2218   case TargetOpcode::PATCHABLE_EVENT_CALL:
2219     return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
2220 
2221   case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
2222     return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
2223 
2224   case X86::MORESTACK_RET:
2225     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2226     return;
2227 
2228   case X86::KCFI_CHECK:
2229     return LowerKCFI_CHECK(*MI);
2230 
2231   case X86::ASAN_CHECK_MEMACCESS:
2232     return LowerASAN_CHECK_MEMACCESS(*MI);
2233 
2234   case X86::MORESTACK_RET_RESTORE_R10:
2235     // Return, then restore R10.
2236     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
2237     EmitAndCountInstruction(
2238         MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
2239     return;
2240 
2241   case X86::SEH_PushReg:
2242   case X86::SEH_SaveReg:
2243   case X86::SEH_SaveXMM:
2244   case X86::SEH_StackAlloc:
2245   case X86::SEH_StackAlign:
2246   case X86::SEH_SetFrame:
2247   case X86::SEH_PushFrame:
2248   case X86::SEH_EndPrologue:
2249     EmitSEHInstruction(MI);
2250     return;
2251 
2252   case X86::SEH_Epilogue: {
2253     assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
2254     MachineBasicBlock::const_iterator MBBI(MI);
2255     // Check if preceded by a call and emit nop if so.
2256     for (MBBI = PrevCrossBBInst(MBBI);
2257          MBBI != MachineBasicBlock::const_iterator();
2258          MBBI = PrevCrossBBInst(MBBI)) {
2259       // Pseudo instructions that aren't a call are assumed to not emit any
2260       // code. If they do, we worst case generate unnecessary noops after a
2261       // call.
2262       if (MBBI->isCall() || !MBBI->isPseudo()) {
2263         if (MBBI->isCall())
2264           EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2265         break;
2266       }
2267     }
2268     return;
2269   }
2270   case X86::UBSAN_UD1:
2271     EmitAndCountInstruction(MCInstBuilder(X86::UD1Lm)
2272                                 .addReg(X86::EAX)
2273                                 .addReg(X86::EAX)
2274                                 .addImm(1)
2275                                 .addReg(X86::NoRegister)
2276                                 .addImm(MI->getOperand(0).getImm())
2277                                 .addReg(X86::NoRegister));
2278     return;
2279   case X86::CALL64pcrel32:
2280     if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
2281       EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
2282     break;
2283   }
2284 
2285   MCInst TmpInst;
2286   MCInstLowering.Lower(MI, TmpInst);
2287 
2288   // Stackmap shadows cannot include branch targets, so we can count the bytes
2289   // in a call towards the shadow, but must ensure that the no thread returns
2290   // in to the stackmap shadow.  The only way to achieve this is if the call
2291   // is at the end of the shadow.
2292   if (MI->isCall()) {
2293     // Count then size of the call towards the shadow
2294     SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2295     // Then flush the shadow so that we fill with nops before the call, not
2296     // after it.
2297     SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2298     // Then emit the call
2299     OutStreamer->emitInstruction(TmpInst, getSubtargetInfo());
2300     return;
2301   }
2302 
2303   EmitAndCountInstruction(TmpInst);
2304 }
2305