xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision e9a994639b2af232f994ba2ad23ca45a17718d2b)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmLayout.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCFixupKindInfo.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCMachObjectWriter.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionMachO.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCValue.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 using namespace llvm;
38 
39 namespace {
40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41 class X86AlignBranchKind {
42 private:
43   uint8_t AlignBranchKind = 0;
44 
45 public:
46   void operator=(const std::string &Val) {
47     if (Val.empty())
48       return;
49     SmallVector<StringRef, 6> BranchTypes;
50     StringRef(Val).split(BranchTypes, '+', -1, false);
51     for (auto BranchType : BranchTypes) {
52       if (BranchType == "fused")
53         addKind(X86::AlignBranchFused);
54       else if (BranchType == "jcc")
55         addKind(X86::AlignBranchJcc);
56       else if (BranchType == "jmp")
57         addKind(X86::AlignBranchJmp);
58       else if (BranchType == "call")
59         addKind(X86::AlignBranchCall);
60       else if (BranchType == "ret")
61         addKind(X86::AlignBranchRet);
62       else if (BranchType == "indirect")
63         addKind(X86::AlignBranchIndirect);
64       else {
65         errs() << "invalid argument " << BranchType.str()
66                << " to -x86-align-branch=; each element must be one of: fused, "
67                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
68       }
69     }
70   }
71 
72   operator uint8_t() const { return AlignBranchKind; }
73   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74 };
75 
// Storage that the -x86-align-branch option parses into (it is passed as the
// option's cl::location below).
X86AlignBranchKind X86AlignBranchKindLoc;

// -x86-align-branch-boundary: size of the boundary branches are aligned
// against. The default of 0 means no branch alignment.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// -x86-align-branch: '+'-separated list of branch kinds to align. The string
// value is handed to X86AlignBranchKind::operator=, which fills in
// X86AlignBranchKindLoc.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// -x86-branches-within-32B-boundaries: master switch. When set, the
// X86AsmBackend constructor selects a 32-byte boundary and the fused, jcc and
// jmp kinds; the two options above can still override those defaults.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// -x86-pad-max-prefix-size: upper bound on prefixes used for padding. The
// default of 0 makes X86AsmBackend::allowEnhancedRelaxation() return false.
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// -x86-pad-for-align (hidden, off by default).
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// -x86-pad-for-branch-align (hidden, on by default); consulted by
// X86AsmBackend::allowEnhancedRelaxation().
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
118 
119 class X86ELFObjectWriter : public MCELFObjectTargetWriter {
120 public:
121   X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
122                      bool HasRelocationAddend, bool foobar)
123     : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
124 };
125 
126 class X86AsmBackend : public MCAsmBackend {
127   const MCSubtargetInfo &STI;
128   std::unique_ptr<const MCInstrInfo> MCII;
129   X86AlignBranchKind AlignBranchType;
130   Align AlignBoundary;
131   unsigned TargetPrefixMax = 0;
132 
133   MCInst PrevInst;
134   MCBoundaryAlignFragment *PendingBA = nullptr;
135   std::pair<MCFragment *, size_t> PrevInstPosition;
136   bool CanPadInst;
137 
138   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
139   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
140   bool needAlign(const MCInst &Inst) const;
141   bool canPadBranches(MCObjectStreamer &OS) const;
142   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
143 
144 public:
145   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
146       : MCAsmBackend(support::little), STI(STI),
147         MCII(T.createMCInstrInfo()) {
148     if (X86AlignBranchWithin32BBoundaries) {
149       // At the moment, this defaults to aligning fused branches, unconditional
150       // jumps, and (unfused) conditional jumps with nops.  Both the
151       // instructions aligned and the alignment method (nop vs prefix) may
152       // change in the future.
153       AlignBoundary = assumeAligned(32);;
154       AlignBranchType.addKind(X86::AlignBranchFused);
155       AlignBranchType.addKind(X86::AlignBranchJcc);
156       AlignBranchType.addKind(X86::AlignBranchJmp);
157     }
158     // Allow overriding defaults set by master flag
159     if (X86AlignBranchBoundary.getNumOccurrences())
160       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
161     if (X86AlignBranch.getNumOccurrences())
162       AlignBranchType = X86AlignBranchKindLoc;
163     if (X86PadMaxPrefixSize.getNumOccurrences())
164       TargetPrefixMax = X86PadMaxPrefixSize;
165   }
166 
167   bool allowAutoPadding() const override;
168   bool allowEnhancedRelaxation() const override;
169   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
170   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
171 
172   unsigned getNumFixupKinds() const override {
173     return X86::NumTargetFixupKinds;
174   }
175 
176   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
177 
178   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
179 
180   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
181                              const MCValue &Target) override;
182 
183   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
184                   const MCValue &Target, MutableArrayRef<char> Data,
185                   uint64_t Value, bool IsResolved,
186                   const MCSubtargetInfo *STI) const override;
187 
188   bool mayNeedRelaxation(const MCInst &Inst,
189                          const MCSubtargetInfo &STI) const override;
190 
191   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
192                             const MCRelaxableFragment *DF,
193                             const MCAsmLayout &Layout) const override;
194 
195   void relaxInstruction(MCInst &Inst,
196                         const MCSubtargetInfo &STI) const override;
197 
198   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
199                                    MCCodeEmitter &Emitter,
200                                    unsigned &RemainingSize) const;
201 
202   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
203                                unsigned &RemainingSize) const;
204 
205   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
206                               unsigned &RemainingSize) const;
207 
208   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
209 
210   unsigned getMaximumNopSize() const override;
211 
212   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
213 };
214 } // end anonymous namespace
215 
216 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
217   unsigned Op = Inst.getOpcode();
218   switch (Op) {
219   default:
220     return Op;
221   case X86::JCC_1:
222     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
223   case X86::JMP_1:
224     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
225   }
226 }
227 
228 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
229   unsigned Op = Inst.getOpcode();
230   switch (Op) {
231   default:
232     return Op;
233 
234     // IMUL
235   case X86::IMUL16rri8: return X86::IMUL16rri;
236   case X86::IMUL16rmi8: return X86::IMUL16rmi;
237   case X86::IMUL32rri8: return X86::IMUL32rri;
238   case X86::IMUL32rmi8: return X86::IMUL32rmi;
239   case X86::IMUL64rri8: return X86::IMUL64rri32;
240   case X86::IMUL64rmi8: return X86::IMUL64rmi32;
241 
242     // AND
243   case X86::AND16ri8: return X86::AND16ri;
244   case X86::AND16mi8: return X86::AND16mi;
245   case X86::AND32ri8: return X86::AND32ri;
246   case X86::AND32mi8: return X86::AND32mi;
247   case X86::AND64ri8: return X86::AND64ri32;
248   case X86::AND64mi8: return X86::AND64mi32;
249 
250     // OR
251   case X86::OR16ri8: return X86::OR16ri;
252   case X86::OR16mi8: return X86::OR16mi;
253   case X86::OR32ri8: return X86::OR32ri;
254   case X86::OR32mi8: return X86::OR32mi;
255   case X86::OR64ri8: return X86::OR64ri32;
256   case X86::OR64mi8: return X86::OR64mi32;
257 
258     // XOR
259   case X86::XOR16ri8: return X86::XOR16ri;
260   case X86::XOR16mi8: return X86::XOR16mi;
261   case X86::XOR32ri8: return X86::XOR32ri;
262   case X86::XOR32mi8: return X86::XOR32mi;
263   case X86::XOR64ri8: return X86::XOR64ri32;
264   case X86::XOR64mi8: return X86::XOR64mi32;
265 
266     // ADD
267   case X86::ADD16ri8: return X86::ADD16ri;
268   case X86::ADD16mi8: return X86::ADD16mi;
269   case X86::ADD32ri8: return X86::ADD32ri;
270   case X86::ADD32mi8: return X86::ADD32mi;
271   case X86::ADD64ri8: return X86::ADD64ri32;
272   case X86::ADD64mi8: return X86::ADD64mi32;
273 
274    // ADC
275   case X86::ADC16ri8: return X86::ADC16ri;
276   case X86::ADC16mi8: return X86::ADC16mi;
277   case X86::ADC32ri8: return X86::ADC32ri;
278   case X86::ADC32mi8: return X86::ADC32mi;
279   case X86::ADC64ri8: return X86::ADC64ri32;
280   case X86::ADC64mi8: return X86::ADC64mi32;
281 
282     // SUB
283   case X86::SUB16ri8: return X86::SUB16ri;
284   case X86::SUB16mi8: return X86::SUB16mi;
285   case X86::SUB32ri8: return X86::SUB32ri;
286   case X86::SUB32mi8: return X86::SUB32mi;
287   case X86::SUB64ri8: return X86::SUB64ri32;
288   case X86::SUB64mi8: return X86::SUB64mi32;
289 
290    // SBB
291   case X86::SBB16ri8: return X86::SBB16ri;
292   case X86::SBB16mi8: return X86::SBB16mi;
293   case X86::SBB32ri8: return X86::SBB32ri;
294   case X86::SBB32mi8: return X86::SBB32mi;
295   case X86::SBB64ri8: return X86::SBB64ri32;
296   case X86::SBB64mi8: return X86::SBB64mi32;
297 
298     // CMP
299   case X86::CMP16ri8: return X86::CMP16ri;
300   case X86::CMP16mi8: return X86::CMP16mi;
301   case X86::CMP32ri8: return X86::CMP32ri;
302   case X86::CMP32mi8: return X86::CMP32mi;
303   case X86::CMP64ri8: return X86::CMP64ri32;
304   case X86::CMP64mi8: return X86::CMP64mi32;
305 
306     // PUSH
307   case X86::PUSH32i8:  return X86::PUSHi32;
308   case X86::PUSH16i8:  return X86::PUSHi16;
309   case X86::PUSH64i8:  return X86::PUSH64i32;
310   }
311 }
312 
313 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
314   unsigned R = getRelaxedOpcodeArith(Inst);
315   if (R != Inst.getOpcode())
316     return R;
317   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
318 }
319 
320 static X86::CondCode getCondFromBranch(const MCInst &MI,
321                                        const MCInstrInfo &MCII) {
322   unsigned Opcode = MI.getOpcode();
323   switch (Opcode) {
324   default:
325     return X86::COND_INVALID;
326   case X86::JCC_1: {
327     const MCInstrDesc &Desc = MCII.get(Opcode);
328     return static_cast<X86::CondCode>(
329         MI.getOperand(Desc.getNumOperands() - 1).getImm());
330   }
331   }
332 }
333 
334 static X86::SecondMacroFusionInstKind
335 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
336   X86::CondCode CC = getCondFromBranch(MI, MCII);
337   return classifySecondCondCodeInMacroFusion(CC);
338 }
339 
340 /// Check if the instruction uses RIP relative addressing.
341 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
342   unsigned Opcode = MI.getOpcode();
343   const MCInstrDesc &Desc = MCII.get(Opcode);
344   uint64_t TSFlags = Desc.TSFlags;
345   unsigned CurOp = X86II::getOperandBias(Desc);
346   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
347   if (MemoryOperand < 0)
348     return false;
349   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
350   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
351   return (BaseReg == X86::RIP);
352 }
353 
354 /// Check if the instruction is a prefix.
355 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
356   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
357 }
358 
359 /// Check if the instruction is valid as the first instruction in macro fusion.
360 static bool isFirstMacroFusibleInst(const MCInst &Inst,
361                                     const MCInstrInfo &MCII) {
362   // An Intel instruction with RIP relative addressing is not macro fusible.
363   if (isRIPRelative(Inst, MCII))
364     return false;
365   X86::FirstMacroFusionInstKind FIK =
366       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
367   return FIK != X86::FirstMacroFusionInstKind::Invalid;
368 }
369 
370 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
371 /// get a better peformance in some cases. Here, we determine which prefix is
372 /// the most suitable.
373 ///
374 /// If the instruction has a segment override prefix, use the existing one.
375 /// If the target is 64-bit, use the CS.
376 /// If the target is 32-bit,
377 ///   - If the instruction has a ESP/EBP base register, use SS.
378 ///   - Otherwise use DS.
379 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
380   assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
381          "Prefixes can be added only in 32-bit or 64-bit mode.");
382   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
383   uint64_t TSFlags = Desc.TSFlags;
384 
385   // Determine where the memory operand starts, if present.
386   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
387   if (MemoryOperand != -1)
388     MemoryOperand += X86II::getOperandBias(Desc);
389 
390   unsigned SegmentReg = 0;
391   if (MemoryOperand >= 0) {
392     // Check for explicit segment override on memory operand.
393     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
394   }
395 
396   switch (TSFlags & X86II::FormMask) {
397   default:
398     break;
399   case X86II::RawFrmDstSrc: {
400     // Check segment override opcode prefix as needed (not for %ds).
401     if (Inst.getOperand(2).getReg() != X86::DS)
402       SegmentReg = Inst.getOperand(2).getReg();
403     break;
404   }
405   case X86II::RawFrmSrc: {
406     // Check segment override opcode prefix as needed (not for %ds).
407     if (Inst.getOperand(1).getReg() != X86::DS)
408       SegmentReg = Inst.getOperand(1).getReg();
409     break;
410   }
411   case X86II::RawFrmMemOffs: {
412     // Check segment override opcode prefix as needed.
413     SegmentReg = Inst.getOperand(1).getReg();
414     break;
415   }
416   }
417 
418   if (SegmentReg != 0)
419     return X86::getSegmentOverridePrefixForReg(SegmentReg);
420 
421   if (STI.hasFeature(X86::Mode64Bit))
422     return X86::CS_Encoding;
423 
424   if (MemoryOperand >= 0) {
425     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
426     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
427     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
428       return X86::SS_Encoding;
429   }
430   return X86::DS_Encoding;
431 }
432 
433 /// Check if the two instructions will be macro-fused on the target cpu.
434 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
435   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
436   if (!InstDesc.isConditionalBranch())
437     return false;
438   if (!isFirstMacroFusibleInst(Cmp, *MCII))
439     return false;
440   const X86::FirstMacroFusionInstKind CmpKind =
441       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
442   const X86::SecondMacroFusionInstKind BranchKind =
443       classifySecondInstInMacroFusion(Jcc, *MCII);
444   return X86::isMacroFused(CmpKind, BranchKind);
445 }
446 
447 /// Check if the instruction has a variant symbol operand.
448 static bool hasVariantSymbol(const MCInst &MI) {
449   for (auto &Operand : MI) {
450     if (!Operand.isExpr())
451       continue;
452     const MCExpr &Expr = *Operand.getExpr();
453     if (Expr.getKind() == MCExpr::SymbolRef &&
454         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
455       return true;
456   }
457   return false;
458 }
459 
460 bool X86AsmBackend::allowAutoPadding() const {
461   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
462 }
463 
464 bool X86AsmBackend::allowEnhancedRelaxation() const {
465   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
466 }
467 
468 /// X86 has certain instructions which enable interrupts exactly one
469 /// instruction *after* the instruction which stores to SS.  Return true if the
470 /// given instruction has such an interrupt delay slot.
471 static bool hasInterruptDelaySlot(const MCInst &Inst) {
472   switch (Inst.getOpcode()) {
473   case X86::POPSS16:
474   case X86::POPSS32:
475   case X86::STI:
476     return true;
477 
478   case X86::MOV16sr:
479   case X86::MOV32sr:
480   case X86::MOV64sr:
481   case X86::MOV16sm:
482     if (Inst.getOperand(0).getReg() == X86::SS)
483       return true;
484     break;
485   }
486   return false;
487 }
488 
489 /// Check if the instruction to be emitted is right after any data.
490 static bool
491 isRightAfterData(MCFragment *CurrentFragment,
492                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
493   MCFragment *F = CurrentFragment;
494   // Empty data fragments may be created to prevent further data being
495   // added into the previous fragment, we need to skip them since they
496   // have no contents.
497   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
498     if (cast<MCDataFragment>(F)->getContents().size() != 0)
499       break;
500 
501   // Since data is always emitted into a DataFragment, our check strategy is
502   // simple here.
503   //   - If the fragment is a DataFragment
504   //     - If it's not the fragment where the previous instruction is,
505   //       returns true.
506   //     - If it's the fragment holding the previous instruction but its
507   //       size changed since the the previous instruction was emitted into
508   //       it, returns true.
509   //     - Otherwise returns false.
510   //   - If the fragment is not a DataFragment, returns false.
511   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
512     return DF != PrevInstPosition.first ||
513            DF->getContents().size() != PrevInstPosition.second;
514 
515   return false;
516 }
517 
518 /// \returns the fragment size if it has instructions, otherwise returns 0.
519 static size_t getSizeForInstFragment(const MCFragment *F) {
520   if (!F || !F->hasInstructions())
521     return 0;
522   // MCEncodedFragmentWithContents being templated makes this tricky.
523   switch (F->getKind()) {
524   default:
525     llvm_unreachable("Unknown fragment with instructions!");
526   case MCFragment::FT_Data:
527     return cast<MCDataFragment>(*F).getContents().size();
528   case MCFragment::FT_Relaxable:
529     return cast<MCRelaxableFragment>(*F).getContents().size();
530   case MCFragment::FT_CompactEncodedInst:
531     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
532   }
533 }
534 
535 /// Return true if we can insert NOP or prefixes automatically before the
536 /// the instruction to be emitted.
537 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
538   if (hasVariantSymbol(Inst))
539     // Linker may rewrite the instruction with variant symbol operand(e.g.
540     // TLSCALL).
541     return false;
542 
543   if (hasInterruptDelaySlot(PrevInst))
544     // If this instruction follows an interrupt enabling instruction with a one
545     // instruction delay, inserting a nop would change behavior.
546     return false;
547 
548   if (isPrefix(PrevInst, *MCII))
549     // If this instruction follows a prefix, inserting a nop/prefix would change
550     // semantic.
551     return false;
552 
553   if (isPrefix(Inst, *MCII))
554     // If this instruction is a prefix, inserting a prefix would change
555     // semantic.
556     return false;
557 
558   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
559     // If this instruction follows any data, there is no clear
560     // instruction boundary, inserting a nop/prefix would change semantic.
561     return false;
562 
563   return true;
564 }
565 
566 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
567   if (!OS.getAllowAutoPadding())
568     return false;
569   assert(allowAutoPadding() && "incorrect initialization!");
570 
571   // We only pad in text section.
572   if (!OS.getCurrentSectionOnly()->getKind().isText())
573     return false;
574 
575   // To be Done: Currently don't deal with Bundle cases.
576   if (OS.getAssembler().isBundlingEnabled())
577     return false;
578 
579   // Branches only need to be aligned in 32-bit or 64-bit mode.
580   if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
581     return false;
582 
583   return true;
584 }
585 
586 /// Check if the instruction operand needs to be aligned.
587 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
588   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
589   return (Desc.isConditionalBranch() &&
590           (AlignBranchType & X86::AlignBranchJcc)) ||
591          (Desc.isUnconditionalBranch() &&
592           (AlignBranchType & X86::AlignBranchJmp)) ||
593          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
594          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
595          (Desc.isIndirectBranch() &&
596           (AlignBranchType & X86::AlignBranchIndirect));
597 }
598 
599 /// Insert BoundaryAlignFragment before instructions to align branches.
600 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
601                                          const MCInst &Inst) {
602   CanPadInst = canPadInst(Inst, OS);
603 
604   if (!canPadBranches(OS))
605     return;
606 
607   if (!isMacroFused(PrevInst, Inst))
608     // Macro fusion doesn't happen indeed, clear the pending.
609     PendingBA = nullptr;
610 
611   if (!CanPadInst)
612     return;
613 
614   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
615     // Macro fusion actually happens and there is no other fragment inserted
616     // after the previous instruction.
617     //
618     // Do nothing here since we already inserted a BoudaryAlign fragment when
619     // we met the first instruction in the fused pair and we'll tie them
620     // together in emitInstructionEnd.
621     //
622     // Note: When there is at least one fragment, such as MCAlignFragment,
623     // inserted after the previous instruction, e.g.
624     //
625     // \code
626     //   cmp %rax %rcx
627     //   .align 16
628     //   je .Label0
629     // \ endcode
630     //
631     // We will treat the JCC as a unfused branch although it may be fused
632     // with the CMP.
633     return;
634   }
635 
636   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
637                           isFirstMacroFusibleInst(Inst, *MCII))) {
638     // If we meet a unfused branch or the first instuction in a fusiable pair,
639     // insert a BoundaryAlign fragment.
640     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
641   }
642 }
643 
644 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
645 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
646   PrevInst = Inst;
647   MCFragment *CF = OS.getCurrentFragment();
648   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
649   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
650     F->setAllowAutoPadding(CanPadInst);
651 
652   if (!canPadBranches(OS))
653     return;
654 
655   if (!needAlign(Inst) || !PendingBA)
656     return;
657 
658   // Tie the aligned instructions into a a pending BoundaryAlign.
659   PendingBA->setLastFragment(CF);
660   PendingBA = nullptr;
661 
662   // We need to ensure that further data isn't added to the current
663   // DataFragment, so that we can get the size of instructions later in
664   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
665   // DataFragment.
666   if (isa_and_nonnull<MCDataFragment>(CF))
667     OS.insert(new MCDataFragment());
668 
669   // Update the maximum alignment on the current section if necessary.
670   MCSection *Sec = OS.getCurrentSectionOnly();
671   if (AlignBoundary.value() > Sec->getAlignment())
672     Sec->setAlignment(AlignBoundary);
673 }
674 
675 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
676   if (STI.getTargetTriple().isOSBinFormatELF()) {
677     unsigned Type;
678     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
679       Type = llvm::StringSwitch<unsigned>(Name)
680 #define ELF_RELOC(X, Y) .Case(#X, Y)
681 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
682 #undef ELF_RELOC
683                  .Default(-1u);
684     } else {
685       Type = llvm::StringSwitch<unsigned>(Name)
686 #define ELF_RELOC(X, Y) .Case(#X, Y)
687 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
688 #undef ELF_RELOC
689                  .Default(-1u);
690     }
691     if (Type == -1u)
692       return None;
693     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
694   }
695   return MCAsmBackend::getFixupKind(Name);
696 }
697 
698 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
699   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
700       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
701       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
702       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
703       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
704       {"reloc_signed_4byte", 0, 32, 0},
705       {"reloc_signed_4byte_relax", 0, 32, 0},
706       {"reloc_global_offset_table", 0, 32, 0},
707       {"reloc_global_offset_table8", 0, 64, 0},
708       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
709   };
710 
711   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
712   // do not require any extra processing.
713   if (Kind >= FirstLiteralRelocationKind)
714     return MCAsmBackend::getFixupKindInfo(FK_NONE);
715 
716   if (Kind < FirstTargetFixupKind)
717     return MCAsmBackend::getFixupKindInfo(Kind);
718 
719   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
720          "Invalid kind!");
721   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
722   return Infos[Kind - FirstTargetFixupKind];
723 }
724 
725 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
726                                           const MCFixup &Fixup,
727                                           const MCValue &) {
728   return Fixup.getKind() >= FirstLiteralRelocationKind;
729 }
730 
731 static unsigned getFixupKindSize(unsigned Kind) {
732   switch (Kind) {
733   default:
734     llvm_unreachable("invalid fixup kind!");
735   case FK_NONE:
736     return 0;
737   case FK_PCRel_1:
738   case FK_SecRel_1:
739   case FK_Data_1:
740     return 1;
741   case FK_PCRel_2:
742   case FK_SecRel_2:
743   case FK_Data_2:
744     return 2;
745   case FK_PCRel_4:
746   case X86::reloc_riprel_4byte:
747   case X86::reloc_riprel_4byte_relax:
748   case X86::reloc_riprel_4byte_relax_rex:
749   case X86::reloc_riprel_4byte_movq_load:
750   case X86::reloc_signed_4byte:
751   case X86::reloc_signed_4byte_relax:
752   case X86::reloc_global_offset_table:
753   case X86::reloc_branch_4byte_pcrel:
754   case FK_SecRel_4:
755   case FK_Data_4:
756     return 4;
757   case FK_PCRel_8:
758   case FK_SecRel_8:
759   case FK_Data_8:
760   case X86::reloc_global_offset_table8:
761     return 8;
762   }
763 }
764 
765 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
766                                const MCValue &Target,
767                                MutableArrayRef<char> Data,
768                                uint64_t Value, bool IsResolved,
769                                const MCSubtargetInfo *STI) const {
770   unsigned Kind = Fixup.getKind();
771   if (Kind >= FirstLiteralRelocationKind)
772     return;
773   unsigned Size = getFixupKindSize(Kind);
774 
775   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
776 
777   int64_t SignedValue = static_cast<int64_t>(Value);
778   if ((Target.isAbsolute() || IsResolved) &&
779       getFixupKindInfo(Fixup.getKind()).Flags &
780       MCFixupKindInfo::FKF_IsPCRel) {
781     // check that PC relative fixup fits into the fixup size.
782     if (Size > 0 && !isIntN(Size * 8, SignedValue))
783       Asm.getContext().reportError(
784                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
785                                    " is too large for field of " + Twine(Size) +
786                                    ((Size == 1) ? " byte." : " bytes."));
787   } else {
788     // Check that uppper bits are either all zeros or all ones.
789     // Specifically ignore overflow/underflow as long as the leakage is
790     // limited to the lower bits. This is to remain compatible with
791     // other assemblers.
792     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
793            "Value does not fit in the Fixup field");
794   }
795 
796   for (unsigned i = 0; i != Size; ++i)
797     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
798 }
799 
800 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
801                                       const MCSubtargetInfo &STI) const {
802   // Branches can always be relaxed in either mode.
803   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
804     return true;
805 
806   // Check if this instruction is ever relaxable.
807   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
808     return false;
809 
810 
811   // Check if the relaxable operand has an expression. For the current set of
812   // relaxable instructions, the relaxable operand is always the last operand.
813   unsigned RelaxableOp = Inst.getNumOperands() - 1;
814   if (Inst.getOperand(RelaxableOp).isExpr())
815     return true;
816 
817   return false;
818 }
819 
820 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
821                                          uint64_t Value,
822                                          const MCRelaxableFragment *DF,
823                                          const MCAsmLayout &Layout) const {
824   // Relax if the value is too big for a (signed) i8.
825   return !isInt<8>(Value);
826 }
827 
828 // FIXME: Can tblgen help at all here to verify there aren't other instructions
829 // we can relax?
830 void X86AsmBackend::relaxInstruction(MCInst &Inst,
831                                      const MCSubtargetInfo &STI) const {
832   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
833   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
834   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
835 
836   if (RelaxedOp == Inst.getOpcode()) {
837     SmallString<256> Tmp;
838     raw_svector_ostream OS(Tmp);
839     Inst.dump_pretty(OS);
840     OS << "\n";
841     report_fatal_error("unexpected instruction to relax: " + OS.str());
842   }
843 
844   Inst.setOpcode(RelaxedOp);
845 }
846 
847 /// Return true if this instruction has been fully relaxed into it's most
848 /// general available form.
849 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
850   auto &Inst = RF.getInst();
851   auto &STI = *RF.getSubtargetInfo();
852   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
853   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
854 }
855 
856 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
857                                             MCCodeEmitter &Emitter,
858                                             unsigned &RemainingSize) const {
859   if (!RF.getAllowAutoPadding())
860     return false;
861   // If the instruction isn't fully relaxed, shifting it around might require a
862   // larger value for one of the fixups then can be encoded.  The outer loop
863   // will also catch this before moving to the next instruction, but we need to
864   // prevent padding this single instruction as well.
865   if (!isFullyRelaxed(RF))
866     return false;
867 
868   const unsigned OldSize = RF.getContents().size();
869   if (OldSize == 15)
870     return false;
871 
872   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
873   const unsigned RemainingPrefixSize = [&]() -> unsigned {
874     SmallString<15> Code;
875     raw_svector_ostream VecOS(Code);
876     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
877     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
878 
879     // TODO: It turns out we need a decent amount of plumbing for the target
880     // specific bits to determine number of prefixes its safe to add.  Various
881     // targets (older chips mostly, but also Atom family) encounter decoder
882     // stalls with too many prefixes.  For testing purposes, we set the value
883     // externally for the moment.
884     unsigned ExistingPrefixSize = Code.size();
885     if (TargetPrefixMax <= ExistingPrefixSize)
886       return 0;
887     return TargetPrefixMax - ExistingPrefixSize;
888   }();
889   const unsigned PrefixBytesToAdd =
890       std::min(MaxPossiblePad, RemainingPrefixSize);
891   if (PrefixBytesToAdd == 0)
892     return false;
893 
894   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
895 
896   SmallString<256> Code;
897   Code.append(PrefixBytesToAdd, Prefix);
898   Code.append(RF.getContents().begin(), RF.getContents().end());
899   RF.getContents() = Code;
900 
901   // Adjust the fixups for the change in offsets
902   for (auto &F : RF.getFixups()) {
903     F.setOffset(F.getOffset() + PrefixBytesToAdd);
904   }
905 
906   RemainingSize -= PrefixBytesToAdd;
907   return true;
908 }
909 
910 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
911                                                 MCCodeEmitter &Emitter,
912                                                 unsigned &RemainingSize) const {
913   if (isFullyRelaxed(RF))
914     // TODO: There are lots of other tricks we could apply for increasing
915     // encoding size without impacting performance.
916     return false;
917 
918   MCInst Relaxed = RF.getInst();
919   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
920 
921   SmallVector<MCFixup, 4> Fixups;
922   SmallString<15> Code;
923   raw_svector_ostream VecOS(Code);
924   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
925   const unsigned OldSize = RF.getContents().size();
926   const unsigned NewSize = Code.size();
927   assert(NewSize >= OldSize && "size decrease during relaxation?");
928   unsigned Delta = NewSize - OldSize;
929   if (Delta > RemainingSize)
930     return false;
931   RF.setInst(Relaxed);
932   RF.getContents() = Code;
933   RF.getFixups() = Fixups;
934   RemainingSize -= Delta;
935   return true;
936 }
937 
938 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
939                                            MCCodeEmitter &Emitter,
940                                            unsigned &RemainingSize) const {
941   bool Changed = false;
942   if (RemainingSize != 0)
943     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
944   if (RemainingSize != 0)
945     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
946   return Changed;
947 }
948 
949 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
950                                  MCAsmLayout &Layout) const {
951   // See if we can further relax some instructions to cut down on the number of
952   // nop bytes required for code alignment.  The actual win is in reducing
953   // instruction count, not number of bytes.  Modern X86-64 can easily end up
954   // decode limited.  It is often better to reduce the number of instructions
955   // (i.e. eliminate nops) even at the cost of increasing the size and
956   // complexity of others.
957   if (!X86PadForAlign && !X86PadForBranchAlign)
958     return;
959 
960   // The processed regions are delimitered by LabeledFragments. -g may have more
961   // MCSymbols and therefore different relaxation results. X86PadForAlign is
962   // disabled by default to eliminate the -g vs non -g difference.
963   DenseSet<MCFragment *> LabeledFragments;
964   for (const MCSymbol &S : Asm.symbols())
965     LabeledFragments.insert(S.getFragment(false));
966 
967   for (MCSection &Sec : Asm) {
968     if (!Sec.getKind().isText())
969       continue;
970 
971     SmallVector<MCRelaxableFragment *, 4> Relaxable;
972     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
973       MCFragment &F = *I;
974 
975       if (LabeledFragments.count(&F))
976         Relaxable.clear();
977 
978       if (F.getKind() == MCFragment::FT_Data ||
979           F.getKind() == MCFragment::FT_CompactEncodedInst)
980         // Skip and ignore
981         continue;
982 
983       if (F.getKind() == MCFragment::FT_Relaxable) {
984         auto &RF = cast<MCRelaxableFragment>(*I);
985         Relaxable.push_back(&RF);
986         continue;
987       }
988 
989       auto canHandle = [](MCFragment &F) -> bool {
990         switch (F.getKind()) {
991         default:
992           return false;
993         case MCFragment::FT_Align:
994           return X86PadForAlign;
995         case MCFragment::FT_BoundaryAlign:
996           return X86PadForBranchAlign;
997         }
998       };
999       // For any unhandled kind, assume we can't change layout.
1000       if (!canHandle(F)) {
1001         Relaxable.clear();
1002         continue;
1003       }
1004 
1005 #ifndef NDEBUG
1006       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
1007 #endif
1008       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
1009 
1010       // To keep the effects local, prefer to relax instructions closest to
1011       // the align directive.  This is purely about human understandability
1012       // of the resulting code.  If we later find a reason to expand
1013       // particular instructions over others, we can adjust.
1014       MCFragment *FirstChangedFragment = nullptr;
1015       unsigned RemainingSize = OrigSize;
1016       while (!Relaxable.empty() && RemainingSize != 0) {
1017         auto &RF = *Relaxable.pop_back_val();
1018         // Give the backend a chance to play any tricks it wishes to increase
1019         // the encoding size of the given instruction.  Target independent code
1020         // will try further relaxation, but target's may play further tricks.
1021         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
1022           FirstChangedFragment = &RF;
1023 
1024         // If we have an instruction which hasn't been fully relaxed, we can't
1025         // skip past it and insert bytes before it.  Changing its starting
1026         // offset might require a larger negative offset than it can encode.
1027         // We don't need to worry about larger positive offsets as none of the
1028         // possible offsets between this and our align are visible, and the
1029         // ones afterwards aren't changing.
1030         if (!isFullyRelaxed(RF))
1031           break;
1032       }
1033       Relaxable.clear();
1034 
1035       if (FirstChangedFragment) {
1036         // Make sure the offsets for any fragments in the effected range get
1037         // updated.  Note that this (conservatively) invalidates the offsets of
1038         // those following, but this is not required.
1039         Layout.invalidateFragmentsFrom(FirstChangedFragment);
1040       }
1041 
1042       // BoundaryAlign explicitly tracks it's size (unlike align)
1043       if (F.getKind() == MCFragment::FT_BoundaryAlign)
1044         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
1045 
1046 #ifndef NDEBUG
1047       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
1048       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
1049       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
1050              "can't move start of next fragment!");
1051       assert(FinalSize == RemainingSize && "inconsistent size computation?");
1052 #endif
1053 
1054       // If we're looking at a boundary align, make sure we don't try to pad
1055       // its target instructions for some following directive.  Doing so would
1056       // break the alignment of the current boundary align.
1057       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
1058         const MCFragment *LastFragment = BF->getLastFragment();
1059         if (!LastFragment)
1060           continue;
1061         while (&*I != LastFragment)
1062           ++I;
1063       }
1064     }
1065   }
1066 
1067   // The layout is done. Mark every fragment as valid.
1068   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
1069     MCSection &Section = *Layout.getSectionOrder()[i];
1070     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
1071     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
1072   }
1073 }
1074 
1075 unsigned X86AsmBackend::getMaximumNopSize() const {
1076   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
1077     return 1;
1078   if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
1079     return 7;
1080   if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
1081     return 15;
1082   if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
1083     return 11;
1084   // FIXME: handle 32-bit mode
1085   // 15-bytes is the longest single NOP instruction, but 10-bytes is
1086   // commonly the longest that can be efficiently decoded.
1087   return 10;
1088 }
1089 
1090 /// Write a sequence of optimal nops to the output, covering \p Count
1091 /// bytes.
1092 /// \return - true on success, false on failure
1093 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
1094   static const char Nops[10][11] = {
1095     // nop
1096     "\x90",
1097     // xchg %ax,%ax
1098     "\x66\x90",
1099     // nopl (%[re]ax)
1100     "\x0f\x1f\x00",
1101     // nopl 0(%[re]ax)
1102     "\x0f\x1f\x40\x00",
1103     // nopl 0(%[re]ax,%[re]ax,1)
1104     "\x0f\x1f\x44\x00\x00",
1105     // nopw 0(%[re]ax,%[re]ax,1)
1106     "\x66\x0f\x1f\x44\x00\x00",
1107     // nopl 0L(%[re]ax)
1108     "\x0f\x1f\x80\x00\x00\x00\x00",
1109     // nopl 0L(%[re]ax,%[re]ax,1)
1110     "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1111     // nopw 0L(%[re]ax,%[re]ax,1)
1112     "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1113     // nopw %cs:0L(%[re]ax,%[re]ax,1)
1114     "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1115   };
1116 
1117   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
1118 
1119   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1120   // length.
1121   do {
1122     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1123     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1124     for (uint8_t i = 0; i < Prefixes; i++)
1125       OS << '\x66';
1126     const uint8_t Rest = ThisNopLength - Prefixes;
1127     if (Rest != 0)
1128       OS.write(Nops[Rest - 1], Rest);
1129     Count -= ThisNopLength;
1130   } while (Count != 0);
1131 
1132   return true;
1133 }
1134 
1135 /* *** */
1136 
1137 namespace {
1138 
1139 class ELFX86AsmBackend : public X86AsmBackend {
1140 public:
1141   uint8_t OSABI;
1142   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1143       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1144 };
1145 
1146 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1147 public:
1148   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1149                       const MCSubtargetInfo &STI)
1150     : ELFX86AsmBackend(T, OSABI, STI) {}
1151 
1152   std::unique_ptr<MCObjectTargetWriter>
1153   createObjectTargetWriter() const override {
1154     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1155   }
1156 };
1157 
1158 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1159 public:
1160   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1161                        const MCSubtargetInfo &STI)
1162       : ELFX86AsmBackend(T, OSABI, STI) {}
1163 
1164   std::unique_ptr<MCObjectTargetWriter>
1165   createObjectTargetWriter() const override {
1166     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1167                                     ELF::EM_X86_64);
1168   }
1169 };
1170 
1171 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1172 public:
1173   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1174                          const MCSubtargetInfo &STI)
1175       : ELFX86AsmBackend(T, OSABI, STI) {}
1176 
1177   std::unique_ptr<MCObjectTargetWriter>
1178   createObjectTargetWriter() const override {
1179     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1180                                     ELF::EM_IAMCU);
1181   }
1182 };
1183 
1184 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1185 public:
1186   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1187                       const MCSubtargetInfo &STI)
1188     : ELFX86AsmBackend(T, OSABI, STI) {}
1189 
1190   std::unique_ptr<MCObjectTargetWriter>
1191   createObjectTargetWriter() const override {
1192     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1193   }
1194 };
1195 
1196 class WindowsX86AsmBackend : public X86AsmBackend {
1197   bool Is64Bit;
1198 
1199 public:
1200   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1201                        const MCSubtargetInfo &STI)
1202     : X86AsmBackend(T, STI)
1203     , Is64Bit(is64Bit) {
1204   }
1205 
1206   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1207     return StringSwitch<Optional<MCFixupKind>>(Name)
1208         .Case("dir32", FK_Data_4)
1209         .Case("secrel32", FK_SecRel_4)
1210         .Case("secidx", FK_SecRel_2)
1211         .Default(MCAsmBackend::getFixupKind(Name));
1212   }
1213 
1214   std::unique_ptr<MCObjectTargetWriter>
1215   createObjectTargetWriter() const override {
1216     return createX86WinCOFFObjectWriter(Is64Bit);
1217   }
1218 };
1219 
namespace CU {

  /// Compact unwind encoding values.
  ///
  /// The UNWIND_MODE_* values are mode numbers stored starting at bit 24 of
  /// the encoding; the remaining two values are bit masks for the register
  /// fields in the low bits.
  enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME                   = 1 << 24,

    /// A frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD                 = 2 << 24,

    /// A frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND                  = 3 << 24,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF                      = 4 << 24,

    /// Mask for encoding the frame registers (low 15 bits).
    UNWIND_BP_FRAME_REGISTERS              = (1 << 15) - 1,

    /// Mask for encoding the frameless registers (low 10 bits).
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = (1 << 10) - 1
  };

} // namespace CU
1245 
1246 class DarwinX86AsmBackend : public X86AsmBackend {
1247   const MCRegisterInfo &MRI;
1248 
1249   /// Number of registers that can be saved in a compact unwind encoding.
1250   enum { CU_NUM_SAVED_REGS = 6 };
1251 
1252   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1253   Triple TT;
1254   bool Is64Bit;
1255 
1256   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1257   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1258   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1259 protected:
1260   /// Size of a "push" instruction for the given register.
1261   unsigned PushInstrSize(unsigned Reg) const {
1262     switch (Reg) {
1263       case X86::EBX:
1264       case X86::ECX:
1265       case X86::EDX:
1266       case X86::EDI:
1267       case X86::ESI:
1268       case X86::EBP:
1269       case X86::RBX:
1270       case X86::RBP:
1271         return 1;
1272       case X86::R12:
1273       case X86::R13:
1274       case X86::R14:
1275       case X86::R15:
1276         return 2;
1277     }
1278     return 1;
1279   }
1280 
1281 private:
1282   /// Get the compact unwind number for a given register. The number
1283   /// corresponds to the enum lists in compact_unwind_encoding.h.
1284   int getCompactUnwindRegNum(unsigned Reg) const {
1285     static const MCPhysReg CU32BitRegs[7] = {
1286       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1287     };
1288     static const MCPhysReg CU64BitRegs[] = {
1289       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1290     };
1291     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1292     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1293       if (*CURegs == Reg)
1294         return Idx;
1295 
1296     return -1;
1297   }
1298 
1299   /// Return the registers encoded for a compact encoding with a frame
1300   /// pointer.
1301   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1302     // Encode the registers in the order they were saved --- 3-bits per
1303     // register. The list of saved registers is assumed to be in reverse
1304     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1305     uint32_t RegEnc = 0;
1306     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1307       unsigned Reg = SavedRegs[i];
1308       if (Reg == 0) break;
1309 
1310       int CURegNum = getCompactUnwindRegNum(Reg);
1311       if (CURegNum == -1) return ~0U;
1312 
1313       // Encode the 3-bit register number in order, skipping over 3-bits for
1314       // each register.
1315       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1316     }
1317 
1318     assert((RegEnc & 0x3FFFF) == RegEnc &&
1319            "Invalid compact register encoding!");
1320     return RegEnc;
1321   }
1322 
1323   /// Create the permutation encoding used with frameless stacks. It is
1324   /// passed the number of registers to be saved and an array of the registers
1325   /// saved.
1326   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1327     // The saved registers are numbered from 1 to 6. In order to encode the
1328     // order in which they were saved, we re-number them according to their
1329     // place in the register order. The re-numbering is relative to the last
1330     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1331     // that order:
1332     //
1333     //    Orig  Re-Num
1334     //    ----  ------
1335     //     6       6
1336     //     2       2
1337     //     4       3
1338     //     5       3
1339     //
1340     for (unsigned i = 0; i < RegCount; ++i) {
1341       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1342       if (CUReg == -1) return ~0U;
1343       SavedRegs[i] = CUReg;
1344     }
1345 
1346     // Reverse the list.
1347     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1348 
1349     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1350     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1351       unsigned Countless = 0;
1352       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1353         if (SavedRegs[j] < SavedRegs[i])
1354           ++Countless;
1355 
1356       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1357     }
1358 
1359     // Take the renumbered values and encode them into a 10-bit number.
1360     uint32_t permutationEncoding = 0;
1361     switch (RegCount) {
1362     case 6:
1363       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1364                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1365                              +     RenumRegs[4];
1366       break;
1367     case 5:
1368       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1369                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1370                              +     RenumRegs[5];
1371       break;
1372     case 4:
1373       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1374                              + 3 * RenumRegs[4] +      RenumRegs[5];
1375       break;
1376     case 3:
1377       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1378                              +     RenumRegs[5];
1379       break;
1380     case 2:
1381       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1382       break;
1383     case 1:
1384       permutationEncoding |=       RenumRegs[5];
1385       break;
1386     }
1387 
1388     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1389            "Invalid compact register encoding!");
1390     return permutationEncoding;
1391   }
1392 
1393 public:
1394   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1395                       const MCSubtargetInfo &STI)
1396       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1397         Is64Bit(TT.isArch64Bit()) {
1398     memset(SavedRegs, 0, sizeof(SavedRegs));
1399     OffsetSize = Is64Bit ? 8 : 4;
1400     MoveInstrSize = Is64Bit ? 3 : 2;
1401     StackDivide = Is64Bit ? 8 : 4;
1402   }
1403 
1404   std::unique_ptr<MCObjectTargetWriter>
1405   createObjectTargetWriter() const override {
1406     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1407     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1408     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1409   }
1410 
1411   /// Implementation of algorithm to generate the compact unwind encoding
1412   /// for the CFI instructions.
1413   uint32_t
1414   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1415     if (Instrs.empty()) return 0;
1416 
1417     // Reset the saved registers.
1418     unsigned SavedRegIdx = 0;
1419     memset(SavedRegs, 0, sizeof(SavedRegs));
1420 
1421     bool HasFP = false;
1422 
1423     // Encode that we are using EBP/RBP as the frame pointer.
1424     uint32_t CompactUnwindEncoding = 0;
1425 
1426     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1427     unsigned InstrOffset = 0;
1428     unsigned StackAdjust = 0;
1429     unsigned StackSize = 0;
1430     unsigned NumDefCFAOffsets = 0;
1431 
1432     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
1433       const MCCFIInstruction &Inst = Instrs[i];
1434 
1435       switch (Inst.getOperation()) {
1436       default:
1437         // Any other CFI directives indicate a frame that we aren't prepared
1438         // to represent via compact unwind, so just bail out.
1439         return 0;
1440       case MCCFIInstruction::OpDefCfaRegister: {
1441         // Defines a frame pointer. E.g.
1442         //
1443         //     movq %rsp, %rbp
1444         //  L0:
1445         //     .cfi_def_cfa_register %rbp
1446         //
1447         HasFP = true;
1448 
1449         // If the frame pointer is other than esp/rsp, we do not have a way to
1450         // generate a compact unwinding representation, so bail out.
1451         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1452             (Is64Bit ? X86::RBP : X86::EBP))
1453           return 0;
1454 
1455         // Reset the counts.
1456         memset(SavedRegs, 0, sizeof(SavedRegs));
1457         StackAdjust = 0;
1458         SavedRegIdx = 0;
1459         InstrOffset += MoveInstrSize;
1460         break;
1461       }
1462       case MCCFIInstruction::OpDefCfaOffset: {
1463         // Defines a new offset for the CFA. E.g.
1464         //
1465         //  With frame:
1466         //
1467         //     pushq %rbp
1468         //  L0:
1469         //     .cfi_def_cfa_offset 16
1470         //
1471         //  Without frame:
1472         //
1473         //     subq $72, %rsp
1474         //  L0:
1475         //     .cfi_def_cfa_offset 80
1476         //
1477         StackSize = Inst.getOffset() / StackDivide;
1478         ++NumDefCFAOffsets;
1479         break;
1480       }
1481       case MCCFIInstruction::OpOffset: {
1482         // Defines a "push" of a callee-saved register. E.g.
1483         //
1484         //     pushq %r15
1485         //     pushq %r14
1486         //     pushq %rbx
1487         //  L0:
1488         //     subq $120, %rsp
1489         //  L1:
1490         //     .cfi_offset %rbx, -40
1491         //     .cfi_offset %r14, -32
1492         //     .cfi_offset %r15, -24
1493         //
1494         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1495           // If there are too many saved registers, we cannot use a compact
1496           // unwind encoding.
1497           return CU::UNWIND_MODE_DWARF;
1498 
1499         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1500         SavedRegs[SavedRegIdx++] = Reg;
1501         StackAdjust += OffsetSize;
1502         InstrOffset += PushInstrSize(Reg);
1503         break;
1504       }
1505       }
1506     }
1507 
1508     StackAdjust /= StackDivide;
1509 
1510     if (HasFP) {
1511       if ((StackAdjust & 0xFF) != StackAdjust)
1512         // Offset was too big for a compact unwind encoding.
1513         return CU::UNWIND_MODE_DWARF;
1514 
1515       // Get the encoding of the saved registers when we have a frame pointer.
1516       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1517       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1518 
1519       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1520       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1521       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1522     } else {
1523       SubtractInstrIdx += InstrOffset;
1524       ++StackAdjust;
1525 
1526       if ((StackSize & 0xFF) == StackSize) {
1527         // Frameless stack with a small stack size.
1528         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1529 
1530         // Encode the stack size.
1531         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1532       } else {
1533         if ((StackAdjust & 0x7) != StackAdjust)
1534           // The extra stack adjustments are too big for us to handle.
1535           return CU::UNWIND_MODE_DWARF;
1536 
1537         // Frameless stack with an offset too large for us to encode compactly.
1538         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1539 
1540         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1541         // instruction.
1542         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1543 
1544         // Encode any extra stack adjustments (done via push instructions).
1545         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1546       }
1547 
1548       // Encode the number of registers saved. (Reverse the list first.)
1549       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1550       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1551 
1552       // Get the encoding of the saved registers when we don't have a frame
1553       // pointer.
1554       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1555       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1556 
1557       // Encode the register encoding.
1558       CompactUnwindEncoding |=
1559         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1560     }
1561 
1562     return CompactUnwindEncoding;
1563   }
1564 };
1565 
1566 } // end anonymous namespace
1567 
1568 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1569                                            const MCSubtargetInfo &STI,
1570                                            const MCRegisterInfo &MRI,
1571                                            const MCTargetOptions &Options) {
1572   const Triple &TheTriple = STI.getTargetTriple();
1573   if (TheTriple.isOSBinFormatMachO())
1574     return new DarwinX86AsmBackend(T, MRI, STI);
1575 
1576   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1577     return new WindowsX86AsmBackend(T, false, STI);
1578 
1579   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1580 
1581   if (TheTriple.isOSIAMCU())
1582     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1583 
1584   return new ELFX86_32AsmBackend(T, OSABI, STI);
1585 }
1586 
1587 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1588                                            const MCSubtargetInfo &STI,
1589                                            const MCRegisterInfo &MRI,
1590                                            const MCTargetOptions &Options) {
1591   const Triple &TheTriple = STI.getTargetTriple();
1592   if (TheTriple.isOSBinFormatMachO())
1593     return new DarwinX86AsmBackend(T, MRI, STI);
1594 
1595   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1596     return new WindowsX86AsmBackend(T, true, STI);
1597 
1598   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1599 
1600   if (TheTriple.getEnvironment() == Triple::GNUX32)
1601     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1602   return new ELFX86_64AsmBackend(T, OSABI, STI);
1603 }
1604