xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision ca53e5aedfebcc1b4091b68e01b2d5cae923f85e)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/BinaryFormat/ELF.h"
13 #include "llvm/BinaryFormat/MachO.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmLayout.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCExpr.h"
22 #include "llvm/MC/MCFixupKindInfo.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCMachObjectWriter.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionMachO.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCValue.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 using namespace llvm;
38 
39 namespace {
40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
41 class X86AlignBranchKind {
42 private:
43   uint8_t AlignBranchKind = 0;
44 
45 public:
46   void operator=(const std::string &Val) {
47     if (Val.empty())
48       return;
49     SmallVector<StringRef, 6> BranchTypes;
50     StringRef(Val).split(BranchTypes, '+', -1, false);
51     for (auto BranchType : BranchTypes) {
52       if (BranchType == "fused")
53         addKind(X86::AlignBranchFused);
54       else if (BranchType == "jcc")
55         addKind(X86::AlignBranchJcc);
56       else if (BranchType == "jmp")
57         addKind(X86::AlignBranchJmp);
58       else if (BranchType == "call")
59         addKind(X86::AlignBranchCall);
60       else if (BranchType == "ret")
61         addKind(X86::AlignBranchRet);
62       else if (BranchType == "indirect")
63         addKind(X86::AlignBranchIndirect);
64       else {
65         errs() << "invalid argument " << BranchType.str()
66                << " to -x86-align-branch=; each element must be one of: fused, "
67                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
68       }
69     }
70   }
71 
72   operator uint8_t() const { return AlignBranchKind; }
73   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
74 };
75 
// Storage written by the -x86-align-branch option below (via cl::location).
// It is defined before the option that refers to it.
X86AlignBranchKind X86AlignBranchKindLoc;

// Size of the boundary branches are aligned against; 0 (the default) means
// branches are not aligned.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// Plus-separated list of branch kinds to align. The string is parsed by
// X86AlignBranchKind::operator= and stored in X86AlignBranchKindLoc.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// Master switch: when set, the X86AsmBackend constructor selects a 32-byte
// boundary and the fused/jcc/jmp branch kinds as defaults.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// Upper bound on the number of prefixes used for padding; 0 by default.
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// Hidden option, on by default.
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// Hidden option, on by default; consulted by allowEnhancedRelaxation().
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
118 
119 class X86ELFObjectWriter : public MCELFObjectTargetWriter {
120 public:
121   X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
122                      bool HasRelocationAddend, bool foobar)
123     : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
124 };
125 
126 class X86AsmBackend : public MCAsmBackend {
127   const MCSubtargetInfo &STI;
128   std::unique_ptr<const MCInstrInfo> MCII;
129   X86AlignBranchKind AlignBranchType;
130   Align AlignBoundary;
131   unsigned TargetPrefixMax = 0;
132 
133   MCInst PrevInst;
134   MCBoundaryAlignFragment *PendingBA = nullptr;
135   std::pair<MCFragment *, size_t> PrevInstPosition;
136   bool CanPadInst;
137 
138   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
139   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
140   bool needAlign(const MCInst &Inst) const;
141   bool canPadBranches(MCObjectStreamer &OS) const;
142   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
143 
144 public:
145   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
146       : MCAsmBackend(support::little), STI(STI),
147         MCII(T.createMCInstrInfo()) {
148     if (X86AlignBranchWithin32BBoundaries) {
149       // At the moment, this defaults to aligning fused branches, unconditional
150       // jumps, and (unfused) conditional jumps with nops.  Both the
151       // instructions aligned and the alignment method (nop vs prefix) may
152       // change in the future.
153       AlignBoundary = assumeAligned(32);;
154       AlignBranchType.addKind(X86::AlignBranchFused);
155       AlignBranchType.addKind(X86::AlignBranchJcc);
156       AlignBranchType.addKind(X86::AlignBranchJmp);
157     }
158     // Allow overriding defaults set by master flag
159     if (X86AlignBranchBoundary.getNumOccurrences())
160       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
161     if (X86AlignBranch.getNumOccurrences())
162       AlignBranchType = X86AlignBranchKindLoc;
163     if (X86PadMaxPrefixSize.getNumOccurrences())
164       TargetPrefixMax = X86PadMaxPrefixSize;
165   }
166 
167   bool allowAutoPadding() const override;
168   bool allowEnhancedRelaxation() const override;
169   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
170   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
171 
172   unsigned getNumFixupKinds() const override {
173     return X86::NumTargetFixupKinds;
174   }
175 
176   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
177 
178   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
179 
180   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
181                              const MCValue &Target) override;
182 
183   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
184                   const MCValue &Target, MutableArrayRef<char> Data,
185                   uint64_t Value, bool IsResolved,
186                   const MCSubtargetInfo *STI) const override;
187 
188   bool mayNeedRelaxation(const MCInst &Inst,
189                          const MCSubtargetInfo &STI) const override;
190 
191   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
192                             const MCRelaxableFragment *DF,
193                             const MCAsmLayout &Layout) const override;
194 
195   void relaxInstruction(MCInst &Inst,
196                         const MCSubtargetInfo &STI) const override;
197 
198   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
199                                    MCCodeEmitter &Emitter,
200                                    unsigned &RemainingSize) const;
201 
202   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
203                                unsigned &RemainingSize) const;
204 
205   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
206                               unsigned &RemainingSize) const;
207 
208   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
209 
210   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
211 };
212 } // end anonymous namespace
213 
214 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
215   unsigned Op = Inst.getOpcode();
216   switch (Op) {
217   default:
218     return Op;
219   case X86::JCC_1:
220     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
221   case X86::JMP_1:
222     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
223   }
224 }
225 
/// Map an arithmetic/push opcode that takes an 8-bit immediate (the *i8
/// forms) to the corresponding opcode with a wider immediate. Opcodes not
/// listed here are returned unchanged, which callers use to detect "not
/// relaxable".
static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;

    // IMUL
  case X86::IMUL16rri8: return X86::IMUL16rri;
  case X86::IMUL16rmi8: return X86::IMUL16rmi;
  case X86::IMUL32rri8: return X86::IMUL32rri;
  case X86::IMUL32rmi8: return X86::IMUL32rmi;
  case X86::IMUL64rri8: return X86::IMUL64rri32;
  case X86::IMUL64rmi8: return X86::IMUL64rmi32;

    // AND
  case X86::AND16ri8: return X86::AND16ri;
  case X86::AND16mi8: return X86::AND16mi;
  case X86::AND32ri8: return X86::AND32ri;
  case X86::AND32mi8: return X86::AND32mi;
  case X86::AND64ri8: return X86::AND64ri32;
  case X86::AND64mi8: return X86::AND64mi32;

    // OR
  case X86::OR16ri8: return X86::OR16ri;
  case X86::OR16mi8: return X86::OR16mi;
  case X86::OR32ri8: return X86::OR32ri;
  case X86::OR32mi8: return X86::OR32mi;
  case X86::OR64ri8: return X86::OR64ri32;
  case X86::OR64mi8: return X86::OR64mi32;

    // XOR
  case X86::XOR16ri8: return X86::XOR16ri;
  case X86::XOR16mi8: return X86::XOR16mi;
  case X86::XOR32ri8: return X86::XOR32ri;
  case X86::XOR32mi8: return X86::XOR32mi;
  case X86::XOR64ri8: return X86::XOR64ri32;
  case X86::XOR64mi8: return X86::XOR64mi32;

    // ADD
  case X86::ADD16ri8: return X86::ADD16ri;
  case X86::ADD16mi8: return X86::ADD16mi;
  case X86::ADD32ri8: return X86::ADD32ri;
  case X86::ADD32mi8: return X86::ADD32mi;
  case X86::ADD64ri8: return X86::ADD64ri32;
  case X86::ADD64mi8: return X86::ADD64mi32;

    // ADC
  case X86::ADC16ri8: return X86::ADC16ri;
  case X86::ADC16mi8: return X86::ADC16mi;
  case X86::ADC32ri8: return X86::ADC32ri;
  case X86::ADC32mi8: return X86::ADC32mi;
  case X86::ADC64ri8: return X86::ADC64ri32;
  case X86::ADC64mi8: return X86::ADC64mi32;

    // SUB
  case X86::SUB16ri8: return X86::SUB16ri;
  case X86::SUB16mi8: return X86::SUB16mi;
  case X86::SUB32ri8: return X86::SUB32ri;
  case X86::SUB32mi8: return X86::SUB32mi;
  case X86::SUB64ri8: return X86::SUB64ri32;
  case X86::SUB64mi8: return X86::SUB64mi32;

    // SBB
  case X86::SBB16ri8: return X86::SBB16ri;
  case X86::SBB16mi8: return X86::SBB16mi;
  case X86::SBB32ri8: return X86::SBB32ri;
  case X86::SBB32mi8: return X86::SBB32mi;
  case X86::SBB64ri8: return X86::SBB64ri32;
  case X86::SBB64mi8: return X86::SBB64mi32;

    // CMP
  case X86::CMP16ri8: return X86::CMP16ri;
  case X86::CMP16mi8: return X86::CMP16mi;
  case X86::CMP32ri8: return X86::CMP32ri;
  case X86::CMP32mi8: return X86::CMP32mi;
  case X86::CMP64ri8: return X86::CMP64ri32;
  case X86::CMP64mi8: return X86::CMP64mi32;

    // PUSH
  case X86::PUSH32i8:  return X86::PUSHi32;
  case X86::PUSH16i8:  return X86::PUSHi16;
  case X86::PUSH64i8:  return X86::PUSH64i32;
  }
}
310 
311 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
312   unsigned R = getRelaxedOpcodeArith(Inst);
313   if (R != Inst.getOpcode())
314     return R;
315   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
316 }
317 
318 static X86::CondCode getCondFromBranch(const MCInst &MI,
319                                        const MCInstrInfo &MCII) {
320   unsigned Opcode = MI.getOpcode();
321   switch (Opcode) {
322   default:
323     return X86::COND_INVALID;
324   case X86::JCC_1: {
325     const MCInstrDesc &Desc = MCII.get(Opcode);
326     return static_cast<X86::CondCode>(
327         MI.getOperand(Desc.getNumOperands() - 1).getImm());
328   }
329   }
330 }
331 
332 static X86::SecondMacroFusionInstKind
333 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
334   X86::CondCode CC = getCondFromBranch(MI, MCII);
335   return classifySecondCondCodeInMacroFusion(CC);
336 }
337 
338 /// Check if the instruction uses RIP relative addressing.
339 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
340   unsigned Opcode = MI.getOpcode();
341   const MCInstrDesc &Desc = MCII.get(Opcode);
342   uint64_t TSFlags = Desc.TSFlags;
343   unsigned CurOp = X86II::getOperandBias(Desc);
344   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
345   if (MemoryOperand < 0)
346     return false;
347   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
348   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
349   return (BaseReg == X86::RIP);
350 }
351 
352 /// Check if the instruction is a prefix.
353 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
354   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
355 }
356 
357 /// Check if the instruction is valid as the first instruction in macro fusion.
358 static bool isFirstMacroFusibleInst(const MCInst &Inst,
359                                     const MCInstrInfo &MCII) {
360   // An Intel instruction with RIP relative addressing is not macro fusible.
361   if (isRIPRelative(Inst, MCII))
362     return false;
363   X86::FirstMacroFusionInstKind FIK =
364       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
365   return FIK != X86::FirstMacroFusionInstKind::Invalid;
366 }
367 
368 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
369 /// get a better peformance in some cases. Here, we determine which prefix is
370 /// the most suitable.
371 ///
372 /// If the instruction has a segment override prefix, use the existing one.
373 /// If the target is 64-bit, use the CS.
374 /// If the target is 32-bit,
375 ///   - If the instruction has a ESP/EBP base register, use SS.
376 ///   - Otherwise use DS.
377 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
378   assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
379          "Prefixes can be added only in 32-bit or 64-bit mode.");
380   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
381   uint64_t TSFlags = Desc.TSFlags;
382 
383   // Determine where the memory operand starts, if present.
384   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
385   if (MemoryOperand != -1)
386     MemoryOperand += X86II::getOperandBias(Desc);
387 
388   unsigned SegmentReg = 0;
389   if (MemoryOperand >= 0) {
390     // Check for explicit segment override on memory operand.
391     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
392   }
393 
394   switch (TSFlags & X86II::FormMask) {
395   default:
396     break;
397   case X86II::RawFrmDstSrc: {
398     // Check segment override opcode prefix as needed (not for %ds).
399     if (Inst.getOperand(2).getReg() != X86::DS)
400       SegmentReg = Inst.getOperand(2).getReg();
401     break;
402   }
403   case X86II::RawFrmSrc: {
404     // Check segment override opcode prefix as needed (not for %ds).
405     if (Inst.getOperand(1).getReg() != X86::DS)
406       SegmentReg = Inst.getOperand(1).getReg();
407     break;
408   }
409   case X86II::RawFrmMemOffs: {
410     // Check segment override opcode prefix as needed.
411     SegmentReg = Inst.getOperand(1).getReg();
412     break;
413   }
414   }
415 
416   if (SegmentReg != 0)
417     return X86::getSegmentOverridePrefixForReg(SegmentReg);
418 
419   if (STI.hasFeature(X86::Mode64Bit))
420     return X86::CS_Encoding;
421 
422   if (MemoryOperand >= 0) {
423     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
424     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
425     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
426       return X86::SS_Encoding;
427   }
428   return X86::DS_Encoding;
429 }
430 
431 /// Check if the two instructions will be macro-fused on the target cpu.
432 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
433   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
434   if (!InstDesc.isConditionalBranch())
435     return false;
436   if (!isFirstMacroFusibleInst(Cmp, *MCII))
437     return false;
438   const X86::FirstMacroFusionInstKind CmpKind =
439       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
440   const X86::SecondMacroFusionInstKind BranchKind =
441       classifySecondInstInMacroFusion(Jcc, *MCII);
442   return X86::isMacroFused(CmpKind, BranchKind);
443 }
444 
445 /// Check if the instruction has a variant symbol operand.
446 static bool hasVariantSymbol(const MCInst &MI) {
447   for (auto &Operand : MI) {
448     if (!Operand.isExpr())
449       continue;
450     const MCExpr &Expr = *Operand.getExpr();
451     if (Expr.getKind() == MCExpr::SymbolRef &&
452         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
453       return true;
454   }
455   return false;
456 }
457 
458 bool X86AsmBackend::allowAutoPadding() const {
459   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
460 }
461 
462 bool X86AsmBackend::allowEnhancedRelaxation() const {
463   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
464 }
465 
466 /// X86 has certain instructions which enable interrupts exactly one
467 /// instruction *after* the instruction which stores to SS.  Return true if the
468 /// given instruction has such an interrupt delay slot.
469 static bool hasInterruptDelaySlot(const MCInst &Inst) {
470   switch (Inst.getOpcode()) {
471   case X86::POPSS16:
472   case X86::POPSS32:
473   case X86::STI:
474     return true;
475 
476   case X86::MOV16sr:
477   case X86::MOV32sr:
478   case X86::MOV64sr:
479   case X86::MOV16sm:
480     if (Inst.getOperand(0).getReg() == X86::SS)
481       return true;
482     break;
483   }
484   return false;
485 }
486 
487 /// Check if the instruction to be emitted is right after any data.
488 static bool
489 isRightAfterData(MCFragment *CurrentFragment,
490                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
491   MCFragment *F = CurrentFragment;
492   // Empty data fragments may be created to prevent further data being
493   // added into the previous fragment, we need to skip them since they
494   // have no contents.
495   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
496     if (cast<MCDataFragment>(F)->getContents().size() != 0)
497       break;
498 
499   // Since data is always emitted into a DataFragment, our check strategy is
500   // simple here.
501   //   - If the fragment is a DataFragment
502   //     - If it's not the fragment where the previous instruction is,
503   //       returns true.
504   //     - If it's the fragment holding the previous instruction but its
505   //       size changed since the the previous instruction was emitted into
506   //       it, returns true.
507   //     - Otherwise returns false.
508   //   - If the fragment is not a DataFragment, returns false.
509   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
510     return DF != PrevInstPosition.first ||
511            DF->getContents().size() != PrevInstPosition.second;
512 
513   return false;
514 }
515 
516 /// \returns the fragment size if it has instructions, otherwise returns 0.
517 static size_t getSizeForInstFragment(const MCFragment *F) {
518   if (!F || !F->hasInstructions())
519     return 0;
520   // MCEncodedFragmentWithContents being templated makes this tricky.
521   switch (F->getKind()) {
522   default:
523     llvm_unreachable("Unknown fragment with instructions!");
524   case MCFragment::FT_Data:
525     return cast<MCDataFragment>(*F).getContents().size();
526   case MCFragment::FT_Relaxable:
527     return cast<MCRelaxableFragment>(*F).getContents().size();
528   case MCFragment::FT_CompactEncodedInst:
529     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
530   }
531 }
532 
533 /// Return true if we can insert NOP or prefixes automatically before the
534 /// the instruction to be emitted.
535 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
536   if (hasVariantSymbol(Inst))
537     // Linker may rewrite the instruction with variant symbol operand(e.g.
538     // TLSCALL).
539     return false;
540 
541   if (hasInterruptDelaySlot(PrevInst))
542     // If this instruction follows an interrupt enabling instruction with a one
543     // instruction delay, inserting a nop would change behavior.
544     return false;
545 
546   if (isPrefix(PrevInst, *MCII))
547     // If this instruction follows a prefix, inserting a nop/prefix would change
548     // semantic.
549     return false;
550 
551   if (isPrefix(Inst, *MCII))
552     // If this instruction is a prefix, inserting a prefix would change
553     // semantic.
554     return false;
555 
556   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
557     // If this instruction follows any data, there is no clear
558     // instruction boundary, inserting a nop/prefix would change semantic.
559     return false;
560 
561   return true;
562 }
563 
564 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
565   if (!OS.getAllowAutoPadding())
566     return false;
567   assert(allowAutoPadding() && "incorrect initialization!");
568 
569   // We only pad in text section.
570   if (!OS.getCurrentSectionOnly()->getKind().isText())
571     return false;
572 
573   // To be Done: Currently don't deal with Bundle cases.
574   if (OS.getAssembler().isBundlingEnabled())
575     return false;
576 
577   // Branches only need to be aligned in 32-bit or 64-bit mode.
578   if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
579     return false;
580 
581   return true;
582 }
583 
584 /// Check if the instruction operand needs to be aligned.
585 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
586   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
587   return (Desc.isConditionalBranch() &&
588           (AlignBranchType & X86::AlignBranchJcc)) ||
589          (Desc.isUnconditionalBranch() &&
590           (AlignBranchType & X86::AlignBranchJmp)) ||
591          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
592          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
593          (Desc.isIndirectBranch() &&
594           (AlignBranchType & X86::AlignBranchIndirect));
595 }
596 
597 /// Insert BoundaryAlignFragment before instructions to align branches.
598 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
599                                          const MCInst &Inst) {
600   CanPadInst = canPadInst(Inst, OS);
601 
602   if (!canPadBranches(OS))
603     return;
604 
605   if (!isMacroFused(PrevInst, Inst))
606     // Macro fusion doesn't happen indeed, clear the pending.
607     PendingBA = nullptr;
608 
609   if (!CanPadInst)
610     return;
611 
612   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
613     // Macro fusion actually happens and there is no other fragment inserted
614     // after the previous instruction.
615     //
616     // Do nothing here since we already inserted a BoudaryAlign fragment when
617     // we met the first instruction in the fused pair and we'll tie them
618     // together in emitInstructionEnd.
619     //
620     // Note: When there is at least one fragment, such as MCAlignFragment,
621     // inserted after the previous instruction, e.g.
622     //
623     // \code
624     //   cmp %rax %rcx
625     //   .align 16
626     //   je .Label0
627     // \ endcode
628     //
629     // We will treat the JCC as a unfused branch although it may be fused
630     // with the CMP.
631     return;
632   }
633 
634   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
635                           isFirstMacroFusibleInst(Inst, *MCII))) {
636     // If we meet a unfused branch or the first instuction in a fusiable pair,
637     // insert a BoundaryAlign fragment.
638     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
639   }
640 }
641 
642 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
643 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
644   PrevInst = Inst;
645   MCFragment *CF = OS.getCurrentFragment();
646   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
647   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
648     F->setAllowAutoPadding(CanPadInst);
649 
650   if (!canPadBranches(OS))
651     return;
652 
653   if (!needAlign(Inst) || !PendingBA)
654     return;
655 
656   // Tie the aligned instructions into a a pending BoundaryAlign.
657   PendingBA->setLastFragment(CF);
658   PendingBA = nullptr;
659 
660   // We need to ensure that further data isn't added to the current
661   // DataFragment, so that we can get the size of instructions later in
662   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
663   // DataFragment.
664   if (isa_and_nonnull<MCDataFragment>(CF))
665     OS.insert(new MCDataFragment());
666 
667   // Update the maximum alignment on the current section if necessary.
668   MCSection *Sec = OS.getCurrentSectionOnly();
669   if (AlignBoundary.value() > Sec->getAlignment())
670     Sec->setAlignment(AlignBoundary);
671 }
672 
/// Translate a relocation name to a fixup kind.
///
/// For ELF targets, \p Name is looked up among the relocation names listed in
/// the x86_64 or i386 ELF relocation definition file (chosen by the target
/// architecture). A match is returned as FirstLiteralRelocationKind plus the
/// relocation's value; an unknown name yields None. Non-ELF targets defer to
/// MCAsmBackend::getFixupKind.
Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      // Each ELF_RELOC(X, Y) entry of the included file expands to one
      // .Case("X", Y) of the string switch.
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Default(-1u);
    }
    // -1u is the "no match" sentinel supplied to Default() above.
    if (Type == -1u)
      return None;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
695 
696 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
697   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
698       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
699       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
700       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
701       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
702       {"reloc_signed_4byte", 0, 32, 0},
703       {"reloc_signed_4byte_relax", 0, 32, 0},
704       {"reloc_global_offset_table", 0, 32, 0},
705       {"reloc_global_offset_table8", 0, 64, 0},
706       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
707   };
708 
709   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
710   // do not require any extra processing.
711   if (Kind >= FirstLiteralRelocationKind)
712     return MCAsmBackend::getFixupKindInfo(FK_NONE);
713 
714   if (Kind < FirstTargetFixupKind)
715     return MCAsmBackend::getFixupKindInfo(Kind);
716 
717   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
718          "Invalid kind!");
719   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
720   return Infos[Kind - FirstTargetFixupKind];
721 }
722 
723 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
724                                           const MCFixup &Fixup,
725                                           const MCValue &) {
726   return Fixup.getKind() >= FirstLiteralRelocationKind;
727 }
728 
729 static unsigned getFixupKindSize(unsigned Kind) {
730   switch (Kind) {
731   default:
732     llvm_unreachable("invalid fixup kind!");
733   case FK_NONE:
734     return 0;
735   case FK_PCRel_1:
736   case FK_SecRel_1:
737   case FK_Data_1:
738     return 1;
739   case FK_PCRel_2:
740   case FK_SecRel_2:
741   case FK_Data_2:
742     return 2;
743   case FK_PCRel_4:
744   case X86::reloc_riprel_4byte:
745   case X86::reloc_riprel_4byte_relax:
746   case X86::reloc_riprel_4byte_relax_rex:
747   case X86::reloc_riprel_4byte_movq_load:
748   case X86::reloc_signed_4byte:
749   case X86::reloc_signed_4byte_relax:
750   case X86::reloc_global_offset_table:
751   case X86::reloc_branch_4byte_pcrel:
752   case FK_SecRel_4:
753   case FK_Data_4:
754     return 4;
755   case FK_PCRel_8:
756   case FK_SecRel_8:
757   case FK_Data_8:
758   case X86::reloc_global_offset_table8:
759     return 8;
760   }
761 }
762 
763 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
764                                const MCValue &Target,
765                                MutableArrayRef<char> Data,
766                                uint64_t Value, bool IsResolved,
767                                const MCSubtargetInfo *STI) const {
768   unsigned Kind = Fixup.getKind();
769   if (Kind >= FirstLiteralRelocationKind)
770     return;
771   unsigned Size = getFixupKindSize(Kind);
772 
773   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
774 
775   int64_t SignedValue = static_cast<int64_t>(Value);
776   if ((Target.isAbsolute() || IsResolved) &&
777       getFixupKindInfo(Fixup.getKind()).Flags &
778       MCFixupKindInfo::FKF_IsPCRel) {
779     // check that PC relative fixup fits into the fixup size.
780     if (Size > 0 && !isIntN(Size * 8, SignedValue))
781       Asm.getContext().reportError(
782                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
783                                    " is too large for field of " + Twine(Size) +
784                                    ((Size == 1) ? " byte." : " bytes."));
785   } else {
786     // Check that uppper bits are either all zeros or all ones.
787     // Specifically ignore overflow/underflow as long as the leakage is
788     // limited to the lower bits. This is to remain compatible with
789     // other assemblers.
790     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
791            "Value does not fit in the Fixup field");
792   }
793 
794   for (unsigned i = 0; i != Size; ++i)
795     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
796 }
797 
798 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
799                                       const MCSubtargetInfo &STI) const {
800   // Branches can always be relaxed in either mode.
801   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
802     return true;
803 
804   // Check if this instruction is ever relaxable.
805   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
806     return false;
807 
808 
809   // Check if the relaxable operand has an expression. For the current set of
810   // relaxable instructions, the relaxable operand is always the last operand.
811   unsigned RelaxableOp = Inst.getNumOperands() - 1;
812   if (Inst.getOperand(RelaxableOp).isExpr())
813     return true;
814 
815   return false;
816 }
817 
818 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
819                                          uint64_t Value,
820                                          const MCRelaxableFragment *DF,
821                                          const MCAsmLayout &Layout) const {
822   // Relax if the value is too big for a (signed) i8.
823   return !isInt<8>(Value);
824 }
825 
826 // FIXME: Can tblgen help at all here to verify there aren't other instructions
827 // we can relax?
828 void X86AsmBackend::relaxInstruction(MCInst &Inst,
829                                      const MCSubtargetInfo &STI) const {
830   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
831   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
832   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
833 
834   if (RelaxedOp == Inst.getOpcode()) {
835     SmallString<256> Tmp;
836     raw_svector_ostream OS(Tmp);
837     Inst.dump_pretty(OS);
838     OS << "\n";
839     report_fatal_error("unexpected instruction to relax: " + OS.str());
840   }
841 
842   Inst.setOpcode(RelaxedOp);
843 }
844 
845 /// Return true if this instruction has been fully relaxed into it's most
846 /// general available form.
847 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
848   auto &Inst = RF.getInst();
849   auto &STI = *RF.getSubtargetInfo();
850   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
851   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
852 }
853 
854 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
855                                             MCCodeEmitter &Emitter,
856                                             unsigned &RemainingSize) const {
857   if (!RF.getAllowAutoPadding())
858     return false;
859   // If the instruction isn't fully relaxed, shifting it around might require a
860   // larger value for one of the fixups then can be encoded.  The outer loop
861   // will also catch this before moving to the next instruction, but we need to
862   // prevent padding this single instruction as well.
863   if (!isFullyRelaxed(RF))
864     return false;
865 
866   const unsigned OldSize = RF.getContents().size();
867   if (OldSize == 15)
868     return false;
869 
870   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
871   const unsigned RemainingPrefixSize = [&]() -> unsigned {
872     SmallString<15> Code;
873     raw_svector_ostream VecOS(Code);
874     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
875     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
876 
877     // TODO: It turns out we need a decent amount of plumbing for the target
878     // specific bits to determine number of prefixes its safe to add.  Various
879     // targets (older chips mostly, but also Atom family) encounter decoder
880     // stalls with too many prefixes.  For testing purposes, we set the value
881     // externally for the moment.
882     unsigned ExistingPrefixSize = Code.size();
883     if (TargetPrefixMax <= ExistingPrefixSize)
884       return 0;
885     return TargetPrefixMax - ExistingPrefixSize;
886   }();
887   const unsigned PrefixBytesToAdd =
888       std::min(MaxPossiblePad, RemainingPrefixSize);
889   if (PrefixBytesToAdd == 0)
890     return false;
891 
892   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
893 
894   SmallString<256> Code;
895   Code.append(PrefixBytesToAdd, Prefix);
896   Code.append(RF.getContents().begin(), RF.getContents().end());
897   RF.getContents() = Code;
898 
899   // Adjust the fixups for the change in offsets
900   for (auto &F : RF.getFixups()) {
901     F.setOffset(F.getOffset() + PrefixBytesToAdd);
902   }
903 
904   RemainingSize -= PrefixBytesToAdd;
905   return true;
906 }
907 
908 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
909                                                 MCCodeEmitter &Emitter,
910                                                 unsigned &RemainingSize) const {
911   if (isFullyRelaxed(RF))
912     // TODO: There are lots of other tricks we could apply for increasing
913     // encoding size without impacting performance.
914     return false;
915 
916   MCInst Relaxed = RF.getInst();
917   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
918 
919   SmallVector<MCFixup, 4> Fixups;
920   SmallString<15> Code;
921   raw_svector_ostream VecOS(Code);
922   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
923   const unsigned OldSize = RF.getContents().size();
924   const unsigned NewSize = Code.size();
925   assert(NewSize >= OldSize && "size decrease during relaxation?");
926   unsigned Delta = NewSize - OldSize;
927   if (Delta > RemainingSize)
928     return false;
929   RF.setInst(Relaxed);
930   RF.getContents() = Code;
931   RF.getFixups() = Fixups;
932   RemainingSize -= Delta;
933   return true;
934 }
935 
936 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
937                                            MCCodeEmitter &Emitter,
938                                            unsigned &RemainingSize) const {
939   bool Changed = false;
940   if (RemainingSize != 0)
941     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
942   if (RemainingSize != 0)
943     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
944   return Changed;
945 }
946 
947 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
948                                  MCAsmLayout &Layout) const {
949   // See if we can further relax some instructions to cut down on the number of
950   // nop bytes required for code alignment.  The actual win is in reducing
951   // instruction count, not number of bytes.  Modern X86-64 can easily end up
952   // decode limited.  It is often better to reduce the number of instructions
953   // (i.e. eliminate nops) even at the cost of increasing the size and
954   // complexity of others.
955   if (!X86PadForAlign && !X86PadForBranchAlign)
956     return;
957 
958   DenseSet<MCFragment *> LabeledFragments;
959   for (const MCSymbol &S : Asm.symbols())
960     LabeledFragments.insert(S.getFragment(false));
961 
962   for (MCSection &Sec : Asm) {
963     if (!Sec.getKind().isText())
964       continue;
965 
966     SmallVector<MCRelaxableFragment *, 4> Relaxable;
967     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
968       MCFragment &F = *I;
969 
970       if (LabeledFragments.count(&F))
971         Relaxable.clear();
972 
973       if (F.getKind() == MCFragment::FT_Data ||
974           F.getKind() == MCFragment::FT_CompactEncodedInst)
975         // Skip and ignore
976         continue;
977 
978       if (F.getKind() == MCFragment::FT_Relaxable) {
979         auto &RF = cast<MCRelaxableFragment>(*I);
980         Relaxable.push_back(&RF);
981         continue;
982       }
983 
984       auto canHandle = [](MCFragment &F) -> bool {
985         switch (F.getKind()) {
986         default:
987           return false;
988         case MCFragment::FT_Align:
989           return X86PadForAlign;
990         case MCFragment::FT_BoundaryAlign:
991           return X86PadForBranchAlign;
992         }
993       };
994       // For any unhandled kind, assume we can't change layout.
995       if (!canHandle(F)) {
996         Relaxable.clear();
997         continue;
998       }
999 
1000 #ifndef NDEBUG
1001       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
1002 #endif
1003       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
1004 
1005       // To keep the effects local, prefer to relax instructions closest to
1006       // the align directive.  This is purely about human understandability
1007       // of the resulting code.  If we later find a reason to expand
1008       // particular instructions over others, we can adjust.
1009       MCFragment *FirstChangedFragment = nullptr;
1010       unsigned RemainingSize = OrigSize;
1011       while (!Relaxable.empty() && RemainingSize != 0) {
1012         auto &RF = *Relaxable.pop_back_val();
1013         // Give the backend a chance to play any tricks it wishes to increase
1014         // the encoding size of the given instruction.  Target independent code
1015         // will try further relaxation, but target's may play further tricks.
1016         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
1017           FirstChangedFragment = &RF;
1018 
1019         // If we have an instruction which hasn't been fully relaxed, we can't
1020         // skip past it and insert bytes before it.  Changing its starting
1021         // offset might require a larger negative offset than it can encode.
1022         // We don't need to worry about larger positive offsets as none of the
1023         // possible offsets between this and our align are visible, and the
1024         // ones afterwards aren't changing.
1025         if (!isFullyRelaxed(RF))
1026           break;
1027       }
1028       Relaxable.clear();
1029 
1030       if (FirstChangedFragment) {
1031         // Make sure the offsets for any fragments in the effected range get
1032         // updated.  Note that this (conservatively) invalidates the offsets of
1033         // those following, but this is not required.
1034         Layout.invalidateFragmentsFrom(FirstChangedFragment);
1035       }
1036 
1037       // BoundaryAlign explicitly tracks it's size (unlike align)
1038       if (F.getKind() == MCFragment::FT_BoundaryAlign)
1039         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
1040 
1041 #ifndef NDEBUG
1042       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
1043       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
1044       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
1045              "can't move start of next fragment!");
1046       assert(FinalSize == RemainingSize && "inconsistent size computation?");
1047 #endif
1048 
1049       // If we're looking at a boundary align, make sure we don't try to pad
1050       // its target instructions for some following directive.  Doing so would
1051       // break the alignment of the current boundary align.
1052       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
1053         const MCFragment *LastFragment = BF->getLastFragment();
1054         if (!LastFragment)
1055           continue;
1056         while (&*I != LastFragment)
1057           ++I;
1058       }
1059     }
1060   }
1061 
1062   // The layout is done. Mark every fragment as valid.
1063   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
1064     MCSection &Section = *Layout.getSectionOrder()[i];
1065     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
1066     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
1067   }
1068 }
1069 
1070 /// Write a sequence of optimal nops to the output, covering \p Count
1071 /// bytes.
1072 /// \return - true on success, false on failure
1073 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
1074   static const char Nops[10][11] = {
1075     // nop
1076     "\x90",
1077     // xchg %ax,%ax
1078     "\x66\x90",
1079     // nopl (%[re]ax)
1080     "\x0f\x1f\x00",
1081     // nopl 0(%[re]ax)
1082     "\x0f\x1f\x40\x00",
1083     // nopl 0(%[re]ax,%[re]ax,1)
1084     "\x0f\x1f\x44\x00\x00",
1085     // nopw 0(%[re]ax,%[re]ax,1)
1086     "\x66\x0f\x1f\x44\x00\x00",
1087     // nopl 0L(%[re]ax)
1088     "\x0f\x1f\x80\x00\x00\x00\x00",
1089     // nopl 0L(%[re]ax,%[re]ax,1)
1090     "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1091     // nopw 0L(%[re]ax,%[re]ax,1)
1092     "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1093     // nopw %cs:0L(%[re]ax,%[re]ax,1)
1094     "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1095   };
1096 
1097   // This CPU doesn't support long nops. If needed add more.
1098   // FIXME: We could generated something better than plain 0x90.
1099   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
1100     for (uint64_t i = 0; i < Count; ++i)
1101       OS << '\x90';
1102     return true;
1103   }
1104 
1105   // 15-bytes is the longest single NOP instruction, but 10-bytes is
1106   // commonly the longest that can be efficiently decoded.
1107   uint64_t MaxNopLength = 10;
1108   if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
1109     MaxNopLength = 7;
1110   else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
1111     MaxNopLength = 15;
1112   else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
1113     MaxNopLength = 11;
1114 
1115   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1116   // length.
1117   do {
1118     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1119     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1120     for (uint8_t i = 0; i < Prefixes; i++)
1121       OS << '\x66';
1122     const uint8_t Rest = ThisNopLength - Prefixes;
1123     if (Rest != 0)
1124       OS.write(Nops[Rest - 1], Rest);
1125     Count -= ThisNopLength;
1126   } while (Count != 0);
1127 
1128   return true;
1129 }
1130 
1131 /* *** */
1132 
1133 namespace {
1134 
1135 class ELFX86AsmBackend : public X86AsmBackend {
1136 public:
1137   uint8_t OSABI;
1138   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1139       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1140 };
1141 
1142 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1143 public:
1144   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1145                       const MCSubtargetInfo &STI)
1146     : ELFX86AsmBackend(T, OSABI, STI) {}
1147 
1148   std::unique_ptr<MCObjectTargetWriter>
1149   createObjectTargetWriter() const override {
1150     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1151   }
1152 };
1153 
1154 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1155 public:
1156   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1157                        const MCSubtargetInfo &STI)
1158       : ELFX86AsmBackend(T, OSABI, STI) {}
1159 
1160   std::unique_ptr<MCObjectTargetWriter>
1161   createObjectTargetWriter() const override {
1162     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1163                                     ELF::EM_X86_64);
1164   }
1165 };
1166 
1167 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1168 public:
1169   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1170                          const MCSubtargetInfo &STI)
1171       : ELFX86AsmBackend(T, OSABI, STI) {}
1172 
1173   std::unique_ptr<MCObjectTargetWriter>
1174   createObjectTargetWriter() const override {
1175     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1176                                     ELF::EM_IAMCU);
1177   }
1178 };
1179 
1180 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1181 public:
1182   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1183                       const MCSubtargetInfo &STI)
1184     : ELFX86AsmBackend(T, OSABI, STI) {}
1185 
1186   std::unique_ptr<MCObjectTargetWriter>
1187   createObjectTargetWriter() const override {
1188     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1189   }
1190 };
1191 
1192 class WindowsX86AsmBackend : public X86AsmBackend {
1193   bool Is64Bit;
1194 
1195 public:
1196   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1197                        const MCSubtargetInfo &STI)
1198     : X86AsmBackend(T, STI)
1199     , Is64Bit(is64Bit) {
1200   }
1201 
1202   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1203     return StringSwitch<Optional<MCFixupKind>>(Name)
1204         .Case("dir32", FK_Data_4)
1205         .Case("secrel32", FK_SecRel_4)
1206         .Case("secidx", FK_SecRel_2)
1207         .Default(MCAsmBackend::getFixupKind(Name));
1208   }
1209 
1210   std::unique_ptr<MCObjectTargetWriter>
1211   createObjectTargetWriter() const override {
1212     return createX86WinCOFFObjectWriter(Is64Bit);
1213   }
1214 };
1215 
namespace CU {

  /// Mode values and register masks used when building a compact unwind
  /// encoding.
  enum CompactUnwindEncodings {
    /// Frame based on [RE]BP: [RE]BP is pushed on the stack immediately after
    /// the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME = 0x01000000,

    /// Frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD = 0x02000000,

    /// Frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF = 0x04000000,

    /// Mask selecting the register bits of a frame-based encoding.
    UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

    /// Mask selecting the register permutation bits of a frameless encoding.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // end CU namespace
1241 
1242 class DarwinX86AsmBackend : public X86AsmBackend {
1243   const MCRegisterInfo &MRI;
1244 
1245   /// Number of registers that can be saved in a compact unwind encoding.
1246   enum { CU_NUM_SAVED_REGS = 6 };
1247 
1248   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1249   Triple TT;
1250   bool Is64Bit;
1251 
1252   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1253   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1254   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1255 protected:
1256   /// Size of a "push" instruction for the given register.
1257   unsigned PushInstrSize(unsigned Reg) const {
1258     switch (Reg) {
1259       case X86::EBX:
1260       case X86::ECX:
1261       case X86::EDX:
1262       case X86::EDI:
1263       case X86::ESI:
1264       case X86::EBP:
1265       case X86::RBX:
1266       case X86::RBP:
1267         return 1;
1268       case X86::R12:
1269       case X86::R13:
1270       case X86::R14:
1271       case X86::R15:
1272         return 2;
1273     }
1274     return 1;
1275   }
1276 
1277 private:
1278   /// Get the compact unwind number for a given register. The number
1279   /// corresponds to the enum lists in compact_unwind_encoding.h.
1280   int getCompactUnwindRegNum(unsigned Reg) const {
1281     static const MCPhysReg CU32BitRegs[7] = {
1282       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1283     };
1284     static const MCPhysReg CU64BitRegs[] = {
1285       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1286     };
1287     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1288     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1289       if (*CURegs == Reg)
1290         return Idx;
1291 
1292     return -1;
1293   }
1294 
1295   /// Return the registers encoded for a compact encoding with a frame
1296   /// pointer.
1297   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1298     // Encode the registers in the order they were saved --- 3-bits per
1299     // register. The list of saved registers is assumed to be in reverse
1300     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1301     uint32_t RegEnc = 0;
1302     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1303       unsigned Reg = SavedRegs[i];
1304       if (Reg == 0) break;
1305 
1306       int CURegNum = getCompactUnwindRegNum(Reg);
1307       if (CURegNum == -1) return ~0U;
1308 
1309       // Encode the 3-bit register number in order, skipping over 3-bits for
1310       // each register.
1311       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1312     }
1313 
1314     assert((RegEnc & 0x3FFFF) == RegEnc &&
1315            "Invalid compact register encoding!");
1316     return RegEnc;
1317   }
1318 
1319   /// Create the permutation encoding used with frameless stacks. It is
1320   /// passed the number of registers to be saved and an array of the registers
1321   /// saved.
1322   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1323     // The saved registers are numbered from 1 to 6. In order to encode the
1324     // order in which they were saved, we re-number them according to their
1325     // place in the register order. The re-numbering is relative to the last
1326     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1327     // that order:
1328     //
1329     //    Orig  Re-Num
1330     //    ----  ------
1331     //     6       6
1332     //     2       2
1333     //     4       3
1334     //     5       3
1335     //
1336     for (unsigned i = 0; i < RegCount; ++i) {
1337       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1338       if (CUReg == -1) return ~0U;
1339       SavedRegs[i] = CUReg;
1340     }
1341 
1342     // Reverse the list.
1343     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1344 
1345     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1346     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1347       unsigned Countless = 0;
1348       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1349         if (SavedRegs[j] < SavedRegs[i])
1350           ++Countless;
1351 
1352       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1353     }
1354 
1355     // Take the renumbered values and encode them into a 10-bit number.
1356     uint32_t permutationEncoding = 0;
1357     switch (RegCount) {
1358     case 6:
1359       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1360                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1361                              +     RenumRegs[4];
1362       break;
1363     case 5:
1364       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1365                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1366                              +     RenumRegs[5];
1367       break;
1368     case 4:
1369       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1370                              + 3 * RenumRegs[4] +      RenumRegs[5];
1371       break;
1372     case 3:
1373       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1374                              +     RenumRegs[5];
1375       break;
1376     case 2:
1377       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1378       break;
1379     case 1:
1380       permutationEncoding |=       RenumRegs[5];
1381       break;
1382     }
1383 
1384     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1385            "Invalid compact register encoding!");
1386     return permutationEncoding;
1387   }
1388 
1389 public:
1390   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1391                       const MCSubtargetInfo &STI)
1392       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1393         Is64Bit(TT.isArch64Bit()) {
1394     memset(SavedRegs, 0, sizeof(SavedRegs));
1395     OffsetSize = Is64Bit ? 8 : 4;
1396     MoveInstrSize = Is64Bit ? 3 : 2;
1397     StackDivide = Is64Bit ? 8 : 4;
1398   }
1399 
1400   std::unique_ptr<MCObjectTargetWriter>
1401   createObjectTargetWriter() const override {
1402     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1403     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1404     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1405   }
1406 
1407   /// Implementation of algorithm to generate the compact unwind encoding
1408   /// for the CFI instructions.
1409   uint32_t
1410   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1411     if (Instrs.empty()) return 0;
1412 
1413     // Reset the saved registers.
1414     unsigned SavedRegIdx = 0;
1415     memset(SavedRegs, 0, sizeof(SavedRegs));
1416 
1417     bool HasFP = false;
1418 
1419     // Encode that we are using EBP/RBP as the frame pointer.
1420     uint32_t CompactUnwindEncoding = 0;
1421 
1422     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1423     unsigned InstrOffset = 0;
1424     unsigned StackAdjust = 0;
1425     unsigned StackSize = 0;
1426     unsigned NumDefCFAOffsets = 0;
1427 
1428     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
1429       const MCCFIInstruction &Inst = Instrs[i];
1430 
1431       switch (Inst.getOperation()) {
1432       default:
1433         // Any other CFI directives indicate a frame that we aren't prepared
1434         // to represent via compact unwind, so just bail out.
1435         return 0;
1436       case MCCFIInstruction::OpDefCfaRegister: {
1437         // Defines a frame pointer. E.g.
1438         //
1439         //     movq %rsp, %rbp
1440         //  L0:
1441         //     .cfi_def_cfa_register %rbp
1442         //
1443         HasFP = true;
1444 
1445         // If the frame pointer is other than esp/rsp, we do not have a way to
1446         // generate a compact unwinding representation, so bail out.
1447         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1448             (Is64Bit ? X86::RBP : X86::EBP))
1449           return 0;
1450 
1451         // Reset the counts.
1452         memset(SavedRegs, 0, sizeof(SavedRegs));
1453         StackAdjust = 0;
1454         SavedRegIdx = 0;
1455         InstrOffset += MoveInstrSize;
1456         break;
1457       }
1458       case MCCFIInstruction::OpDefCfaOffset: {
1459         // Defines a new offset for the CFA. E.g.
1460         //
1461         //  With frame:
1462         //
1463         //     pushq %rbp
1464         //  L0:
1465         //     .cfi_def_cfa_offset 16
1466         //
1467         //  Without frame:
1468         //
1469         //     subq $72, %rsp
1470         //  L0:
1471         //     .cfi_def_cfa_offset 80
1472         //
1473         StackSize = Inst.getOffset() / StackDivide;
1474         ++NumDefCFAOffsets;
1475         break;
1476       }
1477       case MCCFIInstruction::OpOffset: {
1478         // Defines a "push" of a callee-saved register. E.g.
1479         //
1480         //     pushq %r15
1481         //     pushq %r14
1482         //     pushq %rbx
1483         //  L0:
1484         //     subq $120, %rsp
1485         //  L1:
1486         //     .cfi_offset %rbx, -40
1487         //     .cfi_offset %r14, -32
1488         //     .cfi_offset %r15, -24
1489         //
1490         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1491           // If there are too many saved registers, we cannot use a compact
1492           // unwind encoding.
1493           return CU::UNWIND_MODE_DWARF;
1494 
1495         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1496         SavedRegs[SavedRegIdx++] = Reg;
1497         StackAdjust += OffsetSize;
1498         InstrOffset += PushInstrSize(Reg);
1499         break;
1500       }
1501       }
1502     }
1503 
1504     StackAdjust /= StackDivide;
1505 
1506     if (HasFP) {
1507       if ((StackAdjust & 0xFF) != StackAdjust)
1508         // Offset was too big for a compact unwind encoding.
1509         return CU::UNWIND_MODE_DWARF;
1510 
1511       // Get the encoding of the saved registers when we have a frame pointer.
1512       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1513       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1514 
1515       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1516       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1517       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1518     } else {
1519       SubtractInstrIdx += InstrOffset;
1520       ++StackAdjust;
1521 
1522       if ((StackSize & 0xFF) == StackSize) {
1523         // Frameless stack with a small stack size.
1524         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1525 
1526         // Encode the stack size.
1527         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1528       } else {
1529         if ((StackAdjust & 0x7) != StackAdjust)
1530           // The extra stack adjustments are too big for us to handle.
1531           return CU::UNWIND_MODE_DWARF;
1532 
1533         // Frameless stack with an offset too large for us to encode compactly.
1534         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1535 
1536         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1537         // instruction.
1538         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1539 
1540         // Encode any extra stack adjustments (done via push instructions).
1541         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1542       }
1543 
1544       // Encode the number of registers saved. (Reverse the list first.)
1545       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1546       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1547 
1548       // Get the encoding of the saved registers when we don't have a frame
1549       // pointer.
1550       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1551       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1552 
1553       // Encode the register encoding.
1554       CompactUnwindEncoding |=
1555         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1556     }
1557 
1558     return CompactUnwindEncoding;
1559   }
1560 };
1561 
1562 } // end anonymous namespace
1563 
1564 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1565                                            const MCSubtargetInfo &STI,
1566                                            const MCRegisterInfo &MRI,
1567                                            const MCTargetOptions &Options) {
1568   const Triple &TheTriple = STI.getTargetTriple();
1569   if (TheTriple.isOSBinFormatMachO())
1570     return new DarwinX86AsmBackend(T, MRI, STI);
1571 
1572   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1573     return new WindowsX86AsmBackend(T, false, STI);
1574 
1575   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1576 
1577   if (TheTriple.isOSIAMCU())
1578     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1579 
1580   return new ELFX86_32AsmBackend(T, OSABI, STI);
1581 }
1582 
1583 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1584                                            const MCSubtargetInfo &STI,
1585                                            const MCRegisterInfo &MRI,
1586                                            const MCTargetOptions &Options) {
1587   const Triple &TheTriple = STI.getTargetTriple();
1588   if (TheTriple.isOSBinFormatMachO())
1589     return new DarwinX86AsmBackend(T, MRI, STI);
1590 
1591   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1592     return new WindowsX86AsmBackend(T, true, STI);
1593 
1594   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1595 
1596   if (TheTriple.getEnvironment() == Triple::GNUX32)
1597     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1598   return new ELFX86_64AsmBackend(T, OSABI, STI);
1599 }
1600