xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "MCTargetDesc/X86InstrRelaxTables.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44   uint8_t AlignBranchKind = 0;
45 
46 public:
47   void operator=(const std::string &Val) {
48     if (Val.empty())
49       return;
50     SmallVector<StringRef, 6> BranchTypes;
51     StringRef(Val).split(BranchTypes, '+', -1, false);
52     for (auto BranchType : BranchTypes) {
53       if (BranchType == "fused")
54         addKind(X86::AlignBranchFused);
55       else if (BranchType == "jcc")
56         addKind(X86::AlignBranchJcc);
57       else if (BranchType == "jmp")
58         addKind(X86::AlignBranchJmp);
59       else if (BranchType == "call")
60         addKind(X86::AlignBranchCall);
61       else if (BranchType == "ret")
62         addKind(X86::AlignBranchRet);
63       else if (BranchType == "indirect")
64         addKind(X86::AlignBranchIndirect);
65       else {
66         errs() << "invalid argument " << BranchType.str()
67                << " to -x86-align-branch=; each element must be one of: fused, "
68                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
69       }
70     }
71   }
72 
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76 
// External storage for -x86-align-branch=; the option below binds to it via
// cl::location, and X86AsmBackend's constructor copies it when the option was
// given on the command line.
X86AlignBranchKind X86AlignBranchKindLoc;

// Boundary size used for branch alignment. X86AsmBackend's constructor only
// consults it when it occurs on the command line.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// Plus-separated list of branch kinds to align. The value is parsed as a
// std::string and assigned to X86AlignBranchKindLoc, whose operator= turns it
// into a bitmask.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// Convenience switch: when set, X86AsmBackend's constructor selects a 32-byte
// boundary and the fused/jcc/jmp branch kinds; the two options above can still
// override those defaults.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// Upper bound on prefix bytes used for padding; copied into
// X86AsmBackend::TargetPrefixMax when given on the command line.
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// Hidden option, off by default; not read by any code visible in this part of
// the file.
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// Hidden option, on by default; one of the conditions checked by
// X86AsmBackend::allowEnhancedRelaxation().
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
119 
120 class X86AsmBackend : public MCAsmBackend {
121   const MCSubtargetInfo &STI;
122   std::unique_ptr<const MCInstrInfo> MCII;
123   X86AlignBranchKind AlignBranchType;
124   Align AlignBoundary;
125   unsigned TargetPrefixMax = 0;
126 
127   MCInst PrevInst;
128   MCBoundaryAlignFragment *PendingBA = nullptr;
129   std::pair<MCFragment *, size_t> PrevInstPosition;
130   bool CanPadInst;
131 
132   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134   bool needAlign(const MCInst &Inst) const;
135   bool canPadBranches(MCObjectStreamer &OS) const;
136   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137 
138 public:
139   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140       : MCAsmBackend(support::little), STI(STI),
141         MCII(T.createMCInstrInfo()) {
142     if (X86AlignBranchWithin32BBoundaries) {
143       // At the moment, this defaults to aligning fused branches, unconditional
144       // jumps, and (unfused) conditional jumps with nops.  Both the
145       // instructions aligned and the alignment method (nop vs prefix) may
146       // change in the future.
147       AlignBoundary = assumeAligned(32);;
148       AlignBranchType.addKind(X86::AlignBranchFused);
149       AlignBranchType.addKind(X86::AlignBranchJcc);
150       AlignBranchType.addKind(X86::AlignBranchJmp);
151     }
152     // Allow overriding defaults set by main flag
153     if (X86AlignBranchBoundary.getNumOccurrences())
154       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
155     if (X86AlignBranch.getNumOccurrences())
156       AlignBranchType = X86AlignBranchKindLoc;
157     if (X86PadMaxPrefixSize.getNumOccurrences())
158       TargetPrefixMax = X86PadMaxPrefixSize;
159   }
160 
161   bool allowAutoPadding() const override;
162   bool allowEnhancedRelaxation() const override;
163   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164                             const MCSubtargetInfo &STI) override;
165   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
166 
167   unsigned getNumFixupKinds() const override {
168     return X86::NumTargetFixupKinds;
169   }
170 
171   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
172 
173   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
174 
175   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
176                              const MCValue &Target) override;
177 
178   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
179                   const MCValue &Target, MutableArrayRef<char> Data,
180                   uint64_t Value, bool IsResolved,
181                   const MCSubtargetInfo *STI) const override;
182 
183   bool mayNeedRelaxation(const MCInst &Inst,
184                          const MCSubtargetInfo &STI) const override;
185 
186   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
187                             const MCRelaxableFragment *DF,
188                             const MCAsmLayout &Layout) const override;
189 
190   void relaxInstruction(MCInst &Inst,
191                         const MCSubtargetInfo &STI) const override;
192 
193   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
194                                    MCCodeEmitter &Emitter,
195                                    unsigned &RemainingSize) const;
196 
197   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
198                                unsigned &RemainingSize) const;
199 
200   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
201                               unsigned &RemainingSize) const;
202 
203   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
204 
205   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
206 
207   bool writeNopData(raw_ostream &OS, uint64_t Count,
208                     const MCSubtargetInfo *STI) const override;
209 };
210 } // end anonymous namespace
211 
212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
213   unsigned Op = Inst.getOpcode();
214   switch (Op) {
215   default:
216     return Op;
217   case X86::JCC_1:
218     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
219   case X86::JMP_1:
220     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
221   }
222 }
223 
224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
225   unsigned Op = Inst.getOpcode();
226   return X86::getRelaxedOpcodeArith(Op);
227 }
228 
229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
230   unsigned R = getRelaxedOpcodeArith(Inst);
231   if (R != Inst.getOpcode())
232     return R;
233   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
234 }
235 
236 static X86::CondCode getCondFromBranch(const MCInst &MI,
237                                        const MCInstrInfo &MCII) {
238   unsigned Opcode = MI.getOpcode();
239   switch (Opcode) {
240   default:
241     return X86::COND_INVALID;
242   case X86::JCC_1: {
243     const MCInstrDesc &Desc = MCII.get(Opcode);
244     return static_cast<X86::CondCode>(
245         MI.getOperand(Desc.getNumOperands() - 1).getImm());
246   }
247   }
248 }
249 
250 static X86::SecondMacroFusionInstKind
251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
252   X86::CondCode CC = getCondFromBranch(MI, MCII);
253   return classifySecondCondCodeInMacroFusion(CC);
254 }
255 
256 /// Check if the instruction uses RIP relative addressing.
257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
258   unsigned Opcode = MI.getOpcode();
259   const MCInstrDesc &Desc = MCII.get(Opcode);
260   uint64_t TSFlags = Desc.TSFlags;
261   unsigned CurOp = X86II::getOperandBias(Desc);
262   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
263   if (MemoryOperand < 0)
264     return false;
265   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
266   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
267   return (BaseReg == X86::RIP);
268 }
269 
270 /// Check if the instruction is a prefix.
271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
272   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
273 }
274 
275 /// Check if the instruction is valid as the first instruction in macro fusion.
276 static bool isFirstMacroFusibleInst(const MCInst &Inst,
277                                     const MCInstrInfo &MCII) {
278   // An Intel instruction with RIP relative addressing is not macro fusible.
279   if (isRIPRelative(Inst, MCII))
280     return false;
281   X86::FirstMacroFusionInstKind FIK =
282       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
283   return FIK != X86::FirstMacroFusionInstKind::Invalid;
284 }
285 
286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
287 /// get a better peformance in some cases. Here, we determine which prefix is
288 /// the most suitable.
289 ///
290 /// If the instruction has a segment override prefix, use the existing one.
291 /// If the target is 64-bit, use the CS.
292 /// If the target is 32-bit,
293 ///   - If the instruction has a ESP/EBP base register, use SS.
294 ///   - Otherwise use DS.
295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
296   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
297          "Prefixes can be added only in 32-bit or 64-bit mode.");
298   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
299   uint64_t TSFlags = Desc.TSFlags;
300 
301   // Determine where the memory operand starts, if present.
302   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
303   if (MemoryOperand != -1)
304     MemoryOperand += X86II::getOperandBias(Desc);
305 
306   unsigned SegmentReg = 0;
307   if (MemoryOperand >= 0) {
308     // Check for explicit segment override on memory operand.
309     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
310   }
311 
312   switch (TSFlags & X86II::FormMask) {
313   default:
314     break;
315   case X86II::RawFrmDstSrc: {
316     // Check segment override opcode prefix as needed (not for %ds).
317     if (Inst.getOperand(2).getReg() != X86::DS)
318       SegmentReg = Inst.getOperand(2).getReg();
319     break;
320   }
321   case X86II::RawFrmSrc: {
322     // Check segment override opcode prefix as needed (not for %ds).
323     if (Inst.getOperand(1).getReg() != X86::DS)
324       SegmentReg = Inst.getOperand(1).getReg();
325     break;
326   }
327   case X86II::RawFrmMemOffs: {
328     // Check segment override opcode prefix as needed.
329     SegmentReg = Inst.getOperand(1).getReg();
330     break;
331   }
332   }
333 
334   if (SegmentReg != 0)
335     return X86::getSegmentOverridePrefixForReg(SegmentReg);
336 
337   if (STI.hasFeature(X86::Is64Bit))
338     return X86::CS_Encoding;
339 
340   if (MemoryOperand >= 0) {
341     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
342     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
343     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
344       return X86::SS_Encoding;
345   }
346   return X86::DS_Encoding;
347 }
348 
349 /// Check if the two instructions will be macro-fused on the target cpu.
350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
351   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
352   if (!InstDesc.isConditionalBranch())
353     return false;
354   if (!isFirstMacroFusibleInst(Cmp, *MCII))
355     return false;
356   const X86::FirstMacroFusionInstKind CmpKind =
357       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
358   const X86::SecondMacroFusionInstKind BranchKind =
359       classifySecondInstInMacroFusion(Jcc, *MCII);
360   return X86::isMacroFused(CmpKind, BranchKind);
361 }
362 
363 /// Check if the instruction has a variant symbol operand.
364 static bool hasVariantSymbol(const MCInst &MI) {
365   for (auto &Operand : MI) {
366     if (!Operand.isExpr())
367       continue;
368     const MCExpr &Expr = *Operand.getExpr();
369     if (Expr.getKind() == MCExpr::SymbolRef &&
370         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
371       return true;
372   }
373   return false;
374 }
375 
376 bool X86AsmBackend::allowAutoPadding() const {
377   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
378 }
379 
380 bool X86AsmBackend::allowEnhancedRelaxation() const {
381   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
382 }
383 
384 /// X86 has certain instructions which enable interrupts exactly one
385 /// instruction *after* the instruction which stores to SS.  Return true if the
386 /// given instruction has such an interrupt delay slot.
387 static bool hasInterruptDelaySlot(const MCInst &Inst) {
388   switch (Inst.getOpcode()) {
389   case X86::POPSS16:
390   case X86::POPSS32:
391   case X86::STI:
392     return true;
393 
394   case X86::MOV16sr:
395   case X86::MOV32sr:
396   case X86::MOV64sr:
397   case X86::MOV16sm:
398     if (Inst.getOperand(0).getReg() == X86::SS)
399       return true;
400     break;
401   }
402   return false;
403 }
404 
405 /// Check if the instruction to be emitted is right after any data.
406 static bool
407 isRightAfterData(MCFragment *CurrentFragment,
408                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
409   MCFragment *F = CurrentFragment;
410   // Empty data fragments may be created to prevent further data being
411   // added into the previous fragment, we need to skip them since they
412   // have no contents.
413   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
414     if (cast<MCDataFragment>(F)->getContents().size() != 0)
415       break;
416 
417   // Since data is always emitted into a DataFragment, our check strategy is
418   // simple here.
419   //   - If the fragment is a DataFragment
420   //     - If it's not the fragment where the previous instruction is,
421   //       returns true.
422   //     - If it's the fragment holding the previous instruction but its
423   //       size changed since the the previous instruction was emitted into
424   //       it, returns true.
425   //     - Otherwise returns false.
426   //   - If the fragment is not a DataFragment, returns false.
427   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
428     return DF != PrevInstPosition.first ||
429            DF->getContents().size() != PrevInstPosition.second;
430 
431   return false;
432 }
433 
434 /// \returns the fragment size if it has instructions, otherwise returns 0.
435 static size_t getSizeForInstFragment(const MCFragment *F) {
436   if (!F || !F->hasInstructions())
437     return 0;
438   // MCEncodedFragmentWithContents being templated makes this tricky.
439   switch (F->getKind()) {
440   default:
441     llvm_unreachable("Unknown fragment with instructions!");
442   case MCFragment::FT_Data:
443     return cast<MCDataFragment>(*F).getContents().size();
444   case MCFragment::FT_Relaxable:
445     return cast<MCRelaxableFragment>(*F).getContents().size();
446   case MCFragment::FT_CompactEncodedInst:
447     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
448   }
449 }
450 
451 /// Return true if we can insert NOP or prefixes automatically before the
452 /// the instruction to be emitted.
453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
454   if (hasVariantSymbol(Inst))
455     // Linker may rewrite the instruction with variant symbol operand(e.g.
456     // TLSCALL).
457     return false;
458 
459   if (hasInterruptDelaySlot(PrevInst))
460     // If this instruction follows an interrupt enabling instruction with a one
461     // instruction delay, inserting a nop would change behavior.
462     return false;
463 
464   if (isPrefix(PrevInst, *MCII))
465     // If this instruction follows a prefix, inserting a nop/prefix would change
466     // semantic.
467     return false;
468 
469   if (isPrefix(Inst, *MCII))
470     // If this instruction is a prefix, inserting a prefix would change
471     // semantic.
472     return false;
473 
474   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
475     // If this instruction follows any data, there is no clear
476     // instruction boundary, inserting a nop/prefix would change semantic.
477     return false;
478 
479   return true;
480 }
481 
482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
483   if (!OS.getAllowAutoPadding())
484     return false;
485   assert(allowAutoPadding() && "incorrect initialization!");
486 
487   // We only pad in text section.
488   if (!OS.getCurrentSectionOnly()->getKind().isText())
489     return false;
490 
491   // To be Done: Currently don't deal with Bundle cases.
492   if (OS.getAssembler().isBundlingEnabled())
493     return false;
494 
495   // Branches only need to be aligned in 32-bit or 64-bit mode.
496   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
497     return false;
498 
499   return true;
500 }
501 
502 /// Check if the instruction operand needs to be aligned.
503 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
504   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
505   return (Desc.isConditionalBranch() &&
506           (AlignBranchType & X86::AlignBranchJcc)) ||
507          (Desc.isUnconditionalBranch() &&
508           (AlignBranchType & X86::AlignBranchJmp)) ||
509          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
510          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
511          (Desc.isIndirectBranch() &&
512           (AlignBranchType & X86::AlignBranchIndirect));
513 }
514 
515 /// Insert BoundaryAlignFragment before instructions to align branches.
516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
517                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
518   CanPadInst = canPadInst(Inst, OS);
519 
520   if (!canPadBranches(OS))
521     return;
522 
523   if (!isMacroFused(PrevInst, Inst))
524     // Macro fusion doesn't happen indeed, clear the pending.
525     PendingBA = nullptr;
526 
527   if (!CanPadInst)
528     return;
529 
530   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
531     // Macro fusion actually happens and there is no other fragment inserted
532     // after the previous instruction.
533     //
534     // Do nothing here since we already inserted a BoudaryAlign fragment when
535     // we met the first instruction in the fused pair and we'll tie them
536     // together in emitInstructionEnd.
537     //
538     // Note: When there is at least one fragment, such as MCAlignFragment,
539     // inserted after the previous instruction, e.g.
540     //
541     // \code
542     //   cmp %rax %rcx
543     //   .align 16
544     //   je .Label0
545     // \ endcode
546     //
547     // We will treat the JCC as a unfused branch although it may be fused
548     // with the CMP.
549     return;
550   }
551 
552   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
553                           isFirstMacroFusibleInst(Inst, *MCII))) {
554     // If we meet a unfused branch or the first instuction in a fusiable pair,
555     // insert a BoundaryAlign fragment.
556     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
557   }
558 }
559 
560 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
562   PrevInst = Inst;
563   MCFragment *CF = OS.getCurrentFragment();
564   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
565   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
566     F->setAllowAutoPadding(CanPadInst);
567 
568   if (!canPadBranches(OS))
569     return;
570 
571   if (!needAlign(Inst) || !PendingBA)
572     return;
573 
574   // Tie the aligned instructions into a a pending BoundaryAlign.
575   PendingBA->setLastFragment(CF);
576   PendingBA = nullptr;
577 
578   // We need to ensure that further data isn't added to the current
579   // DataFragment, so that we can get the size of instructions later in
580   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
581   // DataFragment.
582   if (isa_and_nonnull<MCDataFragment>(CF))
583     OS.insert(new MCDataFragment());
584 
585   // Update the maximum alignment on the current section if necessary.
586   MCSection *Sec = OS.getCurrentSectionOnly();
587   if (AlignBoundary.value() > Sec->getAlignment())
588     Sec->setAlignment(AlignBoundary);
589 }
590 
591 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
592   if (STI.getTargetTriple().isOSBinFormatELF()) {
593     unsigned Type;
594     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
595       Type = llvm::StringSwitch<unsigned>(Name)
596 #define ELF_RELOC(X, Y) .Case(#X, Y)
597 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
598 #undef ELF_RELOC
599                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
600                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
601                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
602                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
603                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
604                  .Default(-1u);
605     } else {
606       Type = llvm::StringSwitch<unsigned>(Name)
607 #define ELF_RELOC(X, Y) .Case(#X, Y)
608 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
609 #undef ELF_RELOC
610                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
611                  .Case("BFD_RELOC_8", ELF::R_386_8)
612                  .Case("BFD_RELOC_16", ELF::R_386_16)
613                  .Case("BFD_RELOC_32", ELF::R_386_32)
614                  .Default(-1u);
615     }
616     if (Type == -1u)
617       return None;
618     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
619   }
620   return MCAsmBackend::getFixupKind(Name);
621 }
622 
623 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
624   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
625       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
628       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
629       {"reloc_signed_4byte", 0, 32, 0},
630       {"reloc_signed_4byte_relax", 0, 32, 0},
631       {"reloc_global_offset_table", 0, 32, 0},
632       {"reloc_global_offset_table8", 0, 64, 0},
633       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
634   };
635 
636   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
637   // do not require any extra processing.
638   if (Kind >= FirstLiteralRelocationKind)
639     return MCAsmBackend::getFixupKindInfo(FK_NONE);
640 
641   if (Kind < FirstTargetFixupKind)
642     return MCAsmBackend::getFixupKindInfo(Kind);
643 
644   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
645          "Invalid kind!");
646   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
647   return Infos[Kind - FirstTargetFixupKind];
648 }
649 
650 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
651                                           const MCFixup &Fixup,
652                                           const MCValue &) {
653   return Fixup.getKind() >= FirstLiteralRelocationKind;
654 }
655 
656 static unsigned getFixupKindSize(unsigned Kind) {
657   switch (Kind) {
658   default:
659     llvm_unreachable("invalid fixup kind!");
660   case FK_NONE:
661     return 0;
662   case FK_PCRel_1:
663   case FK_SecRel_1:
664   case FK_Data_1:
665     return 1;
666   case FK_PCRel_2:
667   case FK_SecRel_2:
668   case FK_Data_2:
669     return 2;
670   case FK_PCRel_4:
671   case X86::reloc_riprel_4byte:
672   case X86::reloc_riprel_4byte_relax:
673   case X86::reloc_riprel_4byte_relax_rex:
674   case X86::reloc_riprel_4byte_movq_load:
675   case X86::reloc_signed_4byte:
676   case X86::reloc_signed_4byte_relax:
677   case X86::reloc_global_offset_table:
678   case X86::reloc_branch_4byte_pcrel:
679   case FK_SecRel_4:
680   case FK_Data_4:
681     return 4;
682   case FK_PCRel_8:
683   case FK_SecRel_8:
684   case FK_Data_8:
685   case X86::reloc_global_offset_table8:
686     return 8;
687   }
688 }
689 
690 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
691                                const MCValue &Target,
692                                MutableArrayRef<char> Data,
693                                uint64_t Value, bool IsResolved,
694                                const MCSubtargetInfo *STI) const {
695   unsigned Kind = Fixup.getKind();
696   if (Kind >= FirstLiteralRelocationKind)
697     return;
698   unsigned Size = getFixupKindSize(Kind);
699 
700   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
701 
702   int64_t SignedValue = static_cast<int64_t>(Value);
703   if ((Target.isAbsolute() || IsResolved) &&
704       getFixupKindInfo(Fixup.getKind()).Flags &
705       MCFixupKindInfo::FKF_IsPCRel) {
706     // check that PC relative fixup fits into the fixup size.
707     if (Size > 0 && !isIntN(Size * 8, SignedValue))
708       Asm.getContext().reportError(
709                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
710                                    " is too large for field of " + Twine(Size) +
711                                    ((Size == 1) ? " byte." : " bytes."));
712   } else {
713     // Check that uppper bits are either all zeros or all ones.
714     // Specifically ignore overflow/underflow as long as the leakage is
715     // limited to the lower bits. This is to remain compatible with
716     // other assemblers.
717     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
718            "Value does not fit in the Fixup field");
719   }
720 
721   for (unsigned i = 0; i != Size; ++i)
722     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
723 }
724 
725 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
726                                       const MCSubtargetInfo &STI) const {
727   // Branches can always be relaxed in either mode.
728   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
729     return true;
730 
731   // Check if this instruction is ever relaxable.
732   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
733     return false;
734 
735 
736   // Check if the relaxable operand has an expression. For the current set of
737   // relaxable instructions, the relaxable operand is always the last operand.
738   unsigned RelaxableOp = Inst.getNumOperands() - 1;
739   if (Inst.getOperand(RelaxableOp).isExpr())
740     return true;
741 
742   return false;
743 }
744 
745 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
746                                          uint64_t Value,
747                                          const MCRelaxableFragment *DF,
748                                          const MCAsmLayout &Layout) const {
749   // Relax if the value is too big for a (signed) i8.
750   return !isInt<8>(Value);
751 }
752 
753 // FIXME: Can tblgen help at all here to verify there aren't other instructions
754 // we can relax?
755 void X86AsmBackend::relaxInstruction(MCInst &Inst,
756                                      const MCSubtargetInfo &STI) const {
757   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
758   bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
759   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
760 
761   if (RelaxedOp == Inst.getOpcode()) {
762     SmallString<256> Tmp;
763     raw_svector_ostream OS(Tmp);
764     Inst.dump_pretty(OS);
765     OS << "\n";
766     report_fatal_error("unexpected instruction to relax: " + OS.str());
767   }
768 
769   Inst.setOpcode(RelaxedOp);
770 }
771 
772 /// Return true if this instruction has been fully relaxed into it's most
773 /// general available form.
774 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
775   auto &Inst = RF.getInst();
776   auto &STI = *RF.getSubtargetInfo();
777   bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
778   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
779 }
780 
781 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
782                                             MCCodeEmitter &Emitter,
783                                             unsigned &RemainingSize) const {
784   if (!RF.getAllowAutoPadding())
785     return false;
786   // If the instruction isn't fully relaxed, shifting it around might require a
787   // larger value for one of the fixups then can be encoded.  The outer loop
788   // will also catch this before moving to the next instruction, but we need to
789   // prevent padding this single instruction as well.
790   if (!isFullyRelaxed(RF))
791     return false;
792 
793   const unsigned OldSize = RF.getContents().size();
794   if (OldSize == 15)
795     return false;
796 
797   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
798   const unsigned RemainingPrefixSize = [&]() -> unsigned {
799     SmallString<15> Code;
800     raw_svector_ostream VecOS(Code);
801     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
802     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
803 
804     // TODO: It turns out we need a decent amount of plumbing for the target
805     // specific bits to determine number of prefixes its safe to add.  Various
806     // targets (older chips mostly, but also Atom family) encounter decoder
807     // stalls with too many prefixes.  For testing purposes, we set the value
808     // externally for the moment.
809     unsigned ExistingPrefixSize = Code.size();
810     if (TargetPrefixMax <= ExistingPrefixSize)
811       return 0;
812     return TargetPrefixMax - ExistingPrefixSize;
813   }();
814   const unsigned PrefixBytesToAdd =
815       std::min(MaxPossiblePad, RemainingPrefixSize);
816   if (PrefixBytesToAdd == 0)
817     return false;
818 
819   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
820 
821   SmallString<256> Code;
822   Code.append(PrefixBytesToAdd, Prefix);
823   Code.append(RF.getContents().begin(), RF.getContents().end());
824   RF.getContents() = Code;
825 
826   // Adjust the fixups for the change in offsets
827   for (auto &F : RF.getFixups()) {
828     F.setOffset(F.getOffset() + PrefixBytesToAdd);
829   }
830 
831   RemainingSize -= PrefixBytesToAdd;
832   return true;
833 }
834 
835 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
836                                                 MCCodeEmitter &Emitter,
837                                                 unsigned &RemainingSize) const {
838   if (isFullyRelaxed(RF))
839     // TODO: There are lots of other tricks we could apply for increasing
840     // encoding size without impacting performance.
841     return false;
842 
843   MCInst Relaxed = RF.getInst();
844   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
845 
846   SmallVector<MCFixup, 4> Fixups;
847   SmallString<15> Code;
848   raw_svector_ostream VecOS(Code);
849   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
850   const unsigned OldSize = RF.getContents().size();
851   const unsigned NewSize = Code.size();
852   assert(NewSize >= OldSize && "size decrease during relaxation?");
853   unsigned Delta = NewSize - OldSize;
854   if (Delta > RemainingSize)
855     return false;
856   RF.setInst(Relaxed);
857   RF.getContents() = Code;
858   RF.getFixups() = Fixups;
859   RemainingSize -= Delta;
860   return true;
861 }
862 
863 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
864                                            MCCodeEmitter &Emitter,
865                                            unsigned &RemainingSize) const {
866   bool Changed = false;
867   if (RemainingSize != 0)
868     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
869   if (RemainingSize != 0)
870     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
871   return Changed;
872 }
873 
/// Post-layout pass that shrinks alignment padding by growing the encodings
/// of the relaxable instructions in front of it.
///
/// For every text section, relaxable fragments are collected until an align
/// or boundary-align fragment is reached; the padding that fragment would
/// need is then pushed into the collected instructions, closest one first,
/// via padInstructionEncoding().  Does nothing unless X86PadForAlign or
/// X86PadForBranchAlign is set.
void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment.  The actual win is in reducing
  // instruction count, not number of bytes.  Modern X86-64 can easily end up
  // decode limited.  It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    // Only text sections are considered.
    if (!Sec.getKind().isText())
      continue;

    // Candidates seen since the last region boundary, in section order.
    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      // A labeled fragment starts a new region: forget earlier candidates.
      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      // Remember relaxable fragments as padding candidates.
      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      // Align and boundary-align fragments are handled only when the
      // corresponding option is enabled.
      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

      // OrigOffset is only needed by the consistency assertions below.
#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive.  This is purely about human understandability
      // of the resulting code.  If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it.  Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      // Candidates before this alignment fragment are not reused for a later
      // one.
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated.  Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align)
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

      // Debug-only check: the end of this fragment must not have moved, and
      // its recomputed size must equal the bytes that were not absorbed.
#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.  Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        // Advance the section iterator up to the boundary align's last
        // fragment so the fragments in between are never collected.
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid by querying the offset
  // and size of the last fragment of each section.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}
999 
1000 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
1001   if (STI.hasFeature(X86::Is16Bit))
1002     return 4;
1003   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
1004     return 1;
1005   if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
1006     return 7;
1007   if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
1008     return 15;
1009   if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
1010     return 11;
1011   // FIXME: handle 32-bit mode
1012   // 15-bytes is the longest single NOP instruction, but 10-bytes is
1013   // commonly the longest that can be efficiently decoded.
1014   return 10;
1015 }
1016 
1017 /// Write a sequence of optimal nops to the output, covering \p Count
1018 /// bytes.
1019 /// \return - true on success, false on failure
1020 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1021                                  const MCSubtargetInfo *STI) const {
1022   static const char Nops32Bit[10][11] = {
1023       // nop
1024       "\x90",
1025       // xchg %ax,%ax
1026       "\x66\x90",
1027       // nopl (%[re]ax)
1028       "\x0f\x1f\x00",
1029       // nopl 0(%[re]ax)
1030       "\x0f\x1f\x40\x00",
1031       // nopl 0(%[re]ax,%[re]ax,1)
1032       "\x0f\x1f\x44\x00\x00",
1033       // nopw 0(%[re]ax,%[re]ax,1)
1034       "\x66\x0f\x1f\x44\x00\x00",
1035       // nopl 0L(%[re]ax)
1036       "\x0f\x1f\x80\x00\x00\x00\x00",
1037       // nopl 0L(%[re]ax,%[re]ax,1)
1038       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1039       // nopw 0L(%[re]ax,%[re]ax,1)
1040       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1041       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1042       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1043   };
1044 
1045   // 16-bit mode uses different nop patterns than 32-bit.
1046   static const char Nops16Bit[4][11] = {
1047       // nop
1048       "\x90",
1049       // xchg %eax,%eax
1050       "\x66\x90",
1051       // lea 0(%si),%si
1052       "\x8d\x74\x00",
1053       // lea 0w(%si),%si
1054       "\x8d\xb4\x00\x00",
1055   };
1056 
1057   const char(*Nops)[11] =
1058       STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit;
1059 
1060   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1061 
1062   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1063   // length.
1064   do {
1065     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1066     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1067     for (uint8_t i = 0; i < Prefixes; i++)
1068       OS << '\x66';
1069     const uint8_t Rest = ThisNopLength - Prefixes;
1070     if (Rest != 0)
1071       OS.write(Nops[Rest - 1], Rest);
1072     Count -= ThisNopLength;
1073   } while (Count != 0);
1074 
1075   return true;
1076 }
1077 
1078 /* *** */
1079 
1080 namespace {
1081 
1082 class ELFX86AsmBackend : public X86AsmBackend {
1083 public:
1084   uint8_t OSABI;
1085   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1086       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1087 };
1088 
1089 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1090 public:
1091   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1092                       const MCSubtargetInfo &STI)
1093     : ELFX86AsmBackend(T, OSABI, STI) {}
1094 
1095   std::unique_ptr<MCObjectTargetWriter>
1096   createObjectTargetWriter() const override {
1097     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1098   }
1099 };
1100 
1101 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1102 public:
1103   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1104                        const MCSubtargetInfo &STI)
1105       : ELFX86AsmBackend(T, OSABI, STI) {}
1106 
1107   std::unique_ptr<MCObjectTargetWriter>
1108   createObjectTargetWriter() const override {
1109     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1110                                     ELF::EM_X86_64);
1111   }
1112 };
1113 
1114 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1115 public:
1116   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1117                          const MCSubtargetInfo &STI)
1118       : ELFX86AsmBackend(T, OSABI, STI) {}
1119 
1120   std::unique_ptr<MCObjectTargetWriter>
1121   createObjectTargetWriter() const override {
1122     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1123                                     ELF::EM_IAMCU);
1124   }
1125 };
1126 
1127 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1128 public:
1129   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1130                       const MCSubtargetInfo &STI)
1131     : ELFX86AsmBackend(T, OSABI, STI) {}
1132 
1133   std::unique_ptr<MCObjectTargetWriter>
1134   createObjectTargetWriter() const override {
1135     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1136   }
1137 };
1138 
1139 class WindowsX86AsmBackend : public X86AsmBackend {
1140   bool Is64Bit;
1141 
1142 public:
1143   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1144                        const MCSubtargetInfo &STI)
1145     : X86AsmBackend(T, STI)
1146     , Is64Bit(is64Bit) {
1147   }
1148 
1149   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1150     return StringSwitch<Optional<MCFixupKind>>(Name)
1151         .Case("dir32", FK_Data_4)
1152         .Case("secrel32", FK_SecRel_4)
1153         .Case("secidx", FK_SecRel_2)
1154         .Default(MCAsmBackend::getFixupKind(Name));
1155   }
1156 
1157   std::unique_ptr<MCObjectTargetWriter>
1158   createObjectTargetWriter() const override {
1159     return createX86WinCOFFObjectWriter(Is64Bit);
1160   }
1161 };
1162 
namespace CU {

  /// Constants used to assemble a compact unwind encoding word.
  enum CompactUnwindEncodings {
    /// Mode: [RE]BP based frame where [RE]BP is pushed on the stack
    /// immediately after the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME = 0x01000000,

    /// Mode: a frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD = 0x02000000,

    /// Mode: a frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND = 0x03000000,

    /// Mode: no compact unwind encoding is available.
    UNWIND_MODE_DWARF = 0x04000000,

    /// Mask for encoding the frame registers.
    UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

    /// Mask for encoding the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // namespace CU
1188 
1189 class DarwinX86AsmBackend : public X86AsmBackend {
1190   const MCRegisterInfo &MRI;
1191 
1192   /// Number of registers that can be saved in a compact unwind encoding.
1193   enum { CU_NUM_SAVED_REGS = 6 };
1194 
1195   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1196   Triple TT;
1197   bool Is64Bit;
1198 
1199   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1200   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1201   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1202 protected:
1203   /// Size of a "push" instruction for the given register.
1204   unsigned PushInstrSize(unsigned Reg) const {
1205     switch (Reg) {
1206       case X86::EBX:
1207       case X86::ECX:
1208       case X86::EDX:
1209       case X86::EDI:
1210       case X86::ESI:
1211       case X86::EBP:
1212       case X86::RBX:
1213       case X86::RBP:
1214         return 1;
1215       case X86::R12:
1216       case X86::R13:
1217       case X86::R14:
1218       case X86::R15:
1219         return 2;
1220     }
1221     return 1;
1222   }
1223 
1224 private:
1225   /// Get the compact unwind number for a given register. The number
1226   /// corresponds to the enum lists in compact_unwind_encoding.h.
1227   int getCompactUnwindRegNum(unsigned Reg) const {
1228     static const MCPhysReg CU32BitRegs[7] = {
1229       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1230     };
1231     static const MCPhysReg CU64BitRegs[] = {
1232       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1233     };
1234     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1235     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1236       if (*CURegs == Reg)
1237         return Idx;
1238 
1239     return -1;
1240   }
1241 
1242   /// Return the registers encoded for a compact encoding with a frame
1243   /// pointer.
1244   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1245     // Encode the registers in the order they were saved --- 3-bits per
1246     // register. The list of saved registers is assumed to be in reverse
1247     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1248     uint32_t RegEnc = 0;
1249     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1250       unsigned Reg = SavedRegs[i];
1251       if (Reg == 0) break;
1252 
1253       int CURegNum = getCompactUnwindRegNum(Reg);
1254       if (CURegNum == -1) return ~0U;
1255 
1256       // Encode the 3-bit register number in order, skipping over 3-bits for
1257       // each register.
1258       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1259     }
1260 
1261     assert((RegEnc & 0x3FFFF) == RegEnc &&
1262            "Invalid compact register encoding!");
1263     return RegEnc;
1264   }
1265 
1266   /// Create the permutation encoding used with frameless stacks. It is
1267   /// passed the number of registers to be saved and an array of the registers
1268   /// saved.
1269   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1270     // The saved registers are numbered from 1 to 6. In order to encode the
1271     // order in which they were saved, we re-number them according to their
1272     // place in the register order. The re-numbering is relative to the last
1273     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1274     // that order:
1275     //
1276     //    Orig  Re-Num
1277     //    ----  ------
1278     //     6       6
1279     //     2       2
1280     //     4       3
1281     //     5       3
1282     //
1283     for (unsigned i = 0; i < RegCount; ++i) {
1284       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1285       if (CUReg == -1) return ~0U;
1286       SavedRegs[i] = CUReg;
1287     }
1288 
1289     // Reverse the list.
1290     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1291 
1292     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1293     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1294       unsigned Countless = 0;
1295       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1296         if (SavedRegs[j] < SavedRegs[i])
1297           ++Countless;
1298 
1299       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1300     }
1301 
1302     // Take the renumbered values and encode them into a 10-bit number.
1303     uint32_t permutationEncoding = 0;
1304     switch (RegCount) {
1305     case 6:
1306       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1307                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1308                              +     RenumRegs[4];
1309       break;
1310     case 5:
1311       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1312                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1313                              +     RenumRegs[5];
1314       break;
1315     case 4:
1316       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1317                              + 3 * RenumRegs[4] +      RenumRegs[5];
1318       break;
1319     case 3:
1320       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1321                              +     RenumRegs[5];
1322       break;
1323     case 2:
1324       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1325       break;
1326     case 1:
1327       permutationEncoding |=       RenumRegs[5];
1328       break;
1329     }
1330 
1331     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1332            "Invalid compact register encoding!");
1333     return permutationEncoding;
1334   }
1335 
1336 public:
1337   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1338                       const MCSubtargetInfo &STI)
1339       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1340         Is64Bit(TT.isArch64Bit()) {
1341     memset(SavedRegs, 0, sizeof(SavedRegs));
1342     OffsetSize = Is64Bit ? 8 : 4;
1343     MoveInstrSize = Is64Bit ? 3 : 2;
1344     StackDivide = Is64Bit ? 8 : 4;
1345   }
1346 
1347   std::unique_ptr<MCObjectTargetWriter>
1348   createObjectTargetWriter() const override {
1349     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1350     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1351     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1352   }
1353 
1354   /// Implementation of algorithm to generate the compact unwind encoding
1355   /// for the CFI instructions.
1356   uint32_t
1357   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1358     if (Instrs.empty()) return 0;
1359 
1360     // Reset the saved registers.
1361     unsigned SavedRegIdx = 0;
1362     memset(SavedRegs, 0, sizeof(SavedRegs));
1363 
1364     bool HasFP = false;
1365 
1366     // Encode that we are using EBP/RBP as the frame pointer.
1367     uint32_t CompactUnwindEncoding = 0;
1368 
1369     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1370     unsigned InstrOffset = 0;
1371     unsigned StackAdjust = 0;
1372     unsigned StackSize = 0;
1373     int MinAbsOffset = std::numeric_limits<int>::max();
1374 
1375     for (const MCCFIInstruction &Inst : Instrs) {
1376       switch (Inst.getOperation()) {
1377       default:
1378         // Any other CFI directives indicate a frame that we aren't prepared
1379         // to represent via compact unwind, so just bail out.
1380         return CU::UNWIND_MODE_DWARF;
1381       case MCCFIInstruction::OpDefCfaRegister: {
1382         // Defines a frame pointer. E.g.
1383         //
1384         //     movq %rsp, %rbp
1385         //  L0:
1386         //     .cfi_def_cfa_register %rbp
1387         //
1388         HasFP = true;
1389 
1390         // If the frame pointer is other than esp/rsp, we do not have a way to
1391         // generate a compact unwinding representation, so bail out.
1392         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1393             (Is64Bit ? X86::RBP : X86::EBP))
1394           return CU::UNWIND_MODE_DWARF;
1395 
1396         // Reset the counts.
1397         memset(SavedRegs, 0, sizeof(SavedRegs));
1398         StackAdjust = 0;
1399         SavedRegIdx = 0;
1400         MinAbsOffset = std::numeric_limits<int>::max();
1401         InstrOffset += MoveInstrSize;
1402         break;
1403       }
1404       case MCCFIInstruction::OpDefCfaOffset: {
1405         // Defines a new offset for the CFA. E.g.
1406         //
1407         //  With frame:
1408         //
1409         //     pushq %rbp
1410         //  L0:
1411         //     .cfi_def_cfa_offset 16
1412         //
1413         //  Without frame:
1414         //
1415         //     subq $72, %rsp
1416         //  L0:
1417         //     .cfi_def_cfa_offset 80
1418         //
1419         StackSize = Inst.getOffset() / StackDivide;
1420         break;
1421       }
1422       case MCCFIInstruction::OpOffset: {
1423         // Defines a "push" of a callee-saved register. E.g.
1424         //
1425         //     pushq %r15
1426         //     pushq %r14
1427         //     pushq %rbx
1428         //  L0:
1429         //     subq $120, %rsp
1430         //  L1:
1431         //     .cfi_offset %rbx, -40
1432         //     .cfi_offset %r14, -32
1433         //     .cfi_offset %r15, -24
1434         //
1435         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1436           // If there are too many saved registers, we cannot use a compact
1437           // unwind encoding.
1438           return CU::UNWIND_MODE_DWARF;
1439 
1440         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1441         SavedRegs[SavedRegIdx++] = Reg;
1442         StackAdjust += OffsetSize;
1443         MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1444         InstrOffset += PushInstrSize(Reg);
1445         break;
1446       }
1447       }
1448     }
1449 
1450     StackAdjust /= StackDivide;
1451 
1452     if (HasFP) {
1453       if ((StackAdjust & 0xFF) != StackAdjust)
1454         // Offset was too big for a compact unwind encoding.
1455         return CU::UNWIND_MODE_DWARF;
1456 
1457       // We don't attempt to track a real StackAdjust, so if the saved registers
1458       // aren't adjacent to rbp we can't cope.
1459       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1460         return CU::UNWIND_MODE_DWARF;
1461 
1462       // Get the encoding of the saved registers when we have a frame pointer.
1463       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1464       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1465 
1466       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1467       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1468       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1469     } else {
1470       SubtractInstrIdx += InstrOffset;
1471       ++StackAdjust;
1472 
1473       if ((StackSize & 0xFF) == StackSize) {
1474         // Frameless stack with a small stack size.
1475         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1476 
1477         // Encode the stack size.
1478         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1479       } else {
1480         if ((StackAdjust & 0x7) != StackAdjust)
1481           // The extra stack adjustments are too big for us to handle.
1482           return CU::UNWIND_MODE_DWARF;
1483 
1484         // Frameless stack with an offset too large for us to encode compactly.
1485         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1486 
1487         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1488         // instruction.
1489         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1490 
1491         // Encode any extra stack adjustments (done via push instructions).
1492         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1493       }
1494 
1495       // Encode the number of registers saved. (Reverse the list first.)
1496       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1497       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1498 
1499       // Get the encoding of the saved registers when we don't have a frame
1500       // pointer.
1501       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1502       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1503 
1504       // Encode the register encoding.
1505       CompactUnwindEncoding |=
1506         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1507     }
1508 
1509     return CompactUnwindEncoding;
1510   }
1511 };
1512 
1513 } // end anonymous namespace
1514 
1515 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1516                                            const MCSubtargetInfo &STI,
1517                                            const MCRegisterInfo &MRI,
1518                                            const MCTargetOptions &Options) {
1519   const Triple &TheTriple = STI.getTargetTriple();
1520   if (TheTriple.isOSBinFormatMachO())
1521     return new DarwinX86AsmBackend(T, MRI, STI);
1522 
1523   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1524     return new WindowsX86AsmBackend(T, false, STI);
1525 
1526   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1527 
1528   if (TheTriple.isOSIAMCU())
1529     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1530 
1531   return new ELFX86_32AsmBackend(T, OSABI, STI);
1532 }
1533 
1534 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1535                                            const MCSubtargetInfo &STI,
1536                                            const MCRegisterInfo &MRI,
1537                                            const MCTargetOptions &Options) {
1538   const Triple &TheTriple = STI.getTargetTriple();
1539   if (TheTriple.isOSBinFormatMachO())
1540     return new DarwinX86AsmBackend(T, MRI, STI);
1541 
1542   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1543     return new WindowsX86AsmBackend(T, true, STI);
1544 
1545   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1546 
1547   if (TheTriple.isX32())
1548     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1549   return new ELFX86_64AsmBackend(T, OSABI, STI);
1550 }
1551