xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision 7ef62cebc2f965b0f640263e179276928885e33d)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86FixupKinds.h"
11 #include "MCTargetDesc/X86InstrRelaxTables.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44   uint8_t AlignBranchKind = 0;
45 
46 public:
47   void operator=(const std::string &Val) {
48     if (Val.empty())
49       return;
50     SmallVector<StringRef, 6> BranchTypes;
51     StringRef(Val).split(BranchTypes, '+', -1, false);
52     for (auto BranchType : BranchTypes) {
53       if (BranchType == "fused")
54         addKind(X86::AlignBranchFused);
55       else if (BranchType == "jcc")
56         addKind(X86::AlignBranchJcc);
57       else if (BranchType == "jmp")
58         addKind(X86::AlignBranchJmp);
59       else if (BranchType == "call")
60         addKind(X86::AlignBranchCall);
61       else if (BranchType == "ret")
62         addKind(X86::AlignBranchRet);
63       else if (BranchType == "indirect")
64         addKind(X86::AlignBranchIndirect);
65       else {
66         errs() << "invalid argument " << BranchType.str()
67                << " to -x86-align-branch=; each element must be one of: fused, "
68                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
69       }
70     }
71   }
72 
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76 
77 X86AlignBranchKind X86AlignBranchKindLoc;
78 
79 cl::opt<unsigned> X86AlignBranchBoundary(
80     "x86-align-branch-boundary", cl::init(0),
81     cl::desc(
82         "Control how the assembler should align branches with NOP. If the "
83         "boundary's size is not 0, it should be a power of 2 and no less "
84         "than 32. Branches will be aligned to prevent from being across or "
85         "against the boundary of specified size. The default value 0 does not "
86         "align branches."));
87 
88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89     "x86-align-branch",
90     cl::desc(
91         "Specify types of branches to align (plus separated list of types):"
92              "\njcc      indicates conditional jumps"
93              "\nfused    indicates fused conditional jumps"
94              "\njmp      indicates direct unconditional jumps"
95              "\ncall     indicates direct and indirect calls"
96              "\nret      indicates rets"
97              "\nindirect indicates indirect unconditional jumps"),
98     cl::location(X86AlignBranchKindLoc));
99 
100 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101     "x86-branches-within-32B-boundaries", cl::init(false),
102     cl::desc(
103         "Align selected instructions to mitigate negative performance impact "
104         "of Intel's micro code update for errata skx102.  May break "
105         "assumptions about labels corresponding to particular instructions, "
106         "and should be used with caution."));
107 
108 cl::opt<unsigned> X86PadMaxPrefixSize(
109     "x86-pad-max-prefix-size", cl::init(0),
110     cl::desc("Maximum number of prefixes to use for padding"));
111 
112 cl::opt<bool> X86PadForAlign(
113     "x86-pad-for-align", cl::init(false), cl::Hidden,
114     cl::desc("Pad previous instructions to implement align directives"));
115 
116 cl::opt<bool> X86PadForBranchAlign(
117     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118     cl::desc("Pad previous instructions to implement branch alignment"));
119 
120 class X86AsmBackend : public MCAsmBackend {
121   const MCSubtargetInfo &STI;
122   std::unique_ptr<const MCInstrInfo> MCII;
123   X86AlignBranchKind AlignBranchType;
124   Align AlignBoundary;
125   unsigned TargetPrefixMax = 0;
126 
127   MCInst PrevInst;
128   MCBoundaryAlignFragment *PendingBA = nullptr;
129   std::pair<MCFragment *, size_t> PrevInstPosition;
130   bool CanPadInst;
131 
132   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134   bool needAlign(const MCInst &Inst) const;
135   bool canPadBranches(MCObjectStreamer &OS) const;
136   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137 
138 public:
139   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140       : MCAsmBackend(support::little), STI(STI),
141         MCII(T.createMCInstrInfo()) {
142     if (X86AlignBranchWithin32BBoundaries) {
143       // At the moment, this defaults to aligning fused branches, unconditional
144       // jumps, and (unfused) conditional jumps with nops.  Both the
145       // instructions aligned and the alignment method (nop vs prefix) may
146       // change in the future.
147       AlignBoundary = assumeAligned(32);;
148       AlignBranchType.addKind(X86::AlignBranchFused);
149       AlignBranchType.addKind(X86::AlignBranchJcc);
150       AlignBranchType.addKind(X86::AlignBranchJmp);
151     }
152     // Allow overriding defaults set by main flag
153     if (X86AlignBranchBoundary.getNumOccurrences())
154       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
155     if (X86AlignBranch.getNumOccurrences())
156       AlignBranchType = X86AlignBranchKindLoc;
157     if (X86PadMaxPrefixSize.getNumOccurrences())
158       TargetPrefixMax = X86PadMaxPrefixSize;
159   }
160 
161   bool allowAutoPadding() const override;
162   bool allowEnhancedRelaxation() const override;
163   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164                             const MCSubtargetInfo &STI) override;
165   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
166 
167   unsigned getNumFixupKinds() const override {
168     return X86::NumTargetFixupKinds;
169   }
170 
171   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
172 
173   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
174 
175   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
176                              const MCValue &Target) override;
177 
178   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
179                   const MCValue &Target, MutableArrayRef<char> Data,
180                   uint64_t Value, bool IsResolved,
181                   const MCSubtargetInfo *STI) const override;
182 
183   bool mayNeedRelaxation(const MCInst &Inst,
184                          const MCSubtargetInfo &STI) const override;
185 
186   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
187                             const MCRelaxableFragment *DF,
188                             const MCAsmLayout &Layout) const override;
189 
190   void relaxInstruction(MCInst &Inst,
191                         const MCSubtargetInfo &STI) const override;
192 
193   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
194                                    MCCodeEmitter &Emitter,
195                                    unsigned &RemainingSize) const;
196 
197   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
198                                unsigned &RemainingSize) const;
199 
200   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
201                               unsigned &RemainingSize) const;
202 
203   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
204 
205   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
206 
207   bool writeNopData(raw_ostream &OS, uint64_t Count,
208                     const MCSubtargetInfo *STI) const override;
209 };
210 } // end anonymous namespace
211 
212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
213   unsigned Op = Inst.getOpcode();
214   switch (Op) {
215   default:
216     return Op;
217   case X86::JCC_1:
218     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
219   case X86::JMP_1:
220     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
221   }
222 }
223 
224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
225   unsigned Op = Inst.getOpcode();
226   return X86::getRelaxedOpcodeArith(Op);
227 }
228 
229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
230   unsigned R = getRelaxedOpcodeArith(Inst);
231   if (R != Inst.getOpcode())
232     return R;
233   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
234 }
235 
236 static X86::CondCode getCondFromBranch(const MCInst &MI,
237                                        const MCInstrInfo &MCII) {
238   unsigned Opcode = MI.getOpcode();
239   switch (Opcode) {
240   default:
241     return X86::COND_INVALID;
242   case X86::JCC_1: {
243     const MCInstrDesc &Desc = MCII.get(Opcode);
244     return static_cast<X86::CondCode>(
245         MI.getOperand(Desc.getNumOperands() - 1).getImm());
246   }
247   }
248 }
249 
250 static X86::SecondMacroFusionInstKind
251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
252   X86::CondCode CC = getCondFromBranch(MI, MCII);
253   return classifySecondCondCodeInMacroFusion(CC);
254 }
255 
256 /// Check if the instruction uses RIP relative addressing.
257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
258   unsigned Opcode = MI.getOpcode();
259   const MCInstrDesc &Desc = MCII.get(Opcode);
260   uint64_t TSFlags = Desc.TSFlags;
261   unsigned CurOp = X86II::getOperandBias(Desc);
262   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
263   if (MemoryOperand < 0)
264     return false;
265   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
266   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
267   return (BaseReg == X86::RIP);
268 }
269 
270 /// Check if the instruction is a prefix.
271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
272   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
273 }
274 
275 /// Check if the instruction is valid as the first instruction in macro fusion.
276 static bool isFirstMacroFusibleInst(const MCInst &Inst,
277                                     const MCInstrInfo &MCII) {
278   // An Intel instruction with RIP relative addressing is not macro fusible.
279   if (isRIPRelative(Inst, MCII))
280     return false;
281   X86::FirstMacroFusionInstKind FIK =
282       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
283   return FIK != X86::FirstMacroFusionInstKind::Invalid;
284 }
285 
286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
287 /// get a better peformance in some cases. Here, we determine which prefix is
288 /// the most suitable.
289 ///
290 /// If the instruction has a segment override prefix, use the existing one.
291 /// If the target is 64-bit, use the CS.
292 /// If the target is 32-bit,
293 ///   - If the instruction has a ESP/EBP base register, use SS.
294 ///   - Otherwise use DS.
295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
296   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
297          "Prefixes can be added only in 32-bit or 64-bit mode.");
298   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
299   uint64_t TSFlags = Desc.TSFlags;
300 
301   // Determine where the memory operand starts, if present.
302   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
303   if (MemoryOperand != -1)
304     MemoryOperand += X86II::getOperandBias(Desc);
305 
306   unsigned SegmentReg = 0;
307   if (MemoryOperand >= 0) {
308     // Check for explicit segment override on memory operand.
309     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
310   }
311 
312   switch (TSFlags & X86II::FormMask) {
313   default:
314     break;
315   case X86II::RawFrmDstSrc: {
316     // Check segment override opcode prefix as needed (not for %ds).
317     if (Inst.getOperand(2).getReg() != X86::DS)
318       SegmentReg = Inst.getOperand(2).getReg();
319     break;
320   }
321   case X86II::RawFrmSrc: {
322     // Check segment override opcode prefix as needed (not for %ds).
323     if (Inst.getOperand(1).getReg() != X86::DS)
324       SegmentReg = Inst.getOperand(1).getReg();
325     break;
326   }
327   case X86II::RawFrmMemOffs: {
328     // Check segment override opcode prefix as needed.
329     SegmentReg = Inst.getOperand(1).getReg();
330     break;
331   }
332   }
333 
334   if (SegmentReg != 0)
335     return X86::getSegmentOverridePrefixForReg(SegmentReg);
336 
337   if (STI.hasFeature(X86::Is64Bit))
338     return X86::CS_Encoding;
339 
340   if (MemoryOperand >= 0) {
341     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
342     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
343     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
344       return X86::SS_Encoding;
345   }
346   return X86::DS_Encoding;
347 }
348 
349 /// Check if the two instructions will be macro-fused on the target cpu.
350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
351   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
352   if (!InstDesc.isConditionalBranch())
353     return false;
354   if (!isFirstMacroFusibleInst(Cmp, *MCII))
355     return false;
356   const X86::FirstMacroFusionInstKind CmpKind =
357       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
358   const X86::SecondMacroFusionInstKind BranchKind =
359       classifySecondInstInMacroFusion(Jcc, *MCII);
360   return X86::isMacroFused(CmpKind, BranchKind);
361 }
362 
363 /// Check if the instruction has a variant symbol operand.
364 static bool hasVariantSymbol(const MCInst &MI) {
365   for (auto &Operand : MI) {
366     if (!Operand.isExpr())
367       continue;
368     const MCExpr &Expr = *Operand.getExpr();
369     if (Expr.getKind() == MCExpr::SymbolRef &&
370         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
371       return true;
372   }
373   return false;
374 }
375 
376 bool X86AsmBackend::allowAutoPadding() const {
377   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
378 }
379 
380 bool X86AsmBackend::allowEnhancedRelaxation() const {
381   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
382 }
383 
384 /// X86 has certain instructions which enable interrupts exactly one
385 /// instruction *after* the instruction which stores to SS.  Return true if the
386 /// given instruction has such an interrupt delay slot.
387 static bool hasInterruptDelaySlot(const MCInst &Inst) {
388   switch (Inst.getOpcode()) {
389   case X86::POPSS16:
390   case X86::POPSS32:
391   case X86::STI:
392     return true;
393 
394   case X86::MOV16sr:
395   case X86::MOV32sr:
396   case X86::MOV64sr:
397   case X86::MOV16sm:
398     if (Inst.getOperand(0).getReg() == X86::SS)
399       return true;
400     break;
401   }
402   return false;
403 }
404 
405 /// Check if the instruction to be emitted is right after any data.
406 static bool
407 isRightAfterData(MCFragment *CurrentFragment,
408                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
409   MCFragment *F = CurrentFragment;
410   // Empty data fragments may be created to prevent further data being
411   // added into the previous fragment, we need to skip them since they
412   // have no contents.
413   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
414     if (cast<MCDataFragment>(F)->getContents().size() != 0)
415       break;
416 
417   // Since data is always emitted into a DataFragment, our check strategy is
418   // simple here.
419   //   - If the fragment is a DataFragment
420   //     - If it's not the fragment where the previous instruction is,
421   //       returns true.
422   //     - If it's the fragment holding the previous instruction but its
423   //       size changed since the the previous instruction was emitted into
424   //       it, returns true.
425   //     - Otherwise returns false.
426   //   - If the fragment is not a DataFragment, returns false.
427   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
428     return DF != PrevInstPosition.first ||
429            DF->getContents().size() != PrevInstPosition.second;
430 
431   return false;
432 }
433 
434 /// \returns the fragment size if it has instructions, otherwise returns 0.
435 static size_t getSizeForInstFragment(const MCFragment *F) {
436   if (!F || !F->hasInstructions())
437     return 0;
438   // MCEncodedFragmentWithContents being templated makes this tricky.
439   switch (F->getKind()) {
440   default:
441     llvm_unreachable("Unknown fragment with instructions!");
442   case MCFragment::FT_Data:
443     return cast<MCDataFragment>(*F).getContents().size();
444   case MCFragment::FT_Relaxable:
445     return cast<MCRelaxableFragment>(*F).getContents().size();
446   case MCFragment::FT_CompactEncodedInst:
447     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
448   }
449 }
450 
451 /// Return true if we can insert NOP or prefixes automatically before the
452 /// the instruction to be emitted.
453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
454   if (hasVariantSymbol(Inst))
455     // Linker may rewrite the instruction with variant symbol operand(e.g.
456     // TLSCALL).
457     return false;
458 
459   if (hasInterruptDelaySlot(PrevInst))
460     // If this instruction follows an interrupt enabling instruction with a one
461     // instruction delay, inserting a nop would change behavior.
462     return false;
463 
464   if (isPrefix(PrevInst, *MCII))
465     // If this instruction follows a prefix, inserting a nop/prefix would change
466     // semantic.
467     return false;
468 
469   if (isPrefix(Inst, *MCII))
470     // If this instruction is a prefix, inserting a prefix would change
471     // semantic.
472     return false;
473 
474   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
475     // If this instruction follows any data, there is no clear
476     // instruction boundary, inserting a nop/prefix would change semantic.
477     return false;
478 
479   return true;
480 }
481 
482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
483   if (!OS.getAllowAutoPadding())
484     return false;
485   assert(allowAutoPadding() && "incorrect initialization!");
486 
487   // We only pad in text section.
488   if (!OS.getCurrentSectionOnly()->getKind().isText())
489     return false;
490 
491   // To be Done: Currently don't deal with Bundle cases.
492   if (OS.getAssembler().isBundlingEnabled())
493     return false;
494 
495   // Branches only need to be aligned in 32-bit or 64-bit mode.
496   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
497     return false;
498 
499   return true;
500 }
501 
502 /// Check if the instruction operand needs to be aligned.
503 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
504   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
505   return (Desc.isConditionalBranch() &&
506           (AlignBranchType & X86::AlignBranchJcc)) ||
507          (Desc.isUnconditionalBranch() &&
508           (AlignBranchType & X86::AlignBranchJmp)) ||
509          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
510          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
511          (Desc.isIndirectBranch() &&
512           (AlignBranchType & X86::AlignBranchIndirect));
513 }
514 
515 /// Insert BoundaryAlignFragment before instructions to align branches.
516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
517                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
518   CanPadInst = canPadInst(Inst, OS);
519 
520   if (!canPadBranches(OS))
521     return;
522 
523   if (!isMacroFused(PrevInst, Inst))
524     // Macro fusion doesn't happen indeed, clear the pending.
525     PendingBA = nullptr;
526 
527   if (!CanPadInst)
528     return;
529 
530   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
531     // Macro fusion actually happens and there is no other fragment inserted
532     // after the previous instruction.
533     //
534     // Do nothing here since we already inserted a BoudaryAlign fragment when
535     // we met the first instruction in the fused pair and we'll tie them
536     // together in emitInstructionEnd.
537     //
538     // Note: When there is at least one fragment, such as MCAlignFragment,
539     // inserted after the previous instruction, e.g.
540     //
541     // \code
542     //   cmp %rax %rcx
543     //   .align 16
544     //   je .Label0
545     // \ endcode
546     //
547     // We will treat the JCC as a unfused branch although it may be fused
548     // with the CMP.
549     return;
550   }
551 
552   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
553                           isFirstMacroFusibleInst(Inst, *MCII))) {
554     // If we meet a unfused branch or the first instuction in a fusiable pair,
555     // insert a BoundaryAlign fragment.
556     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
557   }
558 }
559 
560 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
562   PrevInst = Inst;
563   MCFragment *CF = OS.getCurrentFragment();
564   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
565   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
566     F->setAllowAutoPadding(CanPadInst);
567 
568   if (!canPadBranches(OS))
569     return;
570 
571   if (!needAlign(Inst) || !PendingBA)
572     return;
573 
574   // Tie the aligned instructions into a a pending BoundaryAlign.
575   PendingBA->setLastFragment(CF);
576   PendingBA = nullptr;
577 
578   // We need to ensure that further data isn't added to the current
579   // DataFragment, so that we can get the size of instructions later in
580   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
581   // DataFragment.
582   if (isa_and_nonnull<MCDataFragment>(CF))
583     OS.insert(new MCDataFragment());
584 
585   // Update the maximum alignment on the current section if necessary.
586   MCSection *Sec = OS.getCurrentSectionOnly();
587   Sec->ensureMinAlignment(AlignBoundary);
588 }
589 
590 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
591   if (STI.getTargetTriple().isOSBinFormatELF()) {
592     unsigned Type;
593     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
594       Type = llvm::StringSwitch<unsigned>(Name)
595 #define ELF_RELOC(X, Y) .Case(#X, Y)
596 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
597 #undef ELF_RELOC
598                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
599                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
600                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
601                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
602                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
603                  .Default(-1u);
604     } else {
605       Type = llvm::StringSwitch<unsigned>(Name)
606 #define ELF_RELOC(X, Y) .Case(#X, Y)
607 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
608 #undef ELF_RELOC
609                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
610                  .Case("BFD_RELOC_8", ELF::R_386_8)
611                  .Case("BFD_RELOC_16", ELF::R_386_16)
612                  .Case("BFD_RELOC_32", ELF::R_386_32)
613                  .Default(-1u);
614     }
615     if (Type == -1u)
616       return std::nullopt;
617     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
618   }
619   return MCAsmBackend::getFixupKind(Name);
620 }
621 
622 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
623   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
624       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
625       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
628       {"reloc_signed_4byte", 0, 32, 0},
629       {"reloc_signed_4byte_relax", 0, 32, 0},
630       {"reloc_global_offset_table", 0, 32, 0},
631       {"reloc_global_offset_table8", 0, 64, 0},
632       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
633   };
634 
635   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
636   // do not require any extra processing.
637   if (Kind >= FirstLiteralRelocationKind)
638     return MCAsmBackend::getFixupKindInfo(FK_NONE);
639 
640   if (Kind < FirstTargetFixupKind)
641     return MCAsmBackend::getFixupKindInfo(Kind);
642 
643   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
644          "Invalid kind!");
645   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
646   return Infos[Kind - FirstTargetFixupKind];
647 }
648 
649 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
650                                           const MCFixup &Fixup,
651                                           const MCValue &) {
652   return Fixup.getKind() >= FirstLiteralRelocationKind;
653 }
654 
655 static unsigned getFixupKindSize(unsigned Kind) {
656   switch (Kind) {
657   default:
658     llvm_unreachable("invalid fixup kind!");
659   case FK_NONE:
660     return 0;
661   case FK_PCRel_1:
662   case FK_SecRel_1:
663   case FK_Data_1:
664     return 1;
665   case FK_PCRel_2:
666   case FK_SecRel_2:
667   case FK_Data_2:
668     return 2;
669   case FK_PCRel_4:
670   case X86::reloc_riprel_4byte:
671   case X86::reloc_riprel_4byte_relax:
672   case X86::reloc_riprel_4byte_relax_rex:
673   case X86::reloc_riprel_4byte_movq_load:
674   case X86::reloc_signed_4byte:
675   case X86::reloc_signed_4byte_relax:
676   case X86::reloc_global_offset_table:
677   case X86::reloc_branch_4byte_pcrel:
678   case FK_SecRel_4:
679   case FK_Data_4:
680     return 4;
681   case FK_PCRel_8:
682   case FK_SecRel_8:
683   case FK_Data_8:
684   case X86::reloc_global_offset_table8:
685     return 8;
686   }
687 }
688 
689 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
690                                const MCValue &Target,
691                                MutableArrayRef<char> Data,
692                                uint64_t Value, bool IsResolved,
693                                const MCSubtargetInfo *STI) const {
694   unsigned Kind = Fixup.getKind();
695   if (Kind >= FirstLiteralRelocationKind)
696     return;
697   unsigned Size = getFixupKindSize(Kind);
698 
699   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
700 
701   int64_t SignedValue = static_cast<int64_t>(Value);
702   if ((Target.isAbsolute() || IsResolved) &&
703       getFixupKindInfo(Fixup.getKind()).Flags &
704       MCFixupKindInfo::FKF_IsPCRel) {
705     // check that PC relative fixup fits into the fixup size.
706     if (Size > 0 && !isIntN(Size * 8, SignedValue))
707       Asm.getContext().reportError(
708                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
709                                    " is too large for field of " + Twine(Size) +
710                                    ((Size == 1) ? " byte." : " bytes."));
711   } else {
712     // Check that uppper bits are either all zeros or all ones.
713     // Specifically ignore overflow/underflow as long as the leakage is
714     // limited to the lower bits. This is to remain compatible with
715     // other assemblers.
716     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
717            "Value does not fit in the Fixup field");
718   }
719 
720   for (unsigned i = 0; i != Size; ++i)
721     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
722 }
723 
724 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
725                                       const MCSubtargetInfo &STI) const {
726   // Branches can always be relaxed in either mode.
727   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
728     return true;
729 
730   // Check if this instruction is ever relaxable.
731   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
732     return false;
733 
734 
735   // Check if the relaxable operand has an expression. For the current set of
736   // relaxable instructions, the relaxable operand is always the last operand.
737   unsigned RelaxableOp = Inst.getNumOperands() - 1;
738   if (Inst.getOperand(RelaxableOp).isExpr())
739     return true;
740 
741   return false;
742 }
743 
744 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
745                                          uint64_t Value,
746                                          const MCRelaxableFragment *DF,
747                                          const MCAsmLayout &Layout) const {
748   // Relax if the value is too big for a (signed) i8.
749   return !isInt<8>(Value);
750 }
751 
752 // FIXME: Can tblgen help at all here to verify there aren't other instructions
753 // we can relax?
754 void X86AsmBackend::relaxInstruction(MCInst &Inst,
755                                      const MCSubtargetInfo &STI) const {
756   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
757   bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
758   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
759 
760   if (RelaxedOp == Inst.getOpcode()) {
761     SmallString<256> Tmp;
762     raw_svector_ostream OS(Tmp);
763     Inst.dump_pretty(OS);
764     OS << "\n";
765     report_fatal_error("unexpected instruction to relax: " + OS.str());
766   }
767 
768   Inst.setOpcode(RelaxedOp);
769 }
770 
771 /// Return true if this instruction has been fully relaxed into it's most
772 /// general available form.
773 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
774   auto &Inst = RF.getInst();
775   auto &STI = *RF.getSubtargetInfo();
776   bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
777   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
778 }
779 
780 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
781                                             MCCodeEmitter &Emitter,
782                                             unsigned &RemainingSize) const {
783   if (!RF.getAllowAutoPadding())
784     return false;
785   // If the instruction isn't fully relaxed, shifting it around might require a
786   // larger value for one of the fixups then can be encoded.  The outer loop
787   // will also catch this before moving to the next instruction, but we need to
788   // prevent padding this single instruction as well.
789   if (!isFullyRelaxed(RF))
790     return false;
791 
792   const unsigned OldSize = RF.getContents().size();
793   if (OldSize == 15)
794     return false;
795 
796   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
797   const unsigned RemainingPrefixSize = [&]() -> unsigned {
798     SmallString<15> Code;
799     raw_svector_ostream VecOS(Code);
800     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
801     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
802 
803     // TODO: It turns out we need a decent amount of plumbing for the target
804     // specific bits to determine number of prefixes its safe to add.  Various
805     // targets (older chips mostly, but also Atom family) encounter decoder
806     // stalls with too many prefixes.  For testing purposes, we set the value
807     // externally for the moment.
808     unsigned ExistingPrefixSize = Code.size();
809     if (TargetPrefixMax <= ExistingPrefixSize)
810       return 0;
811     return TargetPrefixMax - ExistingPrefixSize;
812   }();
813   const unsigned PrefixBytesToAdd =
814       std::min(MaxPossiblePad, RemainingPrefixSize);
815   if (PrefixBytesToAdd == 0)
816     return false;
817 
818   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
819 
820   SmallString<256> Code;
821   Code.append(PrefixBytesToAdd, Prefix);
822   Code.append(RF.getContents().begin(), RF.getContents().end());
823   RF.getContents() = Code;
824 
825   // Adjust the fixups for the change in offsets
826   for (auto &F : RF.getFixups()) {
827     F.setOffset(F.getOffset() + PrefixBytesToAdd);
828   }
829 
830   RemainingSize -= PrefixBytesToAdd;
831   return true;
832 }
833 
834 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
835                                                 MCCodeEmitter &Emitter,
836                                                 unsigned &RemainingSize) const {
837   if (isFullyRelaxed(RF))
838     // TODO: There are lots of other tricks we could apply for increasing
839     // encoding size without impacting performance.
840     return false;
841 
842   MCInst Relaxed = RF.getInst();
843   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
844 
845   SmallVector<MCFixup, 4> Fixups;
846   SmallString<15> Code;
847   raw_svector_ostream VecOS(Code);
848   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
849   const unsigned OldSize = RF.getContents().size();
850   const unsigned NewSize = Code.size();
851   assert(NewSize >= OldSize && "size decrease during relaxation?");
852   unsigned Delta = NewSize - OldSize;
853   if (Delta > RemainingSize)
854     return false;
855   RF.setInst(Relaxed);
856   RF.getContents() = Code;
857   RF.getFixups() = Fixups;
858   RemainingSize -= Delta;
859   return true;
860 }
861 
862 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
863                                            MCCodeEmitter &Emitter,
864                                            unsigned &RemainingSize) const {
865   bool Changed = false;
866   if (RemainingSize != 0)
867     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
868   if (RemainingSize != 0)
869     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
870   return Changed;
871 }
872 
873 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
874                                  MCAsmLayout &Layout) const {
875   // See if we can further relax some instructions to cut down on the number of
876   // nop bytes required for code alignment.  The actual win is in reducing
877   // instruction count, not number of bytes.  Modern X86-64 can easily end up
878   // decode limited.  It is often better to reduce the number of instructions
879   // (i.e. eliminate nops) even at the cost of increasing the size and
880   // complexity of others.
881   if (!X86PadForAlign && !X86PadForBranchAlign)
882     return;
883 
884   // The processed regions are delimitered by LabeledFragments. -g may have more
885   // MCSymbols and therefore different relaxation results. X86PadForAlign is
886   // disabled by default to eliminate the -g vs non -g difference.
887   DenseSet<MCFragment *> LabeledFragments;
888   for (const MCSymbol &S : Asm.symbols())
889     LabeledFragments.insert(S.getFragment(false));
890 
891   for (MCSection &Sec : Asm) {
892     if (!Sec.getKind().isText())
893       continue;
894 
895     SmallVector<MCRelaxableFragment *, 4> Relaxable;
896     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
897       MCFragment &F = *I;
898 
899       if (LabeledFragments.count(&F))
900         Relaxable.clear();
901 
902       if (F.getKind() == MCFragment::FT_Data ||
903           F.getKind() == MCFragment::FT_CompactEncodedInst)
904         // Skip and ignore
905         continue;
906 
907       if (F.getKind() == MCFragment::FT_Relaxable) {
908         auto &RF = cast<MCRelaxableFragment>(*I);
909         Relaxable.push_back(&RF);
910         continue;
911       }
912 
913       auto canHandle = [](MCFragment &F) -> bool {
914         switch (F.getKind()) {
915         default:
916           return false;
917         case MCFragment::FT_Align:
918           return X86PadForAlign;
919         case MCFragment::FT_BoundaryAlign:
920           return X86PadForBranchAlign;
921         }
922       };
923       // For any unhandled kind, assume we can't change layout.
924       if (!canHandle(F)) {
925         Relaxable.clear();
926         continue;
927       }
928 
929 #ifndef NDEBUG
930       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
931 #endif
932       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
933 
934       // To keep the effects local, prefer to relax instructions closest to
935       // the align directive.  This is purely about human understandability
936       // of the resulting code.  If we later find a reason to expand
937       // particular instructions over others, we can adjust.
938       MCFragment *FirstChangedFragment = nullptr;
939       unsigned RemainingSize = OrigSize;
940       while (!Relaxable.empty() && RemainingSize != 0) {
941         auto &RF = *Relaxable.pop_back_val();
942         // Give the backend a chance to play any tricks it wishes to increase
943         // the encoding size of the given instruction.  Target independent code
944         // will try further relaxation, but target's may play further tricks.
945         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
946           FirstChangedFragment = &RF;
947 
948         // If we have an instruction which hasn't been fully relaxed, we can't
949         // skip past it and insert bytes before it.  Changing its starting
950         // offset might require a larger negative offset than it can encode.
951         // We don't need to worry about larger positive offsets as none of the
952         // possible offsets between this and our align are visible, and the
953         // ones afterwards aren't changing.
954         if (!isFullyRelaxed(RF))
955           break;
956       }
957       Relaxable.clear();
958 
959       if (FirstChangedFragment) {
960         // Make sure the offsets for any fragments in the effected range get
961         // updated.  Note that this (conservatively) invalidates the offsets of
962         // those following, but this is not required.
963         Layout.invalidateFragmentsFrom(FirstChangedFragment);
964       }
965 
966       // BoundaryAlign explicitly tracks it's size (unlike align)
967       if (F.getKind() == MCFragment::FT_BoundaryAlign)
968         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
969 
970 #ifndef NDEBUG
971       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
972       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
973       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
974              "can't move start of next fragment!");
975       assert(FinalSize == RemainingSize && "inconsistent size computation?");
976 #endif
977 
978       // If we're looking at a boundary align, make sure we don't try to pad
979       // its target instructions for some following directive.  Doing so would
980       // break the alignment of the current boundary align.
981       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
982         const MCFragment *LastFragment = BF->getLastFragment();
983         if (!LastFragment)
984           continue;
985         while (&*I != LastFragment)
986           ++I;
987       }
988     }
989   }
990 
991   // The layout is done. Mark every fragment as valid.
992   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
993     MCSection &Section = *Layout.getSectionOrder()[i];
994     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
995     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
996   }
997 }
998 
999 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
1000   if (STI.hasFeature(X86::Is16Bit))
1001     return 4;
1002   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
1003     return 1;
1004   if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
1005     return 7;
1006   if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
1007     return 15;
1008   if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
1009     return 11;
1010   // FIXME: handle 32-bit mode
1011   // 15-bytes is the longest single NOP instruction, but 10-bytes is
1012   // commonly the longest that can be efficiently decoded.
1013   return 10;
1014 }
1015 
1016 /// Write a sequence of optimal nops to the output, covering \p Count
1017 /// bytes.
1018 /// \return - true on success, false on failure
1019 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1020                                  const MCSubtargetInfo *STI) const {
1021   static const char Nops32Bit[10][11] = {
1022       // nop
1023       "\x90",
1024       // xchg %ax,%ax
1025       "\x66\x90",
1026       // nopl (%[re]ax)
1027       "\x0f\x1f\x00",
1028       // nopl 0(%[re]ax)
1029       "\x0f\x1f\x40\x00",
1030       // nopl 0(%[re]ax,%[re]ax,1)
1031       "\x0f\x1f\x44\x00\x00",
1032       // nopw 0(%[re]ax,%[re]ax,1)
1033       "\x66\x0f\x1f\x44\x00\x00",
1034       // nopl 0L(%[re]ax)
1035       "\x0f\x1f\x80\x00\x00\x00\x00",
1036       // nopl 0L(%[re]ax,%[re]ax,1)
1037       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1038       // nopw 0L(%[re]ax,%[re]ax,1)
1039       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1040       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1041       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1042   };
1043 
1044   // 16-bit mode uses different nop patterns than 32-bit.
1045   static const char Nops16Bit[4][11] = {
1046       // nop
1047       "\x90",
1048       // xchg %eax,%eax
1049       "\x66\x90",
1050       // lea 0(%si),%si
1051       "\x8d\x74\x00",
1052       // lea 0w(%si),%si
1053       "\x8d\xb4\x00\x00",
1054   };
1055 
1056   const char(*Nops)[11] =
1057       STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit;
1058 
1059   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1060 
1061   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1062   // length.
1063   do {
1064     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1065     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1066     for (uint8_t i = 0; i < Prefixes; i++)
1067       OS << '\x66';
1068     const uint8_t Rest = ThisNopLength - Prefixes;
1069     if (Rest != 0)
1070       OS.write(Nops[Rest - 1], Rest);
1071     Count -= ThisNopLength;
1072   } while (Count != 0);
1073 
1074   return true;
1075 }
1076 
1077 /* *** */
1078 
1079 namespace {
1080 
1081 class ELFX86AsmBackend : public X86AsmBackend {
1082 public:
1083   uint8_t OSABI;
1084   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1085       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1086 };
1087 
1088 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1089 public:
1090   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1091                       const MCSubtargetInfo &STI)
1092     : ELFX86AsmBackend(T, OSABI, STI) {}
1093 
1094   std::unique_ptr<MCObjectTargetWriter>
1095   createObjectTargetWriter() const override {
1096     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1097   }
1098 };
1099 
1100 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1101 public:
1102   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1103                        const MCSubtargetInfo &STI)
1104       : ELFX86AsmBackend(T, OSABI, STI) {}
1105 
1106   std::unique_ptr<MCObjectTargetWriter>
1107   createObjectTargetWriter() const override {
1108     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1109                                     ELF::EM_X86_64);
1110   }
1111 };
1112 
1113 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1114 public:
1115   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1116                          const MCSubtargetInfo &STI)
1117       : ELFX86AsmBackend(T, OSABI, STI) {}
1118 
1119   std::unique_ptr<MCObjectTargetWriter>
1120   createObjectTargetWriter() const override {
1121     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1122                                     ELF::EM_IAMCU);
1123   }
1124 };
1125 
1126 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1127 public:
1128   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1129                       const MCSubtargetInfo &STI)
1130     : ELFX86AsmBackend(T, OSABI, STI) {}
1131 
1132   std::unique_ptr<MCObjectTargetWriter>
1133   createObjectTargetWriter() const override {
1134     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1135   }
1136 };
1137 
1138 class WindowsX86AsmBackend : public X86AsmBackend {
1139   bool Is64Bit;
1140 
1141 public:
1142   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1143                        const MCSubtargetInfo &STI)
1144     : X86AsmBackend(T, STI)
1145     , Is64Bit(is64Bit) {
1146   }
1147 
1148   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1149     return StringSwitch<std::optional<MCFixupKind>>(Name)
1150         .Case("dir32", FK_Data_4)
1151         .Case("secrel32", FK_SecRel_4)
1152         .Case("secidx", FK_SecRel_2)
1153         .Default(MCAsmBackend::getFixupKind(Name));
1154   }
1155 
1156   std::unique_ptr<MCObjectTargetWriter>
1157   createObjectTargetWriter() const override {
1158     return createX86WinCOFFObjectWriter(Is64Bit);
1159   }
1160 };
1161 
namespace CU {

  /// Mode values and field masks used when building a compact unwind
  /// encoding.
  enum CompactUnwindEncodings {
    /// Frame based on [RE]BP: [RE]BP is pushed on the stack immediately after
    /// the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME = 0x01000000,

    /// Frameless function whose constant stack size is small.
    UNWIND_MODE_STACK_IMMD = 0x02000000,

    /// Frameless function whose constant stack size is large.
    UNWIND_MODE_STACK_IND = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF = 0x04000000,

    /// Mask selecting the bits that encode the frame registers.
    UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

    /// Mask selecting the bits that encode the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // namespace CU
1187 
1188 class DarwinX86AsmBackend : public X86AsmBackend {
1189   const MCRegisterInfo &MRI;
1190 
1191   /// Number of registers that can be saved in a compact unwind encoding.
1192   enum { CU_NUM_SAVED_REGS = 6 };
1193 
1194   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1195   Triple TT;
1196   bool Is64Bit;
1197 
1198   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1199   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1200   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1201 protected:
1202   /// Size of a "push" instruction for the given register.
1203   unsigned PushInstrSize(unsigned Reg) const {
1204     switch (Reg) {
1205       case X86::EBX:
1206       case X86::ECX:
1207       case X86::EDX:
1208       case X86::EDI:
1209       case X86::ESI:
1210       case X86::EBP:
1211       case X86::RBX:
1212       case X86::RBP:
1213         return 1;
1214       case X86::R12:
1215       case X86::R13:
1216       case X86::R14:
1217       case X86::R15:
1218         return 2;
1219     }
1220     return 1;
1221   }
1222 
1223 private:
1224   /// Get the compact unwind number for a given register. The number
1225   /// corresponds to the enum lists in compact_unwind_encoding.h.
1226   int getCompactUnwindRegNum(unsigned Reg) const {
1227     static const MCPhysReg CU32BitRegs[7] = {
1228       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1229     };
1230     static const MCPhysReg CU64BitRegs[] = {
1231       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1232     };
1233     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1234     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1235       if (*CURegs == Reg)
1236         return Idx;
1237 
1238     return -1;
1239   }
1240 
1241   /// Return the registers encoded for a compact encoding with a frame
1242   /// pointer.
1243   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1244     // Encode the registers in the order they were saved --- 3-bits per
1245     // register. The list of saved registers is assumed to be in reverse
1246     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1247     uint32_t RegEnc = 0;
1248     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1249       unsigned Reg = SavedRegs[i];
1250       if (Reg == 0) break;
1251 
1252       int CURegNum = getCompactUnwindRegNum(Reg);
1253       if (CURegNum == -1) return ~0U;
1254 
1255       // Encode the 3-bit register number in order, skipping over 3-bits for
1256       // each register.
1257       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1258     }
1259 
1260     assert((RegEnc & 0x3FFFF) == RegEnc &&
1261            "Invalid compact register encoding!");
1262     return RegEnc;
1263   }
1264 
1265   /// Create the permutation encoding used with frameless stacks. It is
1266   /// passed the number of registers to be saved and an array of the registers
1267   /// saved.
1268   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1269     // The saved registers are numbered from 1 to 6. In order to encode the
1270     // order in which they were saved, we re-number them according to their
1271     // place in the register order. The re-numbering is relative to the last
1272     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1273     // that order:
1274     //
1275     //    Orig  Re-Num
1276     //    ----  ------
1277     //     6       6
1278     //     2       2
1279     //     4       3
1280     //     5       3
1281     //
1282     for (unsigned i = 0; i < RegCount; ++i) {
1283       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1284       if (CUReg == -1) return ~0U;
1285       SavedRegs[i] = CUReg;
1286     }
1287 
1288     // Reverse the list.
1289     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1290 
1291     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1292     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1293       unsigned Countless = 0;
1294       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1295         if (SavedRegs[j] < SavedRegs[i])
1296           ++Countless;
1297 
1298       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1299     }
1300 
1301     // Take the renumbered values and encode them into a 10-bit number.
1302     uint32_t permutationEncoding = 0;
1303     switch (RegCount) {
1304     case 6:
1305       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1306                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1307                              +     RenumRegs[4];
1308       break;
1309     case 5:
1310       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1311                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1312                              +     RenumRegs[5];
1313       break;
1314     case 4:
1315       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1316                              + 3 * RenumRegs[4] +      RenumRegs[5];
1317       break;
1318     case 3:
1319       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1320                              +     RenumRegs[5];
1321       break;
1322     case 2:
1323       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1324       break;
1325     case 1:
1326       permutationEncoding |=       RenumRegs[5];
1327       break;
1328     }
1329 
1330     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1331            "Invalid compact register encoding!");
1332     return permutationEncoding;
1333   }
1334 
1335 public:
1336   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1337                       const MCSubtargetInfo &STI)
1338       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1339         Is64Bit(TT.isArch64Bit()) {
1340     memset(SavedRegs, 0, sizeof(SavedRegs));
1341     OffsetSize = Is64Bit ? 8 : 4;
1342     MoveInstrSize = Is64Bit ? 3 : 2;
1343     StackDivide = Is64Bit ? 8 : 4;
1344   }
1345 
1346   std::unique_ptr<MCObjectTargetWriter>
1347   createObjectTargetWriter() const override {
1348     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1349     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1350     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1351   }
1352 
1353   /// Implementation of algorithm to generate the compact unwind encoding
1354   /// for the CFI instructions.
1355   uint32_t
1356   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
1357     if (Instrs.empty()) return 0;
1358 
1359     // Reset the saved registers.
1360     unsigned SavedRegIdx = 0;
1361     memset(SavedRegs, 0, sizeof(SavedRegs));
1362 
1363     bool HasFP = false;
1364 
1365     // Encode that we are using EBP/RBP as the frame pointer.
1366     uint32_t CompactUnwindEncoding = 0;
1367 
1368     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1369     unsigned InstrOffset = 0;
1370     unsigned StackAdjust = 0;
1371     unsigned StackSize = 0;
1372     int MinAbsOffset = std::numeric_limits<int>::max();
1373 
1374     for (const MCCFIInstruction &Inst : Instrs) {
1375       switch (Inst.getOperation()) {
1376       default:
1377         // Any other CFI directives indicate a frame that we aren't prepared
1378         // to represent via compact unwind, so just bail out.
1379         return CU::UNWIND_MODE_DWARF;
1380       case MCCFIInstruction::OpDefCfaRegister: {
1381         // Defines a frame pointer. E.g.
1382         //
1383         //     movq %rsp, %rbp
1384         //  L0:
1385         //     .cfi_def_cfa_register %rbp
1386         //
1387         HasFP = true;
1388 
1389         // If the frame pointer is other than esp/rsp, we do not have a way to
1390         // generate a compact unwinding representation, so bail out.
1391         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1392             (Is64Bit ? X86::RBP : X86::EBP))
1393           return CU::UNWIND_MODE_DWARF;
1394 
1395         // Reset the counts.
1396         memset(SavedRegs, 0, sizeof(SavedRegs));
1397         StackAdjust = 0;
1398         SavedRegIdx = 0;
1399         MinAbsOffset = std::numeric_limits<int>::max();
1400         InstrOffset += MoveInstrSize;
1401         break;
1402       }
1403       case MCCFIInstruction::OpDefCfaOffset: {
1404         // Defines a new offset for the CFA. E.g.
1405         //
1406         //  With frame:
1407         //
1408         //     pushq %rbp
1409         //  L0:
1410         //     .cfi_def_cfa_offset 16
1411         //
1412         //  Without frame:
1413         //
1414         //     subq $72, %rsp
1415         //  L0:
1416         //     .cfi_def_cfa_offset 80
1417         //
1418         StackSize = Inst.getOffset() / StackDivide;
1419         break;
1420       }
1421       case MCCFIInstruction::OpOffset: {
1422         // Defines a "push" of a callee-saved register. E.g.
1423         //
1424         //     pushq %r15
1425         //     pushq %r14
1426         //     pushq %rbx
1427         //  L0:
1428         //     subq $120, %rsp
1429         //  L1:
1430         //     .cfi_offset %rbx, -40
1431         //     .cfi_offset %r14, -32
1432         //     .cfi_offset %r15, -24
1433         //
1434         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1435           // If there are too many saved registers, we cannot use a compact
1436           // unwind encoding.
1437           return CU::UNWIND_MODE_DWARF;
1438 
1439         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1440         SavedRegs[SavedRegIdx++] = Reg;
1441         StackAdjust += OffsetSize;
1442         MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1443         InstrOffset += PushInstrSize(Reg);
1444         break;
1445       }
1446       }
1447     }
1448 
1449     StackAdjust /= StackDivide;
1450 
1451     if (HasFP) {
1452       if ((StackAdjust & 0xFF) != StackAdjust)
1453         // Offset was too big for a compact unwind encoding.
1454         return CU::UNWIND_MODE_DWARF;
1455 
1456       // We don't attempt to track a real StackAdjust, so if the saved registers
1457       // aren't adjacent to rbp we can't cope.
1458       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1459         return CU::UNWIND_MODE_DWARF;
1460 
1461       // Get the encoding of the saved registers when we have a frame pointer.
1462       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1463       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1464 
1465       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1466       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1467       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1468     } else {
1469       SubtractInstrIdx += InstrOffset;
1470       ++StackAdjust;
1471 
1472       if ((StackSize & 0xFF) == StackSize) {
1473         // Frameless stack with a small stack size.
1474         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1475 
1476         // Encode the stack size.
1477         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1478       } else {
1479         if ((StackAdjust & 0x7) != StackAdjust)
1480           // The extra stack adjustments are too big for us to handle.
1481           return CU::UNWIND_MODE_DWARF;
1482 
1483         // Frameless stack with an offset too large for us to encode compactly.
1484         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1485 
1486         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1487         // instruction.
1488         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1489 
1490         // Encode any extra stack adjustments (done via push instructions).
1491         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1492       }
1493 
1494       // Encode the number of registers saved. (Reverse the list first.)
1495       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1496       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1497 
1498       // Get the encoding of the saved registers when we don't have a frame
1499       // pointer.
1500       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1501       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1502 
1503       // Encode the register encoding.
1504       CompactUnwindEncoding |=
1505         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1506     }
1507 
1508     return CompactUnwindEncoding;
1509   }
1510 };
1511 
1512 } // end anonymous namespace
1513 
1514 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1515                                            const MCSubtargetInfo &STI,
1516                                            const MCRegisterInfo &MRI,
1517                                            const MCTargetOptions &Options) {
1518   const Triple &TheTriple = STI.getTargetTriple();
1519   if (TheTriple.isOSBinFormatMachO())
1520     return new DarwinX86AsmBackend(T, MRI, STI);
1521 
1522   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1523     return new WindowsX86AsmBackend(T, false, STI);
1524 
1525   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1526 
1527   if (TheTriple.isOSIAMCU())
1528     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1529 
1530   return new ELFX86_32AsmBackend(T, OSABI, STI);
1531 }
1532 
1533 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1534                                            const MCSubtargetInfo &STI,
1535                                            const MCRegisterInfo &MRI,
1536                                            const MCTargetOptions &Options) {
1537   const Triple &TheTriple = STI.getTargetTriple();
1538   if (TheTriple.isOSBinFormatMachO())
1539     return new DarwinX86AsmBackend(T, MRI, STI);
1540 
1541   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1542     return new WindowsX86AsmBackend(T, true, STI);
1543 
1544   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1545 
1546   if (TheTriple.isX32())
1547     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1548   return new ELFX86_64AsmBackend(T, OSABI, STI);
1549 }
1550