//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect (plus-separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));
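// For example (illustrative), passing
//   -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// pads fused compare-and-branch pairs, conditional jumps, and unconditional
// jumps so that none of them crosses or ends against a 32-byte boundary.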

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's microcode update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(support::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops.  Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes needed by padding instructions with
/// prefixes instead, which gives better performance in some cases. Here, we
/// determine which prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
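///
/// For example (illustrative), in 32-bit mode `movl $1, 8(%ebp)` with no
/// explicit segment override would be padded with SS prefixes (0x36), while
/// `movl $1, (%eax)` would get DS prefixes (0x3e); in 64-bit mode either
/// instruction would get CS prefixes (0x2e).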
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target cpu.
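/// For example (illustrative), on CPUs supporting macro fusion the pair
/// \code
///   cmp %rsi, %rdi
///   je  .Label0
/// \endcode
/// decodes as a single fused micro-op, so the two instructions must be kept
/// together when branches are aligned.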
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
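/// For example (illustrative), the operand of `leaq x@tlsdesc(%rip), %rax`
/// carries the VK_TLSDESC variant; the linker may rewrite such instructions
/// during TLS relaxation, so they must not be padded.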
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS.  Return true if the
/// given instruction has such an interrupt delay slot.
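/// For example (illustrative):
/// \code
///   mov %ax, %ss    ; interrupts are inhibited for one more instruction
///   mov %bp, %sp    ; still executes with interrupts inhibited
/// \endcode
/// Inserting a NOP between the two would move the second instruction out of
/// the delay slot and change behavior.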
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
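/// For example (illustrative), in
/// \code
///   .byte 0x2e
///   jmp .Label0
/// \endcode
/// the jmp is preceded by raw data in the same fragment, so there is no
/// clear instruction boundary before it and it must not be padded.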
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite an instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert a BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into the pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

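// Fixup kind names here come from .reloc directives. For example
// (illustrative),
//   .reloc ., R_X86_64_NONE, foo
//   .reloc ., BFD_RELOC_32, bar
// name literal relocation types that are passed through unmodified on x86-64
// ELF targets.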
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup,
                                          const MCValue &) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
      MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

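  // Write the (possibly truncated) value into the fragment little-endian.
  // For example, patching Value = 0x11223344 into a 4-byte field stores the
  // bytes 44 33 22 11 at the fixup offset.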
  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
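// For example (illustrative), `jne .Label0` in its 1-byte-immediate form
// (JCC_1) is relaxed to the 4-byte-immediate form (JCC_4), or to the 2-byte
// form (JCC_2) in 16-bit mode.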
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

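/// Pad the given instruction by prepending redundant prefixes. For example
/// (illustrative), with TargetPrefixMax = 5 a 3-byte instruction that carries
/// no prefixes of its own can absorb up to five padding prefix bytes, e.g.
/// five 0x2e (CS) bytes in 64-bit mode, reducing the NOP padding needed
/// elsewhere by the same amount.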
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded.  The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it is safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes.  For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment.  The actual win is in reducing
  // instruction count, not number of bytes.  Modern X86-64 can easily end up
  // decode limited.  It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive.  This is purely about human understandability
      // of the resulting code.  If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it.  Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated.  Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.  Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char(*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
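  // For example, Count = 17 with MaxNopLength = 15 emits one 15-byte NOP
  // (five 0x66 prefixes on the 10-byte form) followed by a 2-byte NOP.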
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
    : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
    : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
    : X86AsmBackend(T, STI)
    , Is64Bit(is64Bit) {
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};

namespace CU {

  /// Compact unwind encoding values.
  enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME                   = 0x01000000,

    /// A frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD                 = 0x02000000,

    /// A frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND                  = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF                      = 0x04000000,

    /// Mask for encoding the frame registers.
    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,

    /// Mask for encoding the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
      case X86::EBX:
      case X86::ECX:
      case X86::EDX:
      case X86::EDI:
      case X86::ESI:
      case X86::EBP:
      case X86::RBX:
      case X86::RBP:
        return 1;
      case X86::R12:
      case X86::R13:
      case X86::R14:
      case X86::R15:
        return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 3:
      permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 2:
      permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 1:
      permutationEncoding |=       RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    int MinAbsOffset = std::numeric_limits<int>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved registers
      // aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}