xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAssembler.h"
17 #include "llvm/MC/MCCodeEmitter.h"
18 #include "llvm/MC/MCContext.h"
19 #include "llvm/MC/MCDwarf.h"
20 #include "llvm/MC/MCELFObjectWriter.h"
21 #include "llvm/MC/MCELFStreamer.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44   uint8_t AlignBranchKind = 0;
45 
46 public:
47   void operator=(const std::string &Val) {
48     if (Val.empty())
49       return;
50     SmallVector<StringRef, 6> BranchTypes;
51     StringRef(Val).split(BranchTypes, '+', -1, false);
52     for (auto BranchType : BranchTypes) {
53       if (BranchType == "fused")
54         addKind(X86::AlignBranchFused);
55       else if (BranchType == "jcc")
56         addKind(X86::AlignBranchJcc);
57       else if (BranchType == "jmp")
58         addKind(X86::AlignBranchJmp);
59       else if (BranchType == "call")
60         addKind(X86::AlignBranchCall);
61       else if (BranchType == "ret")
62         addKind(X86::AlignBranchRet);
63       else if (BranchType == "indirect")
64         addKind(X86::AlignBranchIndirect);
65       else {
66         errs() << "invalid argument " << BranchType.str()
67                << " to -x86-align-branch=; each element must be one of: fused, "
68                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
69       }
70     }
71   }
72 
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76 
// External storage for -x86-align-branch (see cl::location below). The
// X86AsmBackend constructor copies it when the option was given.
X86AlignBranchKind X86AlignBranchKindLoc;

// Size of the boundary that aligned branches must not cross or end against.
// Defaults to 0; -x86-branches-within-32B-boundaries can still select a
// boundary when this option is not given.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// Which kinds of branches to align. The value is parsed as a string and
// stored in X86AlignBranchKindLoc.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// Shorthand that makes the X86AsmBackend constructor pick a 32-byte boundary
// and the fused, jcc and jmp branch kinds. The two options above override
// those defaults when given.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// Copied into X86AsmBackend::TargetPrefixMax when given on the command line.
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// Hidden option, off by default.
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// Hidden option, on by default. allowEnhancedRelaxation() requires it.
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
119 
120 class X86AsmBackend : public MCAsmBackend {
121   const MCSubtargetInfo &STI;
122   std::unique_ptr<const MCInstrInfo> MCII;
123   X86AlignBranchKind AlignBranchType;
124   Align AlignBoundary;
125   unsigned TargetPrefixMax = 0;
126 
127   MCInst PrevInst;
128   unsigned PrevInstOpcode = 0;
129   MCBoundaryAlignFragment *PendingBA = nullptr;
130   std::pair<MCFragment *, size_t> PrevInstPosition;
131   bool IsRightAfterData = false;
132 
133   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
134   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
135   bool needAlign(const MCInst &Inst) const;
136   bool canPadBranches(MCObjectStreamer &OS) const;
137   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
138 
139 public:
140   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
141       : MCAsmBackend(llvm::endianness::little), STI(STI),
142         MCII(T.createMCInstrInfo()) {
143     if (X86AlignBranchWithin32BBoundaries) {
144       // At the moment, this defaults to aligning fused branches, unconditional
145       // jumps, and (unfused) conditional jumps with nops.  Both the
146       // instructions aligned and the alignment method (nop vs prefix) may
147       // change in the future.
148       AlignBoundary = assumeAligned(32);
149       AlignBranchType.addKind(X86::AlignBranchFused);
150       AlignBranchType.addKind(X86::AlignBranchJcc);
151       AlignBranchType.addKind(X86::AlignBranchJmp);
152     }
153     // Allow overriding defaults set by main flag
154     if (X86AlignBranchBoundary.getNumOccurrences())
155       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
156     if (X86AlignBranch.getNumOccurrences())
157       AlignBranchType = X86AlignBranchKindLoc;
158     if (X86PadMaxPrefixSize.getNumOccurrences())
159       TargetPrefixMax = X86PadMaxPrefixSize;
160   }
161 
162   bool allowAutoPadding() const override;
163   bool allowEnhancedRelaxation() const override;
164   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
165                             const MCSubtargetInfo &STI);
166   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);
167 
168   unsigned getNumFixupKinds() const override {
169     return X86::NumTargetFixupKinds;
170   }
171 
172   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
173 
174   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
175 
176   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
177                              const MCValue &Target,
178                              const MCSubtargetInfo *STI) override;
179 
180   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
181                   const MCValue &Target, MutableArrayRef<char> Data,
182                   uint64_t Value, bool IsResolved,
183                   const MCSubtargetInfo *STI) const override;
184 
185   bool mayNeedRelaxation(const MCInst &Inst,
186                          const MCSubtargetInfo &STI) const override;
187 
188   bool fixupNeedsRelaxation(const MCFixup &Fixup,
189                             uint64_t Value) const override;
190 
191   void relaxInstruction(MCInst &Inst,
192                         const MCSubtargetInfo &STI) const override;
193 
194   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195                                    MCCodeEmitter &Emitter,
196                                    unsigned &RemainingSize) const;
197 
198   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199                                unsigned &RemainingSize) const;
200 
201   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202                               unsigned &RemainingSize) const;
203 
204   void finishLayout(const MCAssembler &Asm) const override;
205 
206   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207 
208   bool writeNopData(raw_ostream &OS, uint64_t Count,
209                     const MCSubtargetInfo *STI) const override;
210 };
211 } // end anonymous namespace
212 
213 static bool isRelaxableBranch(unsigned Opcode) {
214   return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215 }
216 
217 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218                                        bool Is16BitMode = false) {
219   switch (Opcode) {
220   default:
221     llvm_unreachable("invalid opcode for branch");
222   case X86::JCC_1:
223     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224   case X86::JMP_1:
225     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226   }
227 }
228 
229 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230   unsigned Opcode = MI.getOpcode();
231   return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232                                    : X86::getOpcodeForLongImmediateForm(Opcode);
233 }
234 
235 static X86::CondCode getCondFromBranch(const MCInst &MI,
236                                        const MCInstrInfo &MCII) {
237   unsigned Opcode = MI.getOpcode();
238   switch (Opcode) {
239   default:
240     return X86::COND_INVALID;
241   case X86::JCC_1: {
242     const MCInstrDesc &Desc = MCII.get(Opcode);
243     return static_cast<X86::CondCode>(
244         MI.getOperand(Desc.getNumOperands() - 1).getImm());
245   }
246   }
247 }
248 
249 static X86::SecondMacroFusionInstKind
250 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251   X86::CondCode CC = getCondFromBranch(MI, MCII);
252   return classifySecondCondCodeInMacroFusion(CC);
253 }
254 
255 /// Check if the instruction uses RIP relative addressing.
256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257   unsigned Opcode = MI.getOpcode();
258   const MCInstrDesc &Desc = MCII.get(Opcode);
259   uint64_t TSFlags = Desc.TSFlags;
260   unsigned CurOp = X86II::getOperandBias(Desc);
261   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262   if (MemoryOperand < 0)
263     return false;
264   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
266   return (BaseReg == X86::RIP);
267 }
268 
269 /// Check if the instruction is a prefix.
270 static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
271   return X86II::isPrefix(MCII.get(Opcode).TSFlags);
272 }
273 
274 /// Check if the instruction is valid as the first instruction in macro fusion.
275 static bool isFirstMacroFusibleInst(const MCInst &Inst,
276                                     const MCInstrInfo &MCII) {
277   // An Intel instruction with RIP relative addressing is not macro fusible.
278   if (isRIPRelative(Inst, MCII))
279     return false;
280   X86::FirstMacroFusionInstKind FIK =
281       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
282   return FIK != X86::FirstMacroFusionInstKind::Invalid;
283 }
284 
285 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
286 /// get a better peformance in some cases. Here, we determine which prefix is
287 /// the most suitable.
288 ///
289 /// If the instruction has a segment override prefix, use the existing one.
290 /// If the target is 64-bit, use the CS.
291 /// If the target is 32-bit,
292 ///   - If the instruction has a ESP/EBP base register, use SS.
293 ///   - Otherwise use DS.
294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296          "Prefixes can be added only in 32-bit or 64-bit mode.");
297   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
298   uint64_t TSFlags = Desc.TSFlags;
299 
300   // Determine where the memory operand starts, if present.
301   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302   if (MemoryOperand != -1)
303     MemoryOperand += X86II::getOperandBias(Desc);
304 
305   unsigned SegmentReg = 0;
306   if (MemoryOperand >= 0) {
307     // Check for explicit segment override on memory operand.
308     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
309   }
310 
311   switch (TSFlags & X86II::FormMask) {
312   default:
313     break;
314   case X86II::RawFrmDstSrc: {
315     // Check segment override opcode prefix as needed (not for %ds).
316     if (Inst.getOperand(2).getReg() != X86::DS)
317       SegmentReg = Inst.getOperand(2).getReg();
318     break;
319   }
320   case X86II::RawFrmSrc: {
321     // Check segment override opcode prefix as needed (not for %ds).
322     if (Inst.getOperand(1).getReg() != X86::DS)
323       SegmentReg = Inst.getOperand(1).getReg();
324     break;
325   }
326   case X86II::RawFrmMemOffs: {
327     // Check segment override opcode prefix as needed.
328     SegmentReg = Inst.getOperand(1).getReg();
329     break;
330   }
331   }
332 
333   if (SegmentReg != 0)
334     return X86::getSegmentOverridePrefixForReg(SegmentReg);
335 
336   if (STI.hasFeature(X86::Is64Bit))
337     return X86::CS_Encoding;
338 
339   if (MemoryOperand >= 0) {
340     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
342     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343       return X86::SS_Encoding;
344   }
345   return X86::DS_Encoding;
346 }
347 
348 /// Check if the two instructions will be macro-fused on the target cpu.
349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
351   if (!InstDesc.isConditionalBranch())
352     return false;
353   if (!isFirstMacroFusibleInst(Cmp, *MCII))
354     return false;
355   const X86::FirstMacroFusionInstKind CmpKind =
356       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
357   const X86::SecondMacroFusionInstKind BranchKind =
358       classifySecondInstInMacroFusion(Jcc, *MCII);
359   return X86::isMacroFused(CmpKind, BranchKind);
360 }
361 
362 /// Check if the instruction has a variant symbol operand.
363 static bool hasVariantSymbol(const MCInst &MI) {
364   for (auto &Operand : MI) {
365     if (!Operand.isExpr())
366       continue;
367     const MCExpr &Expr = *Operand.getExpr();
368     if (Expr.getKind() == MCExpr::SymbolRef &&
369         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
370       return true;
371   }
372   return false;
373 }
374 
375 bool X86AsmBackend::allowAutoPadding() const {
376   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377 }
378 
379 bool X86AsmBackend::allowEnhancedRelaxation() const {
380   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381 }
382 
383 /// X86 has certain instructions which enable interrupts exactly one
384 /// instruction *after* the instruction which stores to SS.  Return true if the
385 /// given instruction may have such an interrupt delay slot.
386 static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
387   switch (InstOpcode) {
388   case X86::POPSS16:
389   case X86::POPSS32:
390   case X86::STI:
391     return true;
392 
393   case X86::MOV16sr:
394   case X86::MOV32sr:
395   case X86::MOV64sr:
396   case X86::MOV16sm:
397     // In fact, this is only the case if the first operand is SS. However, as
398     // segment moves occur extremely rarely, this is just a minor pessimization.
399     return true;
400   }
401   return false;
402 }
403 
404 /// Check if the instruction to be emitted is right after any data.
405 static bool
406 isRightAfterData(MCFragment *CurrentFragment,
407                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408   MCFragment *F = CurrentFragment;
409   // Since data is always emitted into a DataFragment, our check strategy is
410   // simple here.
411   //   - If the fragment is a DataFragment
412   //     - If it's empty (section start or data after align), return false.
413   //     - If it's not the fragment where the previous instruction is,
414   //       returns true.
415   //     - If it's the fragment holding the previous instruction but its
416   //       size changed since the previous instruction was emitted into
417   //       it, returns true.
418   //     - Otherwise returns false.
419   //   - If the fragment is not a DataFragment, returns false.
420   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
421     return DF->getContents().size() &&
422            (DF != PrevInstPosition.first ||
423             DF->getContents().size() != PrevInstPosition.second);
424 
425   return false;
426 }
427 
428 /// \returns the fragment size if it has instructions, otherwise returns 0.
429 static size_t getSizeForInstFragment(const MCFragment *F) {
430   if (!F || !F->hasInstructions())
431     return 0;
432   // MCEncodedFragmentWithContents being templated makes this tricky.
433   switch (F->getKind()) {
434   default:
435     llvm_unreachable("Unknown fragment with instructions!");
436   case MCFragment::FT_Data:
437     return cast<MCDataFragment>(*F).getContents().size();
438   case MCFragment::FT_Relaxable:
439     return cast<MCRelaxableFragment>(*F).getContents().size();
440   case MCFragment::FT_CompactEncodedInst:
441     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
442   }
443 }
444 
445 /// Return true if we can insert NOP or prefixes automatically before the
446 /// the instruction to be emitted.
447 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
448   if (hasVariantSymbol(Inst))
449     // Linker may rewrite the instruction with variant symbol operand(e.g.
450     // TLSCALL).
451     return false;
452 
453   if (mayHaveInterruptDelaySlot(PrevInstOpcode))
454     // If this instruction follows an interrupt enabling instruction with a one
455     // instruction delay, inserting a nop would change behavior.
456     return false;
457 
458   if (isPrefix(PrevInstOpcode, *MCII))
459     // If this instruction follows a prefix, inserting a nop/prefix would change
460     // semantic.
461     return false;
462 
463   if (isPrefix(Inst.getOpcode(), *MCII))
464     // If this instruction is a prefix, inserting a prefix would change
465     // semantic.
466     return false;
467 
468   if (IsRightAfterData)
469     // If this instruction follows any data, there is no clear
470     // instruction boundary, inserting a nop/prefix would change semantic.
471     return false;
472 
473   return true;
474 }
475 
476 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
477   if (!OS.getAllowAutoPadding())
478     return false;
479   assert(allowAutoPadding() && "incorrect initialization!");
480 
481   // We only pad in text section.
482   if (!OS.getCurrentSectionOnly()->isText())
483     return false;
484 
485   // To be Done: Currently don't deal with Bundle cases.
486   if (OS.getAssembler().isBundlingEnabled())
487     return false;
488 
489   // Branches only need to be aligned in 32-bit or 64-bit mode.
490   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
491     return false;
492 
493   return true;
494 }
495 
496 /// Check if the instruction operand needs to be aligned.
497 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
498   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
499   return (Desc.isConditionalBranch() &&
500           (AlignBranchType & X86::AlignBranchJcc)) ||
501          (Desc.isUnconditionalBranch() &&
502           (AlignBranchType & X86::AlignBranchJmp)) ||
503          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
504          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
505          (Desc.isIndirectBranch() &&
506           (AlignBranchType & X86::AlignBranchIndirect));
507 }
508 
509 /// Insert BoundaryAlignFragment before instructions to align branches.
510 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
511                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
512   // Used by canPadInst. Done here, because in emitInstructionEnd, the current
513   // fragment will have changed.
514   IsRightAfterData =
515       isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);
516 
517   if (!canPadBranches(OS))
518     return;
519 
520   // NB: PrevInst only valid if canPadBranches is true.
521   if (!isMacroFused(PrevInst, Inst))
522     // Macro fusion doesn't happen indeed, clear the pending.
523     PendingBA = nullptr;
524 
525   // When branch padding is enabled (basically the skx102 erratum => unlikely),
526   // we call canPadInst (not cheap) twice. However, in the common case, we can
527   // avoid unnecessary calls to that, as this is otherwise only used for
528   // relaxable fragments.
529   if (!canPadInst(Inst, OS))
530     return;
531 
532   if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
533     // Macro fusion actually happens and there is no other fragment inserted
534     // after the previous instruction.
535     //
536     // Do nothing here since we already inserted a BoudaryAlign fragment when
537     // we met the first instruction in the fused pair and we'll tie them
538     // together in emitInstructionEnd.
539     //
540     // Note: When there is at least one fragment, such as MCAlignFragment,
541     // inserted after the previous instruction, e.g.
542     //
543     // \code
544     //   cmp %rax %rcx
545     //   .align 16
546     //   je .Label0
547     // \ endcode
548     //
549     // We will treat the JCC as a unfused branch although it may be fused
550     // with the CMP.
551     return;
552   }
553 
554   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
555                           isFirstMacroFusibleInst(Inst, *MCII))) {
556     // If we meet a unfused branch or the first instuction in a fusiable pair,
557     // insert a BoundaryAlign fragment.
558     PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
559         AlignBoundary, STI);
560     OS.insert(PendingBA);
561   }
562 }
563 
564 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
565 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
566                                        const MCInst &Inst) {
567   MCFragment *CF = OS.getCurrentFragment();
568   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
569     F->setAllowAutoPadding(canPadInst(Inst, OS));
570 
571   // Update PrevInstOpcode here, canPadInst() reads that.
572   PrevInstOpcode = Inst.getOpcode();
573   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
574 
575   if (!canPadBranches(OS))
576     return;
577 
578   // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
579   PrevInst = Inst;
580 
581   if (!needAlign(Inst) || !PendingBA)
582     return;
583 
584   // Tie the aligned instructions into a pending BoundaryAlign.
585   PendingBA->setLastFragment(CF);
586   PendingBA = nullptr;
587 
588   // We need to ensure that further data isn't added to the current
589   // DataFragment, so that we can get the size of instructions later in
590   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
591   // DataFragment.
592   if (isa_and_nonnull<MCDataFragment>(CF))
593     OS.insert(OS.getContext().allocFragment<MCDataFragment>());
594 
595   // Update the maximum alignment on the current section if necessary.
596   MCSection *Sec = OS.getCurrentSectionOnly();
597   Sec->ensureMinAlignment(AlignBoundary);
598 }
599 
600 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
601   if (STI.getTargetTriple().isOSBinFormatELF()) {
602     unsigned Type;
603     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
604       Type = llvm::StringSwitch<unsigned>(Name)
605 #define ELF_RELOC(X, Y) .Case(#X, Y)
606 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
607 #undef ELF_RELOC
608                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
609                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
610                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
611                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
612                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
613                  .Default(-1u);
614     } else {
615       Type = llvm::StringSwitch<unsigned>(Name)
616 #define ELF_RELOC(X, Y) .Case(#X, Y)
617 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
618 #undef ELF_RELOC
619                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
620                  .Case("BFD_RELOC_8", ELF::R_386_8)
621                  .Case("BFD_RELOC_16", ELF::R_386_16)
622                  .Case("BFD_RELOC_32", ELF::R_386_32)
623                  .Default(-1u);
624     }
625     if (Type == -1u)
626       return std::nullopt;
627     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
628   }
629   return MCAsmBackend::getFixupKind(Name);
630 }
631 
632 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
633   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
634       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
635       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
636       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
637       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
638       {"reloc_signed_4byte", 0, 32, 0},
639       {"reloc_signed_4byte_relax", 0, 32, 0},
640       {"reloc_global_offset_table", 0, 32, 0},
641       {"reloc_global_offset_table8", 0, 64, 0},
642       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
643   };
644 
645   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
646   // do not require any extra processing.
647   if (Kind >= FirstLiteralRelocationKind)
648     return MCAsmBackend::getFixupKindInfo(FK_NONE);
649 
650   if (Kind < FirstTargetFixupKind)
651     return MCAsmBackend::getFixupKindInfo(Kind);
652 
653   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
654          "Invalid kind!");
655   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
656   return Infos[Kind - FirstTargetFixupKind];
657 }
658 
659 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
660                                           const MCFixup &Fixup, const MCValue &,
661                                           const MCSubtargetInfo *STI) {
662   return Fixup.getKind() >= FirstLiteralRelocationKind;
663 }
664 
665 static unsigned getFixupKindSize(unsigned Kind) {
666   switch (Kind) {
667   default:
668     llvm_unreachable("invalid fixup kind!");
669   case FK_NONE:
670     return 0;
671   case FK_PCRel_1:
672   case FK_SecRel_1:
673   case FK_Data_1:
674     return 1;
675   case FK_PCRel_2:
676   case FK_SecRel_2:
677   case FK_Data_2:
678     return 2;
679   case FK_PCRel_4:
680   case X86::reloc_riprel_4byte:
681   case X86::reloc_riprel_4byte_relax:
682   case X86::reloc_riprel_4byte_relax_rex:
683   case X86::reloc_riprel_4byte_movq_load:
684   case X86::reloc_signed_4byte:
685   case X86::reloc_signed_4byte_relax:
686   case X86::reloc_global_offset_table:
687   case X86::reloc_branch_4byte_pcrel:
688   case FK_SecRel_4:
689   case FK_Data_4:
690     return 4;
691   case FK_PCRel_8:
692   case FK_SecRel_8:
693   case FK_Data_8:
694   case X86::reloc_global_offset_table8:
695     return 8;
696   }
697 }
698 
699 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
700                                const MCValue &Target,
701                                MutableArrayRef<char> Data,
702                                uint64_t Value, bool IsResolved,
703                                const MCSubtargetInfo *STI) const {
704   unsigned Kind = Fixup.getKind();
705   if (Kind >= FirstLiteralRelocationKind)
706     return;
707   unsigned Size = getFixupKindSize(Kind);
708 
709   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
710 
711   int64_t SignedValue = static_cast<int64_t>(Value);
712   if ((Target.isAbsolute() || IsResolved) &&
713       getFixupKindInfo(Fixup.getKind()).Flags &
714       MCFixupKindInfo::FKF_IsPCRel) {
715     // check that PC relative fixup fits into the fixup size.
716     if (Size > 0 && !isIntN(Size * 8, SignedValue))
717       Asm.getContext().reportError(
718                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
719                                    " is too large for field of " + Twine(Size) +
720                                    ((Size == 1) ? " byte." : " bytes."));
721   } else {
722     // Check that uppper bits are either all zeros or all ones.
723     // Specifically ignore overflow/underflow as long as the leakage is
724     // limited to the lower bits. This is to remain compatible with
725     // other assemblers.
726     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
727            "Value does not fit in the Fixup field");
728   }
729 
730   for (unsigned i = 0; i != Size; ++i)
731     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
732 }
733 
734 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
735                                       const MCSubtargetInfo &STI) const {
736   unsigned Opcode = MI.getOpcode();
737   unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
738   return isRelaxableBranch(Opcode) ||
739          (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
740           MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
741 }
742 
743 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
744                                          uint64_t Value) const {
745   // Relax if the value is too big for a (signed) i8.
746   return !isInt<8>(Value);
747 }
748 
749 // FIXME: Can tblgen help at all here to verify there aren't other instructions
750 // we can relax?
751 void X86AsmBackend::relaxInstruction(MCInst &Inst,
752                                      const MCSubtargetInfo &STI) const {
753   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
754   bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
755   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
756 
757   if (RelaxedOp == Inst.getOpcode()) {
758     SmallString<256> Tmp;
759     raw_svector_ostream OS(Tmp);
760     Inst.dump_pretty(OS);
761     OS << "\n";
762     report_fatal_error("unexpected instruction to relax: " + OS.str());
763   }
764 
765   Inst.setOpcode(RelaxedOp);
766 }
767 
768 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
769                                             MCCodeEmitter &Emitter,
770                                             unsigned &RemainingSize) const {
771   if (!RF.getAllowAutoPadding())
772     return false;
773   // If the instruction isn't fully relaxed, shifting it around might require a
774   // larger value for one of the fixups then can be encoded.  The outer loop
775   // will also catch this before moving to the next instruction, but we need to
776   // prevent padding this single instruction as well.
777   if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
778     return false;
779 
780   const unsigned OldSize = RF.getContents().size();
781   if (OldSize == 15)
782     return false;
783 
784   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
785   const unsigned RemainingPrefixSize = [&]() -> unsigned {
786     SmallString<15> Code;
787     X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
788     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
789 
790     // TODO: It turns out we need a decent amount of plumbing for the target
791     // specific bits to determine number of prefixes its safe to add.  Various
792     // targets (older chips mostly, but also Atom family) encounter decoder
793     // stalls with too many prefixes.  For testing purposes, we set the value
794     // externally for the moment.
795     unsigned ExistingPrefixSize = Code.size();
796     if (TargetPrefixMax <= ExistingPrefixSize)
797       return 0;
798     return TargetPrefixMax - ExistingPrefixSize;
799   }();
800   const unsigned PrefixBytesToAdd =
801       std::min(MaxPossiblePad, RemainingPrefixSize);
802   if (PrefixBytesToAdd == 0)
803     return false;
804 
805   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
806 
807   SmallString<256> Code;
808   Code.append(PrefixBytesToAdd, Prefix);
809   Code.append(RF.getContents().begin(), RF.getContents().end());
810   RF.getContents() = Code;
811 
812   // Adjust the fixups for the change in offsets
813   for (auto &F : RF.getFixups()) {
814     F.setOffset(F.getOffset() + PrefixBytesToAdd);
815   }
816 
817   RemainingSize -= PrefixBytesToAdd;
818   return true;
819 }
820 
821 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
822                                                 MCCodeEmitter &Emitter,
823                                                 unsigned &RemainingSize) const {
824   if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
825     // TODO: There are lots of other tricks we could apply for increasing
826     // encoding size without impacting performance.
827     return false;
828 
829   MCInst Relaxed = RF.getInst();
830   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
831 
832   SmallVector<MCFixup, 4> Fixups;
833   SmallString<15> Code;
834   Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
835   const unsigned OldSize = RF.getContents().size();
836   const unsigned NewSize = Code.size();
837   assert(NewSize >= OldSize && "size decrease during relaxation?");
838   unsigned Delta = NewSize - OldSize;
839   if (Delta > RemainingSize)
840     return false;
841   RF.setInst(Relaxed);
842   RF.getContents() = Code;
843   RF.getFixups() = Fixups;
844   RemainingSize -= Delta;
845   return true;
846 }
847 
848 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
849                                            MCCodeEmitter &Emitter,
850                                            unsigned &RemainingSize) const {
851   bool Changed = false;
852   if (RemainingSize != 0)
853     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
854   if (RemainingSize != 0)
855     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
856   return Changed;
857 }
858 
859 void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
860   // See if we can further relax some instructions to cut down on the number of
861   // nop bytes required for code alignment.  The actual win is in reducing
862   // instruction count, not number of bytes.  Modern X86-64 can easily end up
863   // decode limited.  It is often better to reduce the number of instructions
864   // (i.e. eliminate nops) even at the cost of increasing the size and
865   // complexity of others.
866   if (!X86PadForAlign && !X86PadForBranchAlign)
867     return;
868 
869   // The processed regions are delimitered by LabeledFragments. -g may have more
870   // MCSymbols and therefore different relaxation results. X86PadForAlign is
871   // disabled by default to eliminate the -g vs non -g difference.
872   DenseSet<MCFragment *> LabeledFragments;
873   for (const MCSymbol &S : Asm.symbols())
874     LabeledFragments.insert(S.getFragment(false));
875 
876   for (MCSection &Sec : Asm) {
877     if (!Sec.isText())
878       continue;
879 
880     SmallVector<MCRelaxableFragment *, 4> Relaxable;
881     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
882       MCFragment &F = *I;
883 
884       if (LabeledFragments.count(&F))
885         Relaxable.clear();
886 
887       if (F.getKind() == MCFragment::FT_Data ||
888           F.getKind() == MCFragment::FT_CompactEncodedInst)
889         // Skip and ignore
890         continue;
891 
892       if (F.getKind() == MCFragment::FT_Relaxable) {
893         auto &RF = cast<MCRelaxableFragment>(*I);
894         Relaxable.push_back(&RF);
895         continue;
896       }
897 
898       auto canHandle = [](MCFragment &F) -> bool {
899         switch (F.getKind()) {
900         default:
901           return false;
902         case MCFragment::FT_Align:
903           return X86PadForAlign;
904         case MCFragment::FT_BoundaryAlign:
905           return X86PadForBranchAlign;
906         }
907       };
908       // For any unhandled kind, assume we can't change layout.
909       if (!canHandle(F)) {
910         Relaxable.clear();
911         continue;
912       }
913 
914 #ifndef NDEBUG
915       const uint64_t OrigOffset = Asm.getFragmentOffset(F);
916 #endif
917       const uint64_t OrigSize = Asm.computeFragmentSize(F);
918 
919       // To keep the effects local, prefer to relax instructions closest to
920       // the align directive.  This is purely about human understandability
921       // of the resulting code.  If we later find a reason to expand
922       // particular instructions over others, we can adjust.
923       unsigned RemainingSize = OrigSize;
924       while (!Relaxable.empty() && RemainingSize != 0) {
925         auto &RF = *Relaxable.pop_back_val();
926         // Give the backend a chance to play any tricks it wishes to increase
927         // the encoding size of the given instruction.  Target independent code
928         // will try further relaxation, but target's may play further tricks.
929         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
930           Sec.setHasLayout(false);
931 
932         // If we have an instruction which hasn't been fully relaxed, we can't
933         // skip past it and insert bytes before it.  Changing its starting
934         // offset might require a larger negative offset than it can encode.
935         // We don't need to worry about larger positive offsets as none of the
936         // possible offsets between this and our align are visible, and the
937         // ones afterwards aren't changing.
938         if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
939           break;
940       }
941       Relaxable.clear();
942 
943       // BoundaryAlign explicitly tracks it's size (unlike align)
944       if (F.getKind() == MCFragment::FT_BoundaryAlign)
945         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
946 
947 #ifndef NDEBUG
948       const uint64_t FinalOffset = Asm.getFragmentOffset(F);
949       const uint64_t FinalSize = Asm.computeFragmentSize(F);
950       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
951              "can't move start of next fragment!");
952       assert(FinalSize == RemainingSize && "inconsistent size computation?");
953 #endif
954 
955       // If we're looking at a boundary align, make sure we don't try to pad
956       // its target instructions for some following directive.  Doing so would
957       // break the alignment of the current boundary align.
958       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
959         const MCFragment *LastFragment = BF->getLastFragment();
960         if (!LastFragment)
961           continue;
962         while (&*I != LastFragment)
963           ++I;
964       }
965     }
966   }
967 
968   // The layout is done. Mark every fragment as valid.
969   for (MCSection &Section : Asm) {
970     Asm.getFragmentOffset(*Section.curFragList()->Tail);
971     Asm.computeFragmentSize(*Section.curFragList()->Tail);
972   }
973 }
974 
975 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976   if (STI.hasFeature(X86::Is16Bit))
977     return 4;
978   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
979     return 1;
980   if (STI.hasFeature(X86::TuningFast7ByteNOP))
981     return 7;
982   if (STI.hasFeature(X86::TuningFast15ByteNOP))
983     return 15;
984   if (STI.hasFeature(X86::TuningFast11ByteNOP))
985     return 11;
986   // FIXME: handle 32-bit mode
987   // 15-bytes is the longest single NOP instruction, but 10-bytes is
988   // commonly the longest that can be efficiently decoded.
989   return 10;
990 }
991 
992 /// Write a sequence of optimal nops to the output, covering \p Count
993 /// bytes.
994 /// \return - true on success, false on failure
995 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996                                  const MCSubtargetInfo *STI) const {
997   static const char Nops32Bit[10][11] = {
998       // nop
999       "\x90",
1000       // xchg %ax,%ax
1001       "\x66\x90",
1002       // nopl (%[re]ax)
1003       "\x0f\x1f\x00",
1004       // nopl 0(%[re]ax)
1005       "\x0f\x1f\x40\x00",
1006       // nopl 0(%[re]ax,%[re]ax,1)
1007       "\x0f\x1f\x44\x00\x00",
1008       // nopw 0(%[re]ax,%[re]ax,1)
1009       "\x66\x0f\x1f\x44\x00\x00",
1010       // nopl 0L(%[re]ax)
1011       "\x0f\x1f\x80\x00\x00\x00\x00",
1012       // nopl 0L(%[re]ax,%[re]ax,1)
1013       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014       // nopw 0L(%[re]ax,%[re]ax,1)
1015       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1017       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018   };
1019 
1020   // 16-bit mode uses different nop patterns than 32-bit.
1021   static const char Nops16Bit[4][11] = {
1022       // nop
1023       "\x90",
1024       // xchg %eax,%eax
1025       "\x66\x90",
1026       // lea 0(%si),%si
1027       "\x8d\x74\x00",
1028       // lea 0w(%si),%si
1029       "\x8d\xb4\x00\x00",
1030   };
1031 
1032   const char(*Nops)[11] =
1033       STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034 
1035   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1036 
1037   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038   // length.
1039   do {
1040     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1041     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1042     for (uint8_t i = 0; i < Prefixes; i++)
1043       OS << '\x66';
1044     const uint8_t Rest = ThisNopLength - Prefixes;
1045     if (Rest != 0)
1046       OS.write(Nops[Rest - 1], Rest);
1047     Count -= ThisNopLength;
1048   } while (Count != 0);
1049 
1050   return true;
1051 }
1052 
1053 /* *** */
1054 
1055 namespace {
1056 
1057 class ELFX86AsmBackend : public X86AsmBackend {
1058 public:
1059   uint8_t OSABI;
1060   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1062 };
1063 
1064 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065 public:
1066   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067                       const MCSubtargetInfo &STI)
1068     : ELFX86AsmBackend(T, OSABI, STI) {}
1069 
1070   std::unique_ptr<MCObjectTargetWriter>
1071   createObjectTargetWriter() const override {
1072     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1073   }
1074 };
1075 
1076 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077 public:
1078   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079                        const MCSubtargetInfo &STI)
1080       : ELFX86AsmBackend(T, OSABI, STI) {}
1081 
1082   std::unique_ptr<MCObjectTargetWriter>
1083   createObjectTargetWriter() const override {
1084     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085                                     ELF::EM_X86_64);
1086   }
1087 };
1088 
1089 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090 public:
1091   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092                          const MCSubtargetInfo &STI)
1093       : ELFX86AsmBackend(T, OSABI, STI) {}
1094 
1095   std::unique_ptr<MCObjectTargetWriter>
1096   createObjectTargetWriter() const override {
1097     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1098                                     ELF::EM_IAMCU);
1099   }
1100 };
1101 
1102 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103 public:
1104   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105                       const MCSubtargetInfo &STI)
1106     : ELFX86AsmBackend(T, OSABI, STI) {}
1107 
1108   std::unique_ptr<MCObjectTargetWriter>
1109   createObjectTargetWriter() const override {
1110     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1111   }
1112 };
1113 
1114 class WindowsX86AsmBackend : public X86AsmBackend {
1115   bool Is64Bit;
1116 
1117 public:
1118   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119                        const MCSubtargetInfo &STI)
1120     : X86AsmBackend(T, STI)
1121     , Is64Bit(is64Bit) {
1122   }
1123 
1124   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125     return StringSwitch<std::optional<MCFixupKind>>(Name)
1126         .Case("dir32", FK_Data_4)
1127         .Case("secrel32", FK_SecRel_4)
1128         .Case("secidx", FK_SecRel_2)
1129         .Default(MCAsmBackend::getFixupKind(Name));
1130   }
1131 
1132   std::unique_ptr<MCObjectTargetWriter>
1133   createObjectTargetWriter() const override {
1134     return createX86WinCOFFObjectWriter(Is64Bit);
1135   }
1136 };
1137 
namespace CU {

/// Constants used when assembling a compact unwind encoding: the
/// UNWIND_MODE_* values select the unwind mode, the remaining two are masks
/// for the register fields.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU
1163 
1164 class DarwinX86AsmBackend : public X86AsmBackend {
1165   const MCRegisterInfo &MRI;
1166 
1167   /// Number of registers that can be saved in a compact unwind encoding.
1168   enum { CU_NUM_SAVED_REGS = 6 };
1169 
1170   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171   Triple TT;
1172   bool Is64Bit;
1173 
1174   unsigned OffsetSize;                   ///< Offset of a "push" instruction.
1175   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1176   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1177 protected:
1178   /// Size of a "push" instruction for the given register.
1179   unsigned PushInstrSize(unsigned Reg) const {
1180     switch (Reg) {
1181       case X86::EBX:
1182       case X86::ECX:
1183       case X86::EDX:
1184       case X86::EDI:
1185       case X86::ESI:
1186       case X86::EBP:
1187       case X86::RBX:
1188       case X86::RBP:
1189         return 1;
1190       case X86::R12:
1191       case X86::R13:
1192       case X86::R14:
1193       case X86::R15:
1194         return 2;
1195     }
1196     return 1;
1197   }
1198 
1199 private:
1200   /// Get the compact unwind number for a given register. The number
1201   /// corresponds to the enum lists in compact_unwind_encoding.h.
1202   int getCompactUnwindRegNum(unsigned Reg) const {
1203     static const MCPhysReg CU32BitRegs[7] = {
1204       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1205     };
1206     static const MCPhysReg CU64BitRegs[] = {
1207       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1208     };
1209     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1211       if (*CURegs == Reg)
1212         return Idx;
1213 
1214     return -1;
1215   }
1216 
1217   /// Return the registers encoded for a compact encoding with a frame
1218   /// pointer.
1219   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220     // Encode the registers in the order they were saved --- 3-bits per
1221     // register. The list of saved registers is assumed to be in reverse
1222     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223     uint32_t RegEnc = 0;
1224     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1225       unsigned Reg = SavedRegs[i];
1226       if (Reg == 0) break;
1227 
1228       int CURegNum = getCompactUnwindRegNum(Reg);
1229       if (CURegNum == -1) return ~0U;
1230 
1231       // Encode the 3-bit register number in order, skipping over 3-bits for
1232       // each register.
1233       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1234     }
1235 
1236     assert((RegEnc & 0x3FFFF) == RegEnc &&
1237            "Invalid compact register encoding!");
1238     return RegEnc;
1239   }
1240 
1241   /// Create the permutation encoding used with frameless stacks. It is
1242   /// passed the number of registers to be saved and an array of the registers
1243   /// saved.
1244   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245     // The saved registers are numbered from 1 to 6. In order to encode the
1246     // order in which they were saved, we re-number them according to their
1247     // place in the register order. The re-numbering is relative to the last
1248     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249     // that order:
1250     //
1251     //    Orig  Re-Num
1252     //    ----  ------
1253     //     6       6
1254     //     2       2
1255     //     4       3
1256     //     5       3
1257     //
1258     for (unsigned i = 0; i < RegCount; ++i) {
1259       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1260       if (CUReg == -1) return ~0U;
1261       SavedRegs[i] = CUReg;
1262     }
1263 
1264     // Reverse the list.
1265     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1266 
1267     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269       unsigned Countless = 0;
1270       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271         if (SavedRegs[j] < SavedRegs[i])
1272           ++Countless;
1273 
1274       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1275     }
1276 
1277     // Take the renumbered values and encode them into a 10-bit number.
1278     uint32_t permutationEncoding = 0;
1279     switch (RegCount) {
1280     case 6:
1281       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1282                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1283                              +     RenumRegs[4];
1284       break;
1285     case 5:
1286       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1287                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1288                              +     RenumRegs[5];
1289       break;
1290     case 4:
1291       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1292                              + 3 * RenumRegs[4] +      RenumRegs[5];
1293       break;
1294     case 3:
1295       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1296                              +     RenumRegs[5];
1297       break;
1298     case 2:
1299       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1300       break;
1301     case 1:
1302       permutationEncoding |=       RenumRegs[5];
1303       break;
1304     }
1305 
1306     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1307            "Invalid compact register encoding!");
1308     return permutationEncoding;
1309   }
1310 
1311 public:
1312   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313                       const MCSubtargetInfo &STI)
1314       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1315         Is64Bit(TT.isArch64Bit()) {
1316     memset(SavedRegs, 0, sizeof(SavedRegs));
1317     OffsetSize = Is64Bit ? 8 : 4;
1318     MoveInstrSize = Is64Bit ? 3 : 2;
1319     StackDivide = Is64Bit ? 8 : 4;
1320   }
1321 
1322   std::unique_ptr<MCObjectTargetWriter>
1323   createObjectTargetWriter() const override {
1324     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1325     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1326     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1327   }
1328 
1329   /// Implementation of algorithm to generate the compact unwind encoding
1330   /// for the CFI instructions.
1331   uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332                                          const MCContext *Ctxt) const override {
1333     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334     if (Instrs.empty()) return 0;
1335     if (!isDarwinCanonicalPersonality(FI->Personality) &&
1336         !Ctxt->emitCompactUnwindNonCanonical())
1337       return CU::UNWIND_MODE_DWARF;
1338 
1339     // Reset the saved registers.
1340     unsigned SavedRegIdx = 0;
1341     memset(SavedRegs, 0, sizeof(SavedRegs));
1342 
1343     bool HasFP = false;
1344 
1345     // Encode that we are using EBP/RBP as the frame pointer.
1346     uint64_t CompactUnwindEncoding = 0;
1347 
1348     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1349     unsigned InstrOffset = 0;
1350     unsigned StackAdjust = 0;
1351     uint64_t StackSize = 0;
1352     int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();
1353 
1354     for (const MCCFIInstruction &Inst : Instrs) {
1355       switch (Inst.getOperation()) {
1356       default:
1357         // Any other CFI directives indicate a frame that we aren't prepared
1358         // to represent via compact unwind, so just bail out.
1359         return CU::UNWIND_MODE_DWARF;
1360       case MCCFIInstruction::OpDefCfaRegister: {
1361         // Defines a frame pointer. E.g.
1362         //
1363         //     movq %rsp, %rbp
1364         //  L0:
1365         //     .cfi_def_cfa_register %rbp
1366         //
1367         HasFP = true;
1368 
1369         // If the frame pointer is other than esp/rsp, we do not have a way to
1370         // generate a compact unwinding representation, so bail out.
1371         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1372             (Is64Bit ? X86::RBP : X86::EBP))
1373           return CU::UNWIND_MODE_DWARF;
1374 
1375         // Reset the counts.
1376         memset(SavedRegs, 0, sizeof(SavedRegs));
1377         StackAdjust = 0;
1378         SavedRegIdx = 0;
1379         MinAbsOffset = std::numeric_limits<int64_t>::max();
1380         InstrOffset += MoveInstrSize;
1381         break;
1382       }
1383       case MCCFIInstruction::OpDefCfaOffset: {
1384         // Defines a new offset for the CFA. E.g.
1385         //
1386         //  With frame:
1387         //
1388         //     pushq %rbp
1389         //  L0:
1390         //     .cfi_def_cfa_offset 16
1391         //
1392         //  Without frame:
1393         //
1394         //     subq $72, %rsp
1395         //  L0:
1396         //     .cfi_def_cfa_offset 80
1397         //
1398         StackSize = Inst.getOffset() / StackDivide;
1399         break;
1400       }
1401       case MCCFIInstruction::OpOffset: {
1402         // Defines a "push" of a callee-saved register. E.g.
1403         //
1404         //     pushq %r15
1405         //     pushq %r14
1406         //     pushq %rbx
1407         //  L0:
1408         //     subq $120, %rsp
1409         //  L1:
1410         //     .cfi_offset %rbx, -40
1411         //     .cfi_offset %r14, -32
1412         //     .cfi_offset %r15, -24
1413         //
1414         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415           // If there are too many saved registers, we cannot use a compact
1416           // unwind encoding.
1417           return CU::UNWIND_MODE_DWARF;
1418 
1419         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1420         SavedRegs[SavedRegIdx++] = Reg;
1421         StackAdjust += OffsetSize;
1422         MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
1423         InstrOffset += PushInstrSize(Reg);
1424         break;
1425       }
1426       }
1427     }
1428 
1429     StackAdjust /= StackDivide;
1430 
1431     if (HasFP) {
1432       if ((StackAdjust & 0xFF) != StackAdjust)
1433         // Offset was too big for a compact unwind encoding.
1434         return CU::UNWIND_MODE_DWARF;
1435 
1436       // We don't attempt to track a real StackAdjust, so if the saved registers
1437       // aren't adjacent to rbp we can't cope.
1438       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1439         return CU::UNWIND_MODE_DWARF;
1440 
1441       // Get the encoding of the saved registers when we have a frame pointer.
1442       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1444 
1445       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1446       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1447       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448     } else {
1449       SubtractInstrIdx += InstrOffset;
1450       ++StackAdjust;
1451 
1452       if ((StackSize & 0xFF) == StackSize) {
1453         // Frameless stack with a small stack size.
1454         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1455 
1456         // Encode the stack size.
1457         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1458       } else {
1459         if ((StackAdjust & 0x7) != StackAdjust)
1460           // The extra stack adjustments are too big for us to handle.
1461           return CU::UNWIND_MODE_DWARF;
1462 
1463         // Frameless stack with an offset too large for us to encode compactly.
1464         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1465 
1466         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467         // instruction.
1468         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1469 
1470         // Encode any extra stack adjustments (done via push instructions).
1471         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1472       }
1473 
1474       // Encode the number of registers saved. (Reverse the list first.)
1475       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1476       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1477 
1478       // Get the encoding of the saved registers when we don't have a frame
1479       // pointer.
1480       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1481       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1482 
1483       // Encode the register encoding.
1484       CompactUnwindEncoding |=
1485         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486     }
1487 
1488     return CompactUnwindEncoding;
1489   }
1490 };
1491 
1492 } // end anonymous namespace
1493 
1494 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1495                                            const MCSubtargetInfo &STI,
1496                                            const MCRegisterInfo &MRI,
1497                                            const MCTargetOptions &Options) {
1498   const Triple &TheTriple = STI.getTargetTriple();
1499   if (TheTriple.isOSBinFormatMachO())
1500     return new DarwinX86AsmBackend(T, MRI, STI);
1501 
1502   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503     return new WindowsX86AsmBackend(T, false, STI);
1504 
1505   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1506 
1507   if (TheTriple.isOSIAMCU())
1508     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1509 
1510   return new ELFX86_32AsmBackend(T, OSABI, STI);
1511 }
1512 
1513 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1514                                            const MCSubtargetInfo &STI,
1515                                            const MCRegisterInfo &MRI,
1516                                            const MCTargetOptions &Options) {
1517   const Triple &TheTriple = STI.getTargetTriple();
1518   if (TheTriple.isOSBinFormatMachO())
1519     return new DarwinX86AsmBackend(T, MRI, STI);
1520 
1521   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522     return new WindowsX86AsmBackend(T, true, STI);
1523 
1524   if (TheTriple.isUEFI()) {
1525     assert(TheTriple.isOSBinFormatCOFF() &&
1526          "Only COFF format is supported in UEFI environment.");
1527     return new WindowsX86AsmBackend(T, true, STI);
1528   }
1529 
1530   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1531 
1532   if (TheTriple.isX32())
1533     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1534   return new ELFX86_64AsmBackend(T, OSABI, STI);
1535 }
1536 
1537 namespace {
1538 class X86ELFStreamer : public MCELFStreamer {
1539 public:
1540   X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
1541                  std::unique_ptr<MCObjectWriter> OW,
1542                  std::unique_ptr<MCCodeEmitter> Emitter)
1543       : MCELFStreamer(Context, std::move(TAB), std::move(OW),
1544                       std::move(Emitter)) {}
1545 
1546   void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
1547 };
1548 } // end anonymous namespace
1549 
1550 void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
1551                              const MCSubtargetInfo &STI) {
1552   auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
1553   Backend.emitInstructionBegin(S, Inst, STI);
1554   S.MCObjectStreamer::emitInstruction(Inst, STI);
1555   Backend.emitInstructionEnd(S, Inst);
1556 }
1557 
1558 void X86ELFStreamer::emitInstruction(const MCInst &Inst,
1559                                      const MCSubtargetInfo &STI) {
1560   X86_MC::emitInstruction(*this, Inst, STI);
1561 }
1562 
1563 MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
1564                                        std::unique_ptr<MCAsmBackend> &&MAB,
1565                                        std::unique_ptr<MCObjectWriter> &&MOW,
1566                                        std::unique_ptr<MCCodeEmitter> &&MCE) {
1567   return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
1568                             std::move(MCE));
1569 }
1570