1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "llvm/ADT/StringSwitch.h"
13 #include "llvm/BinaryFormat/ELF.h"
14 #include "llvm/BinaryFormat/MachO.h"
15 #include "llvm/MC/MCAsmBackend.h"
16 #include "llvm/MC/MCAsmLayout.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCFixupKindInfo.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCMachObjectWriter.h"
27 #include "llvm/MC/MCObjectStreamer.h"
28 #include "llvm/MC/MCObjectWriter.h"
29 #include "llvm/MC/MCRegisterInfo.h"
30 #include "llvm/MC/MCSectionMachO.h"
31 #include "llvm/MC/MCSubtargetInfo.h"
32 #include "llvm/MC/MCValue.h"
33 #include "llvm/MC/TargetRegistry.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/raw_ostream.h"
37 
38 using namespace llvm;
39 
40 namespace {
41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
42 class X86AlignBranchKind {
43 private:
44   uint8_t AlignBranchKind = 0;
45 
46 public:
47   void operator=(const std::string &Val) {
48     if (Val.empty())
49       return;
50     SmallVector<StringRef, 6> BranchTypes;
51     StringRef(Val).split(BranchTypes, '+', -1, false);
52     for (auto BranchType : BranchTypes) {
53       if (BranchType == "fused")
54         addKind(X86::AlignBranchFused);
55       else if (BranchType == "jcc")
56         addKind(X86::AlignBranchJcc);
57       else if (BranchType == "jmp")
58         addKind(X86::AlignBranchJmp);
59       else if (BranchType == "call")
60         addKind(X86::AlignBranchCall);
61       else if (BranchType == "ret")
62         addKind(X86::AlignBranchRet);
63       else if (BranchType == "indirect")
64         addKind(X86::AlignBranchIndirect);
65       else {
66         errs() << "invalid argument " << BranchType.str()
67                << " to -x86-align-branch=; each element must be one of: fused, "
68                   "jcc, jmp, call, ret, indirect (plus-separated)\n";
69       }
70     }
71   }
72 
73   operator uint8_t() const { return AlignBranchKind; }
74   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
75 };
76 
77 X86AlignBranchKind X86AlignBranchKindLoc;
78 
79 cl::opt<unsigned> X86AlignBranchBoundary(
80     "x86-align-branch-boundary", cl::init(0),
81     cl::desc(
82         "Control how the assembler should align branches with NOP. If the "
83         "boundary's size is not 0, it should be a power of 2 and no less "
84         "than 32. Branches will be aligned so that they neither cross nor "
85         "end at a boundary of the specified size. The default value 0 does "
86         "not align branches."));
87 
88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
89     "x86-align-branch",
90     cl::desc(
91         "Specify types of branches to align (plus-separated list of types):"
92         "\njcc      indicates conditional jumps"
93         "\nfused    indicates fused conditional jumps"
94         "\njmp      indicates direct unconditional jumps"
95         "\ncall     indicates direct and indirect calls"
96         "\nret      indicates rets"
97         "\nindirect indicates indirect unconditional jumps"),
98     cl::location(X86AlignBranchKindLoc));
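// Illustrative usage (the exact invocation is an assumption; these are
// cl::opts): passing
//   -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp
// to llvm-mc (or via -mllvm from a compiler driver) pads code so the selected
// branch types neither cross nor end at a 32-byte boundary.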
99 
100 cl::opt<bool> X86AlignBranchWithin32BBoundaries(
101     "x86-branches-within-32B-boundaries", cl::init(false),
102     cl::desc(
103         "Align selected instructions to mitigate negative performance impact "
104         "of Intel's microcode update for erratum SKX102.  May break "
105         "assumptions about labels corresponding to particular instructions, "
106         "and should be used with caution."));
107 
108 cl::opt<unsigned> X86PadMaxPrefixSize(
109     "x86-pad-max-prefix-size", cl::init(0),
110     cl::desc("Maximum number of prefixes to use for padding"));
111 
112 cl::opt<bool> X86PadForAlign(
113     "x86-pad-for-align", cl::init(false), cl::Hidden,
114     cl::desc("Pad previous instructions to implement align directives"));
115 
116 cl::opt<bool> X86PadForBranchAlign(
117     "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
118     cl::desc("Pad previous instructions to implement branch alignment"));
119 
120 class X86AsmBackend : public MCAsmBackend {
121   const MCSubtargetInfo &STI;
122   std::unique_ptr<const MCInstrInfo> MCII;
123   X86AlignBranchKind AlignBranchType;
124   Align AlignBoundary;
125   unsigned TargetPrefixMax = 0;
126 
127   MCInst PrevInst;
128   MCBoundaryAlignFragment *PendingBA = nullptr;
129   std::pair<MCFragment *, size_t> PrevInstPosition;
130   bool CanPadInst = false;
131 
132   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
133   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
134   bool needAlign(const MCInst &Inst) const;
135   bool canPadBranches(MCObjectStreamer &OS) const;
136   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
137 
138 public:
139   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
140       : MCAsmBackend(llvm::endianness::little), STI(STI),
141         MCII(T.createMCInstrInfo()) {
142     if (X86AlignBranchWithin32BBoundaries) {
143       // At the moment, this defaults to aligning fused branches, unconditional
144       // jumps, and (unfused) conditional jumps with nops.  Both the
145       // instructions aligned and the alignment method (nop vs prefix) may
146       // change in the future.
147       AlignBoundary = assumeAligned(32);
148       AlignBranchType.addKind(X86::AlignBranchFused);
149       AlignBranchType.addKind(X86::AlignBranchJcc);
150       AlignBranchType.addKind(X86::AlignBranchJmp);
151     }
152     // Allow overriding defaults set by main flag
153     if (X86AlignBranchBoundary.getNumOccurrences())
154       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
155     if (X86AlignBranch.getNumOccurrences())
156       AlignBranchType = X86AlignBranchKindLoc;
157     if (X86PadMaxPrefixSize.getNumOccurrences())
158       TargetPrefixMax = X86PadMaxPrefixSize;
159   }
160 
161   bool allowAutoPadding() const override;
162   bool allowEnhancedRelaxation() const override;
163   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
164                             const MCSubtargetInfo &STI) override;
165   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
166 
167   unsigned getNumFixupKinds() const override {
168     return X86::NumTargetFixupKinds;
169   }
170 
171   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
172 
173   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
174 
175   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
176                              const MCValue &Target,
177                              const MCSubtargetInfo *STI) override;
178 
179   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
180                   const MCValue &Target, MutableArrayRef<char> Data,
181                   uint64_t Value, bool IsResolved,
182                   const MCSubtargetInfo *STI) const override;
183 
184   bool mayNeedRelaxation(const MCInst &Inst,
185                          const MCSubtargetInfo &STI) const override;
186 
187   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
188                             const MCRelaxableFragment *DF,
189                             const MCAsmLayout &Layout) const override;
190 
191   void relaxInstruction(MCInst &Inst,
192                         const MCSubtargetInfo &STI) const override;
193 
194   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
195                                    MCCodeEmitter &Emitter,
196                                    unsigned &RemainingSize) const;
197 
198   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
199                                unsigned &RemainingSize) const;
200 
201   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
202                               unsigned &RemainingSize) const;
203 
204   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
205 
206   unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
207 
208   bool writeNopData(raw_ostream &OS, uint64_t Count,
209                     const MCSubtargetInfo *STI) const override;
210 };
211 } // end anonymous namespace
212 
213 static bool isRelaxableBranch(unsigned Opcode) {
214   return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
215 }
216 
217 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
218                                        bool Is16BitMode = false) {
219   switch (Opcode) {
220   default:
221     llvm_unreachable("invalid opcode for branch");
222   case X86::JCC_1:
223     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
224   case X86::JMP_1:
225     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
226   }
227 }
228 
229 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
230   unsigned Opcode = MI.getOpcode();
231   return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
232                                    : X86::getOpcodeForLongImmediateForm(Opcode);
233 }
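// For example, JCC_1 (jcc rel8) relaxes to JCC_4 (jcc rel32) outside 16-bit
// mode, while an imm8 form such as ADD32ri8 (an illustrative case) maps to
// its long-immediate form via getOpcodeForLongImmediateForm.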
234 
235 static X86::CondCode getCondFromBranch(const MCInst &MI,
236                                        const MCInstrInfo &MCII) {
237   unsigned Opcode = MI.getOpcode();
238   switch (Opcode) {
239   default:
240     return X86::COND_INVALID;
241   case X86::JCC_1: {
242     const MCInstrDesc &Desc = MCII.get(Opcode);
243     return static_cast<X86::CondCode>(
244         MI.getOperand(Desc.getNumOperands() - 1).getImm());
245   }
246   }
247 }
248 
249 static X86::SecondMacroFusionInstKind
250 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
251   X86::CondCode CC = getCondFromBranch(MI, MCII);
252   return classifySecondCondCodeInMacroFusion(CC);
253 }
254 
255 /// Check if the instruction uses RIP relative addressing.
256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
257   unsigned Opcode = MI.getOpcode();
258   const MCInstrDesc &Desc = MCII.get(Opcode);
259   uint64_t TSFlags = Desc.TSFlags;
260   unsigned CurOp = X86II::getOperandBias(Desc);
261   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
262   if (MemoryOperand < 0)
263     return false;
264   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
265   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
266   return (BaseReg == X86::RIP);
267 }
268 
269 /// Check if the instruction is a prefix.
270 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
271   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
272 }
273 
274 /// Check if the instruction is valid as the first instruction in macro fusion.
275 static bool isFirstMacroFusibleInst(const MCInst &Inst,
276                                     const MCInstrInfo &MCII) {
277   // An Intel instruction with RIP relative addressing is not macro fusible.
278   if (isRIPRelative(Inst, MCII))
279     return false;
280   X86::FirstMacroFusionInstKind FIK =
281       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
282   return FIK != X86::FirstMacroFusionInstKind::Invalid;
283 }
284 
285 /// X86 can reduce the number of NOP bytes needed by padding instructions
286 /// with prefixes to get better performance in some cases. Here, we determine
287 /// which prefix is the most suitable.
288 ///
289 /// If the instruction has a segment override prefix, use the existing one.
290 /// If the target is 64-bit, use CS.
291 /// If the target is 32-bit,
292 ///   - If the instruction has an ESP/EBP base register, use SS.
293 ///   - Otherwise use DS.
294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
295   assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
296          "Prefixes can be added only in 32-bit or 64-bit mode.");
297   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
298   uint64_t TSFlags = Desc.TSFlags;
299 
300   // Determine where the memory operand starts, if present.
301   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
302   if (MemoryOperand != -1)
303     MemoryOperand += X86II::getOperandBias(Desc);
304 
305   unsigned SegmentReg = 0;
306   if (MemoryOperand >= 0) {
307     // Check for explicit segment override on memory operand.
308     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
309   }
310 
311   switch (TSFlags & X86II::FormMask) {
312   default:
313     break;
314   case X86II::RawFrmDstSrc: {
315     // Check segment override opcode prefix as needed (not for %ds).
316     if (Inst.getOperand(2).getReg() != X86::DS)
317       SegmentReg = Inst.getOperand(2).getReg();
318     break;
319   }
320   case X86II::RawFrmSrc: {
321     // Check segment override opcode prefix as needed (not for %ds).
322     if (Inst.getOperand(1).getReg() != X86::DS)
323       SegmentReg = Inst.getOperand(1).getReg();
324     break;
325   }
326   case X86II::RawFrmMemOffs: {
327     // Check segment override opcode prefix as needed.
328     SegmentReg = Inst.getOperand(1).getReg();
329     break;
330   }
331   }
332 
333   if (SegmentReg != 0)
334     return X86::getSegmentOverridePrefixForReg(SegmentReg);
335 
336   if (STI.hasFeature(X86::Is64Bit))
337     return X86::CS_Encoding;
338 
339   if (MemoryOperand >= 0) {
340     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
341     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
342     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
343       return X86::SS_Encoding;
344   }
345   return X86::DS_Encoding;
346 }
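// For example, in 64-bit mode an instruction with no segment override gets
// padded with 0x2e (CS override) bytes; segment overrides other than FS/GS
// are ignored in 64-bit mode, so the prefix only consumes fetch/decode bytes.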
347 
348 /// Check if the two instructions will be macro-fused on the target CPU.
349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
350   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
351   if (!InstDesc.isConditionalBranch())
352     return false;
353   if (!isFirstMacroFusibleInst(Cmp, *MCII))
354     return false;
355   const X86::FirstMacroFusionInstKind CmpKind =
356       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
357   const X86::SecondMacroFusionInstKind BranchKind =
358       classifySecondInstInMacroFusion(Jcc, *MCII);
359   return X86::isMacroFused(CmpKind, BranchKind);
360 }
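// For example, on CPUs supporting macro fusion, `cmp %rcx, %rax` immediately
// followed by `je .L` can issue as one fused op, so the pair is treated as a
// single unit when deciding where padding may be inserted.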
361 
362 /// Check if the instruction has a variant symbol operand.
363 static bool hasVariantSymbol(const MCInst &MI) {
364   for (auto &Operand : MI) {
365     if (!Operand.isExpr())
366       continue;
367     const MCExpr &Expr = *Operand.getExpr();
368     if (Expr.getKind() == MCExpr::SymbolRef &&
369         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
370       return true;
371   }
372   return false;
373 }
374 
375 bool X86AsmBackend::allowAutoPadding() const {
376   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
377 }
378 
379 bool X86AsmBackend::allowEnhancedRelaxation() const {
380   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
381 }
382 
383 /// X86 has certain instructions which enable interrupts exactly one
384 /// instruction *after* the instruction which stores to SS.  Return true if the
385 /// given instruction has such an interrupt delay slot.
386 static bool hasInterruptDelaySlot(const MCInst &Inst) {
387   switch (Inst.getOpcode()) {
388   case X86::POPSS16:
389   case X86::POPSS32:
390   case X86::STI:
391     return true;
392 
393   case X86::MOV16sr:
394   case X86::MOV32sr:
395   case X86::MOV64sr:
396   case X86::MOV16sm:
397     if (Inst.getOperand(0).getReg() == X86::SS)
398       return true;
399     break;
400   }
401   return false;
402 }
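// For example, after `mov %ax, %ss` interrupts are inhibited for exactly one
// instruction so the following stack-pointer update can complete atomically;
// a NOP inserted between the two would consume that one-instruction window.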
403 
404 /// Check if the instruction to be emitted is right after any data.
405 static bool
406 isRightAfterData(MCFragment *CurrentFragment,
407                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
408   MCFragment *F = CurrentFragment;
409   // Empty data fragments may be created to prevent further data from being
410   // added into the previous fragment; we need to skip them since they
411   // have no contents.
412   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
413     if (cast<MCDataFragment>(F)->getContents().size() != 0)
414       break;
415 
416   // Since data is always emitted into a DataFragment, our check strategy is
417   // simple here.
418   //   - If the fragment is a DataFragment
419   //     - If it's not the fragment where the previous instruction is,
420   //       returns true.
421   //     - If it's the fragment holding the previous instruction but its
422   //       size changed since the previous instruction was emitted into
423   //       it, returns true.
424   //     - Otherwise returns false.
425   //   - If the fragment is not a DataFragment, returns false.
426   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
427     return DF != PrevInstPosition.first ||
428            DF->getContents().size() != PrevInstPosition.second;
429 
430   return false;
431 }
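// For example, after a `.byte 0x90` directive is emitted into the current
// data fragment, the next instruction has no clear boundary with the raw
// bytes before it, so canPadInst() below must refuse to pad it.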
432 
433 /// \returns the fragment size if it has instructions, otherwise returns 0.
434 static size_t getSizeForInstFragment(const MCFragment *F) {
435   if (!F || !F->hasInstructions())
436     return 0;
437   // MCEncodedFragmentWithContents being templated makes this tricky.
438   switch (F->getKind()) {
439   default:
440     llvm_unreachable("Unknown fragment with instructions!");
441   case MCFragment::FT_Data:
442     return cast<MCDataFragment>(*F).getContents().size();
443   case MCFragment::FT_Relaxable:
444     return cast<MCRelaxableFragment>(*F).getContents().size();
445   case MCFragment::FT_CompactEncodedInst:
446     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
447   }
448 }
449 
450 /// Return true if we can insert NOPs or prefixes automatically before
451 /// the instruction to be emitted.
452 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
453   if (hasVariantSymbol(Inst))
454     // The linker may rewrite an instruction with a variant symbol operand
455     // (e.g. TLSCALL).
456     return false;
457 
458   if (hasInterruptDelaySlot(PrevInst))
459     // If this instruction follows an interrupt-enabling instruction with a
460     // one-instruction delay, inserting a nop would change behavior.
461     return false;
462 
463   if (isPrefix(PrevInst, *MCII))
464     // If this instruction follows a prefix, inserting a nop/prefix would
465     // change semantics.
466     return false;
467 
468   if (isPrefix(Inst, *MCII))
469     // If this instruction is a prefix, inserting a prefix would change
470     // semantics.
471     return false;
472 
473   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
474     // If this instruction follows any data, there is no clear
475     // instruction boundary; inserting a nop/prefix would change semantics.
476     return false;
477 
478   return true;
479 }
480 
481 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
482   if (!OS.getAllowAutoPadding())
483     return false;
484   assert(allowAutoPadding() && "incorrect initialization!");
485 
486   // We only pad in text section.
487   if (!OS.getCurrentSectionOnly()->getKind().isText())
488     return false;
489 
490   // TODO: We currently don't deal with bundle cases.
491   if (OS.getAssembler().isBundlingEnabled())
492     return false;
493 
494   // Branches only need to be aligned in 32-bit or 64-bit mode.
495   if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
496     return false;
497 
498   return true;
499 }
500 
501 /// Check if the instruction needs to be aligned.
502 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
503   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
504   return (Desc.isConditionalBranch() &&
505           (AlignBranchType & X86::AlignBranchJcc)) ||
506          (Desc.isUnconditionalBranch() &&
507           (AlignBranchType & X86::AlignBranchJmp)) ||
508          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
509          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
510          (Desc.isIndirectBranch() &&
511           (AlignBranchType & X86::AlignBranchIndirect));
512 }
513 
514 /// Insert BoundaryAlignFragment before instructions to align branches.
515 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
516                                          const MCInst &Inst, const MCSubtargetInfo &STI) {
517   CanPadInst = canPadInst(Inst, OS);
518 
519   if (!canPadBranches(OS))
520     return;
521 
522   if (!isMacroFused(PrevInst, Inst))
523     // Macro fusion doesn't actually happen; clear the pending fragment.
524     PendingBA = nullptr;
525 
526   if (!CanPadInst)
527     return;
528 
529   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
530     // Macro fusion actually happens and there is no other fragment inserted
531     // after the previous instruction.
532     //
533     // Do nothing here since we already inserted a BoundaryAlign fragment when
534     // we met the first instruction in the fused pair and we'll tie them
535     // together in emitInstructionEnd.
536     //
537     // Note: When there is at least one fragment, such as MCAlignFragment,
538     // inserted after the previous instruction, e.g.
539     //
540     // \code
541     //   cmp %rax %rcx
542     //   .align 16
543     //   je .Label0
544     // \endcode
545     //
546     // We will treat the JCC as an unfused branch although it may be fused
547     // with the CMP.
548     return;
549   }
550 
551   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
552                           isFirstMacroFusibleInst(Inst, *MCII))) {
553     // If we meet an unfused branch or the first instruction in a fusible pair,
554     // insert a BoundaryAlign fragment.
555     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
556   }
557 }
558 
559 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
560 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
561   PrevInst = Inst;
562   MCFragment *CF = OS.getCurrentFragment();
563   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
564   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
565     F->setAllowAutoPadding(CanPadInst);
566 
567   if (!canPadBranches(OS))
568     return;
569 
570   if (!needAlign(Inst) || !PendingBA)
571     return;
572 
573   // Tie the aligned instructions into a pending BoundaryAlign.
574   PendingBA->setLastFragment(CF);
575   PendingBA = nullptr;
576 
577   // We need to ensure that further data isn't added to the current
578   // DataFragment, so that we can get the size of instructions later in
579   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
580   // DataFragment.
581   if (isa_and_nonnull<MCDataFragment>(CF))
582     OS.insert(new MCDataFragment());
583 
584   // Update the maximum alignment on the current section if necessary.
585   MCSection *Sec = OS.getCurrentSectionOnly();
586   Sec->ensureMinAlignment(AlignBoundary);
587 }
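// Note: emitInstructionBegin and emitInstructionEnd together bracket the
// instructions to be aligned between a MCBoundaryAlignFragment and the
// fragment recorded via setLastFragment; MCAssembler::relaxBoundaryAlign
// later sizes the BoundaryAlignFragment with NOPs so that the bracketed
// instructions neither cross nor end at a multiple of AlignBoundary.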
588 
589 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
590   if (STI.getTargetTriple().isOSBinFormatELF()) {
591     unsigned Type;
592     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
593       Type = llvm::StringSwitch<unsigned>(Name)
594 #define ELF_RELOC(X, Y) .Case(#X, Y)
595 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
596 #undef ELF_RELOC
597                  .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
598                  .Case("BFD_RELOC_8", ELF::R_X86_64_8)
599                  .Case("BFD_RELOC_16", ELF::R_X86_64_16)
600                  .Case("BFD_RELOC_32", ELF::R_X86_64_32)
601                  .Case("BFD_RELOC_64", ELF::R_X86_64_64)
602                  .Default(-1u);
603     } else {
604       Type = llvm::StringSwitch<unsigned>(Name)
605 #define ELF_RELOC(X, Y) .Case(#X, Y)
606 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
607 #undef ELF_RELOC
608                  .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
609                  .Case("BFD_RELOC_8", ELF::R_386_8)
610                  .Case("BFD_RELOC_16", ELF::R_386_16)
611                  .Case("BFD_RELOC_32", ELF::R_386_32)
612                  .Default(-1u);
613     }
614     if (Type == -1u)
615       return std::nullopt;
616     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
617   }
618   return MCAsmBackend::getFixupKind(Name);
619 }
620 
621 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
622   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
623       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
624       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
625       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
626       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
627       {"reloc_signed_4byte", 0, 32, 0},
628       {"reloc_signed_4byte_relax", 0, 32, 0},
629       {"reloc_global_offset_table", 0, 32, 0},
630       {"reloc_global_offset_table8", 0, 64, 0},
631       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
632   };
633 
634   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
635   // do not require any extra processing.
636   if (Kind >= FirstLiteralRelocationKind)
637     return MCAsmBackend::getFixupKindInfo(FK_NONE);
638 
639   if (Kind < FirstTargetFixupKind)
640     return MCAsmBackend::getFixupKindInfo(Kind);
641 
642   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
643          "Invalid kind!");
644   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
645   return Infos[Kind - FirstTargetFixupKind];
646 }
647 
648 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
649                                           const MCFixup &Fixup, const MCValue &,
650                                           const MCSubtargetInfo *STI) {
651   return Fixup.getKind() >= FirstLiteralRelocationKind;
652 }
653 
654 static unsigned getFixupKindSize(unsigned Kind) {
655   switch (Kind) {
656   default:
657     llvm_unreachable("invalid fixup kind!");
658   case FK_NONE:
659     return 0;
660   case FK_PCRel_1:
661   case FK_SecRel_1:
662   case FK_Data_1:
663     return 1;
664   case FK_PCRel_2:
665   case FK_SecRel_2:
666   case FK_Data_2:
667     return 2;
668   case FK_PCRel_4:
669   case X86::reloc_riprel_4byte:
670   case X86::reloc_riprel_4byte_relax:
671   case X86::reloc_riprel_4byte_relax_rex:
672   case X86::reloc_riprel_4byte_movq_load:
673   case X86::reloc_signed_4byte:
674   case X86::reloc_signed_4byte_relax:
675   case X86::reloc_global_offset_table:
676   case X86::reloc_branch_4byte_pcrel:
677   case FK_SecRel_4:
678   case FK_Data_4:
679     return 4;
680   case FK_PCRel_8:
681   case FK_SecRel_8:
682   case FK_Data_8:
683   case X86::reloc_global_offset_table8:
684     return 8;
685   }
686 }
687 
688 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
689                                const MCValue &Target,
690                                MutableArrayRef<char> Data,
691                                uint64_t Value, bool IsResolved,
692                                const MCSubtargetInfo *STI) const {
693   unsigned Kind = Fixup.getKind();
694   if (Kind >= FirstLiteralRelocationKind)
695     return;
696   unsigned Size = getFixupKindSize(Kind);
697 
698   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
699 
700   int64_t SignedValue = static_cast<int64_t>(Value);
701   if ((Target.isAbsolute() || IsResolved) &&
702       getFixupKindInfo(Fixup.getKind()).Flags &
703       MCFixupKindInfo::FKF_IsPCRel) {
704     // Check that the PC-relative fixup fits into the fixup size.
705     if (Size > 0 && !isIntN(Size * 8, SignedValue))
706       Asm.getContext().reportError(
707                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
708                                    " is too large for field of " + Twine(Size) +
709                                    ((Size == 1) ? " byte." : " bytes."));
710   } else {
711     // Check that the upper bits are either all zeros or all ones.
712     // Specifically ignore overflow/underflow as long as the leakage is
713     // limited to the lower bits. This is to remain compatible with
714     // other assemblers.
715     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
716            "Value does not fit in the Fixup field");
717   }
718 
719   for (unsigned i = 0; i != Size; ++i)
720     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
721 }
722 
723 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
724                                       const MCSubtargetInfo &STI) const {
725   unsigned Opcode = MI.getOpcode();
726   return isRelaxableBranch(Opcode) ||
727          (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
728           MI.getOperand(MI.getNumOperands() - 1).isExpr());
729 }
730 
731 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
732                                          uint64_t Value,
733                                          const MCRelaxableFragment *DF,
734                                          const MCAsmLayout &Layout) const {
735   // Relax if the value is too big for a (signed) i8.
736   return !isInt<8>(Value);
737 }
738 
739 // FIXME: Can tblgen help at all here to verify there aren't other instructions
740 // we can relax?
741 void X86AsmBackend::relaxInstruction(MCInst &Inst,
742                                      const MCSubtargetInfo &STI) const {
743   // Relax a 1-byte pcrel to a 4-byte pcrel, or an imm8 form to an imm32 form.
744   bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
745   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
746 
747   if (RelaxedOp == Inst.getOpcode()) {
748     SmallString<256> Tmp;
749     raw_svector_ostream OS(Tmp);
750     Inst.dump_pretty(OS);
751     OS << "\n";
752     report_fatal_error("unexpected instruction to relax: " + OS.str());
753   }
754 
755   Inst.setOpcode(RelaxedOp);
756 }
757 
758 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
759                                             MCCodeEmitter &Emitter,
760                                             unsigned &RemainingSize) const {
761   if (!RF.getAllowAutoPadding())
762     return false;
763   // If the instruction isn't fully relaxed, shifting it around might require a
764   // larger value for one of the fixups than can be encoded.  The outer loop
765   // will also catch this before moving to the next instruction, but we need to
766   // prevent padding this single instruction as well.
767   if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
768     return false;
769 
770   const unsigned OldSize = RF.getContents().size();
771   if (OldSize == 15)
772     return false;
773 
774   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
775   const unsigned RemainingPrefixSize = [&]() -> unsigned {
776     SmallString<15> Code;
777     Emitter.emitPrefix(RF.getInst(), Code, STI);
778     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
779 
780     // TODO: It turns out we need a decent amount of plumbing for the
781     // target-specific bits to determine the number of prefixes it's safe to
782     // add.  Various targets (older chips mostly, but also the Atom family)
783     // encounter decoder stalls with too many prefixes.  For testing purposes,
784     // we set the value externally for the moment.
785     unsigned ExistingPrefixSize = Code.size();
786     if (TargetPrefixMax <= ExistingPrefixSize)
787       return 0;
788     return TargetPrefixMax - ExistingPrefixSize;
789   }();
790   const unsigned PrefixBytesToAdd =
791       std::min(MaxPossiblePad, RemainingPrefixSize);
792   if (PrefixBytesToAdd == 0)
793     return false;
794 
795   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
796 
797   SmallString<256> Code;
798   Code.append(PrefixBytesToAdd, Prefix);
799   Code.append(RF.getContents().begin(), RF.getContents().end());
800   RF.getContents() = Code;
801 
802   // Adjust the fixups for the change in offsets
803   for (auto &F : RF.getFixups()) {
804     F.setOffset(F.getOffset() + PrefixBytesToAdd);
805   }
806 
807   RemainingSize -= PrefixBytesToAdd;
808   return true;
809 }
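// For example, given a 3-byte padding budget and a sufficiently large
// TargetPrefixMax, a 2-byte `movl %eax, (%rdi)` (89 07) in 64-bit mode
// becomes 2e 2e 2e 89 07, and each fixup is shifted right by the bytes added.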
810 
811 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
812                                                 MCCodeEmitter &Emitter,
813                                                 unsigned &RemainingSize) const {
814   if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
815     // TODO: There are lots of other tricks we could apply for increasing
816     // encoding size without impacting performance.
817     return false;
818 
819   MCInst Relaxed = RF.getInst();
820   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
821 
822   SmallVector<MCFixup, 4> Fixups;
823   SmallString<15> Code;
824   Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
825   const unsigned OldSize = RF.getContents().size();
826   const unsigned NewSize = Code.size();
827   assert(NewSize >= OldSize && "size decrease during relaxation?");
828   unsigned Delta = NewSize - OldSize;
829   if (Delta > RemainingSize)
830     return false;
831   RF.setInst(Relaxed);
832   RF.getContents() = Code;
833   RF.getFixups() = Fixups;
834   RemainingSize -= Delta;
835   return true;
836 }
837 
838 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
839                                            MCCodeEmitter &Emitter,
840                                            unsigned &RemainingSize) const {
841   bool Changed = false;
842   if (RemainingSize != 0)
843     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
844   if (RemainingSize != 0)
845     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
846   return Changed;
847 }
848 
849 void X86AsmBackend::finishLayout(MCAssembler const &Asm,
850                                  MCAsmLayout &Layout) const {
851   // See if we can further relax some instructions to cut down on the number of
852   // nop bytes required for code alignment.  The actual win is in reducing
853   // instruction count, not number of bytes.  Modern X86-64 can easily end up
854   // decode limited.  It is often better to reduce the number of instructions
855   // (i.e. eliminate nops) even at the cost of increasing the size and
856   // complexity of others.
857   if (!X86PadForAlign && !X86PadForBranchAlign)
858     return;
859 
860   // The processed regions are delimited by LabeledFragments. -g may have more
861   // MCSymbols and therefore different relaxation results. X86PadForAlign is
862   // disabled by default to eliminate the -g vs non -g difference.
863   DenseSet<MCFragment *> LabeledFragments;
864   for (const MCSymbol &S : Asm.symbols())
865     LabeledFragments.insert(S.getFragment(false));
866 
867   for (MCSection &Sec : Asm) {
868     if (!Sec.getKind().isText())
869       continue;
870 
871     SmallVector<MCRelaxableFragment *, 4> Relaxable;
872     for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
873       MCFragment &F = *I;
874 
875       if (LabeledFragments.count(&F))
876         Relaxable.clear();
877 
878       if (F.getKind() == MCFragment::FT_Data ||
879           F.getKind() == MCFragment::FT_CompactEncodedInst)
880         // Skip and ignore
881         continue;
882 
883       if (F.getKind() == MCFragment::FT_Relaxable) {
884         auto &RF = cast<MCRelaxableFragment>(*I);
885         Relaxable.push_back(&RF);
886         continue;
887       }
888 
889       auto canHandle = [](MCFragment &F) -> bool {
890         switch (F.getKind()) {
891         default:
892           return false;
893         case MCFragment::FT_Align:
894           return X86PadForAlign;
895         case MCFragment::FT_BoundaryAlign:
896           return X86PadForBranchAlign;
897         }
898       };
899       // For any unhandled kind, assume we can't change layout.
900       if (!canHandle(F)) {
901         Relaxable.clear();
902         continue;
903       }
904 
905 #ifndef NDEBUG
906       const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
907 #endif
908       const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);
909 
910       // To keep the effects local, prefer to relax instructions closest to
911       // the align directive.  This is purely about human understandability
912       // of the resulting code.  If we later find a reason to expand
913       // particular instructions over others, we can adjust.
914       MCFragment *FirstChangedFragment = nullptr;
915       unsigned RemainingSize = OrigSize;
916       while (!Relaxable.empty() && RemainingSize != 0) {
917         auto &RF = *Relaxable.pop_back_val();
918         // Give the backend a chance to play any tricks it wishes to increase
919         // the encoding size of the given instruction.  Target-independent code
920         // will try further relaxation, but targets may play further tricks.
921         if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
922           FirstChangedFragment = &RF;
923 
924         // If we have an instruction which hasn't been fully relaxed, we can't
925         // skip past it and insert bytes before it.  Changing its starting
926         // offset might require a larger negative offset than it can encode.
927         // We don't need to worry about larger positive offsets as none of the
928         // possible offsets between this and our align are visible, and the
929         // ones afterwards aren't changing.
930         if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
931           break;
932       }
933       Relaxable.clear();
934 
935       if (FirstChangedFragment) {
936         // Make sure the offsets for any fragments in the affected range get
937         // updated.  Note that this (conservatively) invalidates the offsets of
938         // those following, but this is not required.
939         Layout.invalidateFragmentsFrom(FirstChangedFragment);
940       }
941 
942       // BoundaryAlign explicitly tracks its size (unlike align).
943       if (F.getKind() == MCFragment::FT_BoundaryAlign)
944         cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
945 
946 #ifndef NDEBUG
947       const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
948       const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
949       assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
950              "can't move start of next fragment!");
951       assert(FinalSize == RemainingSize && "inconsistent size computation?");
952 #endif
953 
954       // If we're looking at a boundary align, make sure we don't try to pad
955       // its target instructions for some following directive.  Doing so would
956       // break the alignment of the current boundary align.
957       if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
958         const MCFragment *LastFragment = BF->getLastFragment();
959         if (!LastFragment)
960           continue;
961         while (&*I != LastFragment)
962           ++I;
963       }
964     }
965   }
966 
967   // The layout is done. Mark every fragment as valid.
968   for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
969     MCSection &Section = *Layout.getSectionOrder()[i];
970     Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
971     Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
972   }
973 }
974 
975 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
976   if (STI.hasFeature(X86::Is16Bit))
977     return 4;
978   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
979     return 1;
980   if (STI.hasFeature(X86::TuningFast7ByteNOP))
981     return 7;
982   if (STI.hasFeature(X86::TuningFast15ByteNOP))
983     return 15;
984   if (STI.hasFeature(X86::TuningFast11ByteNOP))
985     return 11;
986   // FIXME: handle 32-bit mode
987   // 15 bytes is the longest single NOP instruction, but 10 bytes is
988   // commonly the longest that can be efficiently decoded.
989   return 10;
990 }
991 
992 /// Write a sequence of optimal nops to the output, covering \p Count
993 /// bytes.
994 /// \return - true on success, false on failure
995 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
996                                  const MCSubtargetInfo *STI) const {
997   static const char Nops32Bit[10][11] = {
998       // nop
999       "\x90",
1000       // xchg %ax,%ax
1001       "\x66\x90",
1002       // nopl (%[re]ax)
1003       "\x0f\x1f\x00",
1004       // nopl 0(%[re]ax)
1005       "\x0f\x1f\x40\x00",
1006       // nopl 0(%[re]ax,%[re]ax,1)
1007       "\x0f\x1f\x44\x00\x00",
1008       // nopw 0(%[re]ax,%[re]ax,1)
1009       "\x66\x0f\x1f\x44\x00\x00",
1010       // nopl 0L(%[re]ax)
1011       "\x0f\x1f\x80\x00\x00\x00\x00",
1012       // nopl 0L(%[re]ax,%[re]ax,1)
1013       "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1014       // nopw 0L(%[re]ax,%[re]ax,1)
1015       "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1016       // nopw %cs:0L(%[re]ax,%[re]ax,1)
1017       "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018   };
1019 
1020   // 16-bit mode uses different nop patterns than 32-bit.
1021   static const char Nops16Bit[4][11] = {
1022       // nop
1023       "\x90",
1024       // xchg %eax,%eax
1025       "\x66\x90",
1026       // lea 0(%si),%si
1027       "\x8d\x74\x00",
1028       // lea 0w(%si),%si
1029       "\x8d\xb4\x00\x00",
1030   };
1031 
1032   const char (*Nops)[11] =
1033       STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1034 
1035   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1036 
1037   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1038   // length.
1039   do {
1040     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1041     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1042     for (uint8_t i = 0; i < Prefixes; i++)
1043       OS << '\x66';
1044     const uint8_t Rest = ThisNopLength - Prefixes;
1045     if (Rest != 0)
1046       OS.write(Nops[Rest - 1], Rest);
1047     Count -= ThisNopLength;
1048   } while (Count != 0);
1049 
1050   return true;
1051 }
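// For example, Count = 12 with MaxNopLength = 15 emits one 12-byte NOP (two
// 0x66 prefixes plus the 10-byte form), while MaxNopLength = 10 emits a
// 10-byte NOP followed by the 2-byte `xchg %ax, %ax`.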
1052 
1053 /* *** */
1054 
1055 namespace {
1056 
1057 class ELFX86AsmBackend : public X86AsmBackend {
1058 public:
1059   uint8_t OSABI;
1060   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1061       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1062 };
1063 
1064 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1065 public:
1066   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1067                       const MCSubtargetInfo &STI)
1068     : ELFX86AsmBackend(T, OSABI, STI) {}
1069 
1070   std::unique_ptr<MCObjectTargetWriter>
1071   createObjectTargetWriter() const override {
1072     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1073   }
1074 };
1075 
1076 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1077 public:
1078   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1079                        const MCSubtargetInfo &STI)
1080       : ELFX86AsmBackend(T, OSABI, STI) {}
1081 
1082   std::unique_ptr<MCObjectTargetWriter>
1083   createObjectTargetWriter() const override {
1084     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1085                                     ELF::EM_X86_64);
1086   }
1087 };
1088 
1089 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1090 public:
1091   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1092                          const MCSubtargetInfo &STI)
1093       : ELFX86AsmBackend(T, OSABI, STI) {}
1094 
1095   std::unique_ptr<MCObjectTargetWriter>
1096   createObjectTargetWriter() const override {
1097     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1098                                     ELF::EM_IAMCU);
1099   }
1100 };
1101 
1102 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1103 public:
1104   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1105                       const MCSubtargetInfo &STI)
1106     : ELFX86AsmBackend(T, OSABI, STI) {}
1107 
1108   std::unique_ptr<MCObjectTargetWriter>
1109   createObjectTargetWriter() const override {
1110     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1111   }
1112 };
1113 
1114 class WindowsX86AsmBackend : public X86AsmBackend {
1115   bool Is64Bit;
1116 
1117 public:
1118   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1119                        const MCSubtargetInfo &STI)
1120     : X86AsmBackend(T, STI)
1121     , Is64Bit(is64Bit) {
1122   }
1123 
1124   std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1125     return StringSwitch<std::optional<MCFixupKind>>(Name)
1126         .Case("dir32", FK_Data_4)
1127         .Case("secrel32", FK_SecRel_4)
1128         .Case("secidx", FK_SecRel_2)
1129         .Default(MCAsmBackend::getFixupKind(Name));
1130   }
1131 
1132   std::unique_ptr<MCObjectTargetWriter>
1133   createObjectTargetWriter() const override {
1134     return createX86WinCOFFObjectWriter(Is64Bit);
1135   }
1136 };
1137 
1138 namespace CU {
1139 
1140   /// Compact unwind encoding values.
1141   enum CompactUnwindEncodings {
1142     /// [RE]BP-based frame where [RE]BP is pushed on the stack immediately after
1143     /// the return address, then [RE]SP is moved to [RE]BP.
1144     UNWIND_MODE_BP_FRAME                   = 0x01000000,
1145 
1146     /// A frameless function with a small constant stack size.
1147     UNWIND_MODE_STACK_IMMD                 = 0x02000000,
1148 
1149     /// A frameless function with a large constant stack size.
1150     UNWIND_MODE_STACK_IND                  = 0x03000000,
1151 
1152     /// No compact unwind encoding is available.
1153     UNWIND_MODE_DWARF                      = 0x04000000,
1154 
1155     /// Mask for encoding the frame registers.
1156     UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,
1157 
1158     /// Mask for encoding the frameless registers.
1159     UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
1160   };
1161 
1162 } // namespace CU
1163 
1164 class DarwinX86AsmBackend : public X86AsmBackend {
1165   const MCRegisterInfo &MRI;
1166 
1167   /// Number of registers that can be saved in a compact unwind encoding.
1168   enum { CU_NUM_SAVED_REGS = 6 };
1169 
1170   mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
1171   Triple TT;
1172   bool Is64Bit;
1173 
1174   unsigned OffsetSize;                   ///< Size of a pushed stack slot.
1175   unsigned MoveInstrSize;                ///< Size of a "move" instruction.
1176   unsigned StackDivide;                  ///< Amount to adjust stack size by.
1177 protected:
1178   /// Size of a "push" instruction for the given register.
1179   unsigned PushInstrSize(unsigned Reg) const {
1180     switch (Reg) {
1181       case X86::EBX:
1182       case X86::ECX:
1183       case X86::EDX:
1184       case X86::EDI:
1185       case X86::ESI:
1186       case X86::EBP:
1187       case X86::RBX:
1188       case X86::RBP:
1189         return 1;
1190       case X86::R12:
1191       case X86::R13:
1192       case X86::R14:
1193       case X86::R15:
1194         return 2;
1195     }
1196     return 1;
1197   }
1198 
1199 private:
1200   /// Get the compact unwind number for a given register. The number
1201   /// corresponds to the enum lists in compact_unwind_encoding.h.
1202   int getCompactUnwindRegNum(unsigned Reg) const {
1203     static const MCPhysReg CU32BitRegs[7] = {
1204       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
1205     };
1206     static const MCPhysReg CU64BitRegs[] = {
1207       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
1208     };
1209     const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
1210     for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
1211       if (*CURegs == Reg)
1212         return Idx;
1213 
1214     return -1;
1215   }
1216 
1217   /// Return the registers encoded for a compact encoding with a frame
1218   /// pointer.
1219   uint32_t encodeCompactUnwindRegistersWithFrame() const {
1220     // Encode the registers in the order they were saved --- 3-bits per
1221     // register. The list of saved registers is assumed to be in reverse
1222     // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
1223     uint32_t RegEnc = 0;
1224     for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
1225       unsigned Reg = SavedRegs[i];
1226       if (Reg == 0) break;
1227 
1228       int CURegNum = getCompactUnwindRegNum(Reg);
1229       if (CURegNum == -1) return ~0U;
1230 
1231       // Encode the 3-bit register number in order, skipping over 3-bits for
1232       // each register.
1233       RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
1234     }
1235 
1236     assert((RegEnc & 0x3FFFF) == RegEnc &&
1237            "Invalid compact register encoding!");
1238     return RegEnc;
1239   }
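  // For example, if SavedRegs holds {RBX, R12} (in reverse save order), their
  // compact unwind numbers are 1 and 2, so RegEnc = 1 | (2 << 3) = 0x11.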
1240 
1241   /// Create the permutation encoding used with frameless stacks. It is
1242   /// passed the number of registers to be saved and an array of the registers
1243   /// saved.
1244   uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
1245     // The saved registers are numbered from 1 to 6. In order to encode the
1246     // order in which they were saved, we re-number them according to their
1247     // place in the register order. The re-numbering is relative to the last
1248     // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
1249     // that order:
1250     //
1251     //    Orig  Re-Num
1252     //    ----  ------
1253     //     6       6
1254     //     2       2
1255     //     4       3
1256     //     5       3
1257     //
1258     for (unsigned i = 0; i < RegCount; ++i) {
1259       int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
1260       if (CUReg == -1) return ~0U;
1261       SavedRegs[i] = CUReg;
1262     }
1263 
1264     // Reverse the list.
1265     std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);
1266 
1267     uint32_t RenumRegs[CU_NUM_SAVED_REGS];
1268     for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
1269       unsigned Countless = 0;
1270       for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
1271         if (SavedRegs[j] < SavedRegs[i])
1272           ++Countless;
1273 
1274       RenumRegs[i] = SavedRegs[i] - Countless - 1;
1275     }
1276 
1277     // Take the renumbered values and encode them into a 10-bit number.
1278     uint32_t permutationEncoding = 0;
1279     switch (RegCount) {
1280     case 6:
1281       permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
1282                              + 6 * RenumRegs[2] +  2 * RenumRegs[3]
1283                              +     RenumRegs[4];
1284       break;
1285     case 5:
1286       permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
1287                              + 6 * RenumRegs[3] +  2 * RenumRegs[4]
1288                              +     RenumRegs[5];
1289       break;
1290     case 4:
1291       permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
1292                              + 3 * RenumRegs[4] +      RenumRegs[5];
1293       break;
1294     case 3:
1295       permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
1296                              +     RenumRegs[5];
1297       break;
1298     case 2:
1299       permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
1300       break;
1301     case 1:
1302       permutationEncoding |=       RenumRegs[5];
1303       break;
1304     }
1305 
1306     assert((permutationEncoding & 0x3FF) == permutationEncoding &&
1307            "Invalid compact register encoding!");
1308     return permutationEncoding;
1309   }
1310 
1311 public:
1312   DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
1313                       const MCSubtargetInfo &STI)
1314       : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
1315         Is64Bit(TT.isArch64Bit()) {
1316     memset(SavedRegs, 0, sizeof(SavedRegs));
1317     OffsetSize = Is64Bit ? 8 : 4;
1318     MoveInstrSize = Is64Bit ? 3 : 2;
1319     StackDivide = Is64Bit ? 8 : 4;
1320   }
1321 
1322   std::unique_ptr<MCObjectTargetWriter>
1323   createObjectTargetWriter() const override {
1324     uint32_t CPUType = cantFail(MachO::getCPUType(TT));
1325     uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
1326     return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
1327   }
1328 
1329   /// Implementation of algorithm to generate the compact unwind encoding
1330   /// Implementation of the algorithm to generate the compact unwind encoding
1331   uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
1332                                          const MCContext *Ctxt) const override {
1333     ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
1334     if (Instrs.empty()) return 0;
1335     if (!isDarwinCanonicalPersonality(FI->Personality) &&
1336         !Ctxt->emitCompactUnwindNonCanonical())
1337       return CU::UNWIND_MODE_DWARF;
1338 
1339     // Reset the saved registers.
1340     unsigned SavedRegIdx = 0;
1341     memset(SavedRegs, 0, sizeof(SavedRegs));
1342 
1343     bool HasFP = false;
1344 
1345     // Encode that we are using EBP/RBP as the frame pointer.
1346     uint32_t CompactUnwindEncoding = 0;
1347 
1348     unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
1349     unsigned InstrOffset = 0;
1350     unsigned StackAdjust = 0;
1351     unsigned StackSize = 0;
1352     int MinAbsOffset = std::numeric_limits<int>::max();
1353 
1354     for (const MCCFIInstruction &Inst : Instrs) {
1355       switch (Inst.getOperation()) {
1356       default:
1357         // Any other CFI directives indicate a frame that we aren't prepared
1358         // to represent via compact unwind, so just bail out.
1359         return CU::UNWIND_MODE_DWARF;
1360       case MCCFIInstruction::OpDefCfaRegister: {
1361         // Defines a frame pointer. E.g.
1362         //
1363         //     movq %rsp, %rbp
1364         //  L0:
1365         //     .cfi_def_cfa_register %rbp
1366         //
1367         HasFP = true;
1368 
1369         // If the frame pointer is other than ebp/rbp, we do not have a way to
1370         // generate a compact unwind representation, so bail out.
1371         if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
1372             (Is64Bit ? X86::RBP : X86::EBP))
1373           return CU::UNWIND_MODE_DWARF;
1374 
1375         // Reset the counts.
1376         memset(SavedRegs, 0, sizeof(SavedRegs));
1377         StackAdjust = 0;
1378         SavedRegIdx = 0;
1379         MinAbsOffset = std::numeric_limits<int>::max();
1380         InstrOffset += MoveInstrSize;
1381         break;
1382       }
1383       case MCCFIInstruction::OpDefCfaOffset: {
1384         // Defines a new offset for the CFA. E.g.
1385         //
1386         //  With frame:
1387         //
1388         //     pushq %rbp
1389         //  L0:
1390         //     .cfi_def_cfa_offset 16
1391         //
1392         //  Without frame:
1393         //
1394         //     subq $72, %rsp
1395         //  L0:
1396         //     .cfi_def_cfa_offset 80
1397         //
1398         StackSize = Inst.getOffset() / StackDivide;
1399         break;
1400       }
1401       case MCCFIInstruction::OpOffset: {
1402         // Defines a "push" of a callee-saved register. E.g.
1403         //
1404         //     pushq %r15
1405         //     pushq %r14
1406         //     pushq %rbx
1407         //  L0:
1408         //     subq $120, %rsp
1409         //  L1:
1410         //     .cfi_offset %rbx, -40
1411         //     .cfi_offset %r14, -32
1412         //     .cfi_offset %r15, -24
1413         //
1414         if (SavedRegIdx == CU_NUM_SAVED_REGS)
1415           // If there are too many saved registers, we cannot use a compact
1416           // unwind encoding.
1417           return CU::UNWIND_MODE_DWARF;
1418 
1419         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
1420         SavedRegs[SavedRegIdx++] = Reg;
1421         StackAdjust += OffsetSize;
1422         MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
1423         InstrOffset += PushInstrSize(Reg);
1424         break;
1425       }
1426       }
1427     }
1428 
1429     StackAdjust /= StackDivide;
1430 
1431     if (HasFP) {
1432       if ((StackAdjust & 0xFF) != StackAdjust)
1433         // Offset was too big for a compact unwind encoding.
1434         return CU::UNWIND_MODE_DWARF;
1435 
1436       // We don't attempt to track a real StackAdjust, so if the saved registers
1437       // aren't adjacent to rbp we can't cope.
1438       if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
1439         return CU::UNWIND_MODE_DWARF;
1440 
1441       // Get the encoding of the saved registers when we have a frame pointer.
1442       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
1443       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1444 
1445       CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
1446       CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
1447       CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
1448     } else {
1449       SubtractInstrIdx += InstrOffset;
1450       ++StackAdjust;
1451 
1452       if ((StackSize & 0xFF) == StackSize) {
1453         // Frameless stack with a small stack size.
1454         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;
1455 
1456         // Encode the stack size.
1457         CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
1458       } else {
1459         if ((StackAdjust & 0x7) != StackAdjust)
1460           // The extra stack adjustments are too big for us to handle.
1461           return CU::UNWIND_MODE_DWARF;
1462 
1463         // Frameless stack with an offset too large for us to encode compactly.
1464         CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;
1465 
1466         // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
1467         // instruction.
1468         CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
1469 
1470         // Encode any extra stack adjustments (done via push instructions).
1471         CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
1472       }
1473 
1474       // Encode the number of registers saved. (Reverse the list first.)
1475       std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
1476       CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
1477 
1478       // Get the encoding of the saved registers when we don't have a frame
1479       // pointer.
1480       uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
1481       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
1482 
1483       // Encode the register encoding.
1484       CompactUnwindEncoding |=
1485         RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
1486     }
1487 
1488     return CompactUnwindEncoding;
1489   }
1490 };
1491 
1492 } // end anonymous namespace
1493 
1494 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1495                                            const MCSubtargetInfo &STI,
1496                                            const MCRegisterInfo &MRI,
1497                                            const MCTargetOptions &Options) {
1498   const Triple &TheTriple = STI.getTargetTriple();
1499   if (TheTriple.isOSBinFormatMachO())
1500     return new DarwinX86AsmBackend(T, MRI, STI);
1501 
1502   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1503     return new WindowsX86AsmBackend(T, false, STI);
1504 
1505   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1506 
1507   if (TheTriple.isOSIAMCU())
1508     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1509 
1510   return new ELFX86_32AsmBackend(T, OSABI, STI);
1511 }
1512 
1513 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1514                                            const MCSubtargetInfo &STI,
1515                                            const MCRegisterInfo &MRI,
1516                                            const MCTargetOptions &Options) {
1517   const Triple &TheTriple = STI.getTargetTriple();
1518   if (TheTriple.isOSBinFormatMachO())
1519     return new DarwinX86AsmBackend(T, MRI, STI);
1520 
1521   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1522     return new WindowsX86AsmBackend(T, true, STI);
1523 
1524   if (TheTriple.isUEFI()) {
1525     assert(TheTriple.isOSBinFormatCOFF() &&
1526            "Only COFF format is supported in UEFI environment.");
1527     return new WindowsX86AsmBackend(T, true, STI);
1528   }
1529 
1530   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1531 
1532   if (TheTriple.isX32())
1533     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1534   return new ELFX86_64AsmBackend(T, OSABI, STI);
1535 }
1536