1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86EncodingOptimization.h"
11 #include "MCTargetDesc/X86FixupKinds.h"
12 #include "MCTargetDesc/X86MCAsmInfo.h"
13 #include "llvm/ADT/StringSwitch.h"
14 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/BinaryFormat/MachO.h"
16 #include "llvm/MC/MCAsmBackend.h"
17 #include "llvm/MC/MCAssembler.h"
18 #include "llvm/MC/MCCodeEmitter.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCDwarf.h"
21 #include "llvm/MC/MCELFObjectWriter.h"
22 #include "llvm/MC/MCELFStreamer.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCObjectStreamer.h"
27 #include "llvm/MC/MCObjectWriter.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCValue.h"
31 #include "llvm/MC/TargetRegistry.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/raw_ostream.h"
35
36 using namespace llvm;
37
38 namespace {
39 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
40 class X86AlignBranchKind {
41 private:
42 uint8_t AlignBranchKind = 0;
43
44 public:
operator =(const std::string & Val)45 void operator=(const std::string &Val) {
46 if (Val.empty())
47 return;
48 SmallVector<StringRef, 6> BranchTypes;
49 StringRef(Val).split(BranchTypes, '+', -1, false);
50 for (auto BranchType : BranchTypes) {
51 if (BranchType == "fused")
52 addKind(X86::AlignBranchFused);
53 else if (BranchType == "jcc")
54 addKind(X86::AlignBranchJcc);
55 else if (BranchType == "jmp")
56 addKind(X86::AlignBranchJmp);
57 else if (BranchType == "call")
58 addKind(X86::AlignBranchCall);
59 else if (BranchType == "ret")
60 addKind(X86::AlignBranchRet);
61 else if (BranchType == "indirect")
62 addKind(X86::AlignBranchIndirect);
63 else {
64 errs() << "invalid argument " << BranchType.str()
65 << " to -x86-align-branch=; each element must be one of: fused, "
66 "jcc, jmp, call, ret, indirect.(plus separated)\n";
67 }
68 }
69 }
70
operator uint8_t() const71 operator uint8_t() const { return AlignBranchKind; }
addKind(X86::AlignBranchBoundaryKind Value)72 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
73 };
74
/// Storage for the parsed -x86-align-branch value; bound to the cl::opt
/// below via cl::location.
X86AlignBranchKind X86AlignBranchKindLoc;

/// Boundary (in bytes) that aligned branches must not cross or end against.
/// 0 (the default) disables branch alignment entirely.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

/// Which classes of branches participate in alignment (plus-separated list).
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

/// Convenience master switch: sets a 32-byte boundary and aligns fused/jcc/jmp
/// (see the constructor below); mitigation for the Intel skx102 (JCC) erratum.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

/// Cap on segment-override prefixes added to one instruction when padding via
/// prefixes; 0 disables prefix padding (see padInstructionViaPrefix).
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
117
class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  // Branch-alignment configuration (from the -x86-align-branch* options or
  // the -x86-branches-within-32B-boundaries master switch).
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  // Maximum number of prefixes usable for padding; 0 means none allowed.
  unsigned TargetPrefixMax = 0;

  // Streaming state carried between emitInstructionBegin/emitInstructionEnd.
  MCInst PrevInst;            // Only kept up to date when canPadBranches().
  unsigned PrevInstOpcode = 0;
  // Boundary-align fragment opened for a branch (or first half of a fusible
  // pair) that has not yet been tied to its last fragment.
  MCBoundaryAlignFragment *PendingBA = nullptr;
  // Fragment and size-within-fragment right after the previous instruction;
  // used to detect raw data emitted between instructions.
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool IsRightAfterData = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI);
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);


  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  MCFixupKindInfo getFixupKindInfo(MCFixupKind Kind) const override;

  std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,
                                    uint64_t &) override;
  void applyFixup(const MCFragment &, const MCFixup &, const MCValue &Target,
                  MutableArrayRef<char> Data, uint64_t Value,
                  bool IsResolved) override;

  bool mayNeedRelaxation(unsigned Opcode, ArrayRef<MCOperand> Operands,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxationAdvanced(const MCFixup &, const MCValue &, uint64_t,
                                    bool) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  bool finishLayout(const MCAssembler &Asm) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
203 } // end anonymous namespace
204
isRelaxableBranch(unsigned Opcode)205 static bool isRelaxableBranch(unsigned Opcode) {
206 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
207 }
208
getRelaxedOpcodeBranch(unsigned Opcode,bool Is16BitMode=false)209 static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
210 bool Is16BitMode = false) {
211 switch (Opcode) {
212 default:
213 llvm_unreachable("invalid opcode for branch");
214 case X86::JCC_1:
215 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
216 case X86::JMP_1:
217 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
218 }
219 }
220
getRelaxedOpcode(const MCInst & MI,bool Is16BitMode)221 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
222 unsigned Opcode = MI.getOpcode();
223 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
224 : X86::getOpcodeForLongImmediateForm(Opcode);
225 }
226
getCondFromBranch(const MCInst & MI,const MCInstrInfo & MCII)227 static X86::CondCode getCondFromBranch(const MCInst &MI,
228 const MCInstrInfo &MCII) {
229 unsigned Opcode = MI.getOpcode();
230 switch (Opcode) {
231 default:
232 return X86::COND_INVALID;
233 case X86::JCC_1: {
234 const MCInstrDesc &Desc = MCII.get(Opcode);
235 return static_cast<X86::CondCode>(
236 MI.getOperand(Desc.getNumOperands() - 1).getImm());
237 }
238 }
239 }
240
241 static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst & MI,const MCInstrInfo & MCII)242 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
243 X86::CondCode CC = getCondFromBranch(MI, MCII);
244 return classifySecondCondCodeInMacroFusion(CC);
245 }
246
247 /// Check if the instruction uses RIP relative addressing.
isRIPRelative(const MCInst & MI,const MCInstrInfo & MCII)248 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
249 unsigned Opcode = MI.getOpcode();
250 const MCInstrDesc &Desc = MCII.get(Opcode);
251 uint64_t TSFlags = Desc.TSFlags;
252 unsigned CurOp = X86II::getOperandBias(Desc);
253 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
254 if (MemoryOperand < 0)
255 return false;
256 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
257 MCRegister BaseReg = MI.getOperand(BaseRegNum).getReg();
258 return (BaseReg == X86::RIP);
259 }
260
261 /// Check if the instruction is a prefix.
isPrefix(unsigned Opcode,const MCInstrInfo & MCII)262 static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
263 return X86II::isPrefix(MCII.get(Opcode).TSFlags);
264 }
265
266 /// Check if the instruction is valid as the first instruction in macro fusion.
isFirstMacroFusibleInst(const MCInst & Inst,const MCInstrInfo & MCII)267 static bool isFirstMacroFusibleInst(const MCInst &Inst,
268 const MCInstrInfo &MCII) {
269 // An Intel instruction with RIP relative addressing is not macro fusible.
270 if (isRIPRelative(Inst, MCII))
271 return false;
272 X86::FirstMacroFusionInstKind FIK =
273 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
274 return FIK != X86::FirstMacroFusionInstKind::Invalid;
275 }
276
/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has a ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  MCRegister SegmentReg;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  // String-style instructions keep their segment override in a fixed operand
  // slot rather than a normal memory operand; handle those forms specially.
  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    // In 32-bit mode an ESP/EBP base defaults to %ss, so emitting an explicit
    // SS prefix is a no-op for such instructions.
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    MCRegister BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
339
340 /// Check if the two instructions will be macro-fused on the target cpu.
isMacroFused(const MCInst & Cmp,const MCInst & Jcc) const341 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
342 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
343 if (!InstDesc.isConditionalBranch())
344 return false;
345 if (!isFirstMacroFusibleInst(Cmp, *MCII))
346 return false;
347 const X86::FirstMacroFusionInstKind CmpKind =
348 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
349 const X86::SecondMacroFusionInstKind BranchKind =
350 classifySecondInstInMacroFusion(Jcc, *MCII);
351 return X86::isMacroFused(CmpKind, BranchKind);
352 }
353
354 /// Check if the instruction has a variant symbol operand.
hasVariantSymbol(const MCInst & MI)355 static bool hasVariantSymbol(const MCInst &MI) {
356 for (auto &Operand : MI) {
357 if (!Operand.isExpr())
358 continue;
359 const MCExpr &Expr = *Operand.getExpr();
360 if (Expr.getKind() == MCExpr::SymbolRef &&
361 cast<MCSymbolRefExpr>(&Expr)->getSpecifier())
362 return true;
363 }
364 return false;
365 }
366
allowAutoPadding() const367 bool X86AsmBackend::allowAutoPadding() const {
368 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
369 }
370
allowEnhancedRelaxation() const371 bool X86AsmBackend::allowEnhancedRelaxation() const {
372 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
373 }
374
375 /// X86 has certain instructions which enable interrupts exactly one
376 /// instruction *after* the instruction which stores to SS. Return true if the
377 /// given instruction may have such an interrupt delay slot.
mayHaveInterruptDelaySlot(unsigned InstOpcode)378 static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
379 switch (InstOpcode) {
380 case X86::POPSS16:
381 case X86::POPSS32:
382 case X86::STI:
383 return true;
384
385 case X86::MOV16sr:
386 case X86::MOV32sr:
387 case X86::MOV64sr:
388 case X86::MOV16sm:
389 // In fact, this is only the case if the first operand is SS. However, as
390 // segment moves occur extremely rarely, this is just a minor pessimization.
391 return true;
392 }
393 return false;
394 }
395
/// Check if the instruction to be emitted is right after any data.
/// \param CurrentFragment   fragment the next instruction would land in.
/// \param PrevInstPosition  fragment and size recorded right after the
///                          previous instruction was emitted.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's empty (section start or data after align), return false.
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF->getContents().size() &&
           (DF != PrevInstPosition.first ||
            DF->getContents().size() != PrevInstPosition.second);

  return false;
}
419
420 /// \returns the fragment size if it has instructions, otherwise returns 0.
getSizeForInstFragment(const MCFragment * F)421 static size_t getSizeForInstFragment(const MCFragment *F) {
422 if (!F || !F->hasInstructions())
423 return 0;
424 // MCEncodedFragmentWithContents being templated makes this tricky.
425 if (auto *DF = dyn_cast<MCEncodedFragment>(F))
426 return DF->getContents().size();
427 else
428 llvm_unreachable("Unknown fragment with instructions!");
429 }
430
/// Return true if we can insert NOP or prefixes automatically before
/// the instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with variant symbol operand(e.g.
    // TLSCALL).
    return false;

  if (mayHaveInterruptDelaySlot(PrevInstOpcode))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would change
    // semantic.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantic.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear
    // instruction boundary, inserting a nop/prefix would change semantic.
    return false;

  return true;
}
461
canPadBranches(MCObjectStreamer & OS) const462 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
463 if (!OS.getAllowAutoPadding())
464 return false;
465 assert(allowAutoPadding() && "incorrect initialization!");
466
467 // We only pad in text section.
468 if (!OS.getCurrentSectionOnly()->isText())
469 return false;
470
471 // To be Done: Currently don't deal with Bundle cases.
472 if (OS.getAssembler().isBundlingEnabled())
473 return false;
474
475 // Branches only need to be aligned in 32-bit or 64-bit mode.
476 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
477 return false;
478
479 return true;
480 }
481
482 /// Check if the instruction operand needs to be aligned.
needAlign(const MCInst & Inst) const483 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
484 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
485 return (Desc.isConditionalBranch() &&
486 (AlignBranchType & X86::AlignBranchJcc)) ||
487 (Desc.isUnconditionalBranch() &&
488 (AlignBranchType & X86::AlignBranchJmp)) ||
489 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
490 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
491 (Desc.isIndirectBranch() &&
492 (AlignBranchType & X86::AlignBranchIndirect));
493 }
494
/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Used by canPadInst. Done here, because in emitInstructionEnd, the current
  // fragment will have changed.
  IsRightAfterData =
      isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);

  if (!canPadBranches(OS))
    return;

  // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't happen indeed, clear the pending.
    PendingBA = nullptr;

  // When branch padding is enabled (basically the skx102 erratum => unlikely),
  // we call canPadInst (not cheap) twice. However, in the common case, we can
  // avoid unnecessary calls to that, as this is otherwise only used for
  // relaxable fragments.
  if (!canPadInst(Inst, OS))
    return;

  if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
        AlignBoundary, STI);
    OS.insert(PendingBA);
  }
}
549
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  MCFragment *CF = OS.getCurrentFragment();
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(canPadInst(Inst, OS));

  // Update PrevInstOpcode here, canPadInst() reads that.
  PrevInstOpcode = Inst.getOpcode();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));

  if (!canPadBranches(OS))
    return;

  // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
  PrevInst = Inst;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(OS.getContext().allocFragment<MCDataFragment>());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}
585
/// Translate a .reloc directive name into a fixup kind. For ELF targets the
/// name may be a raw relocation type (R_X86_64_*/R_386_*, pulled in from the
/// ELFRelocs .def files) or a BFD_RELOC_* alias; other formats defer to the
/// generic implementation.
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
617
/// Describe the target-specific fixup kinds. The Infos table must stay in the
/// same order as the X86::Fixups enum (X86FixupKinds.h); all entries are
/// 32-bit fields at bit offset 0.
MCFixupKindInfo X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      // clang-format off
      {"reloc_riprel_4byte", 0, 32, 0},
      {"reloc_riprel_4byte_movq_load", 0, 32, 0},
      {"reloc_riprel_4byte_movq_load_rex2", 0, 32, 0},
      {"reloc_riprel_4byte_relax", 0, 32, 0},
      {"reloc_riprel_4byte_relax_rex", 0, 32, 0},
      {"reloc_riprel_4byte_relax_rex2", 0, 32, 0},
      {"reloc_riprel_4byte_relax_evex", 0, 32, 0},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, 0},
      // clang-format on
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (mc::isRelocation(Kind))
    return {};

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < X86::NumTargetFixupKinds &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}
648
/// Number of bytes a fixup kind patches into the instruction stream.
static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  // All target-specific X86 fixups are 4-byte fields (see getFixupKindInfo).
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_relax_rex2:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_riprel_4byte_movq_load_rex2:
  case X86::reloc_riprel_4byte_relax_evex:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_SecRel_8:
  case FK_Data_8:
    return 8;
  }
}
680
// A pc-relative reference to this symbol must be turned into a GOTPC32
// relocation (see evaluateFixup below).
constexpr char GotSymName[] = "_GLOBAL_OFFSET_TABLE_";

// Adjust PC-relative fixup offsets, which are calculated from the start of the
// next instruction.
std::optional<bool> X86AsmBackend::evaluateFixup(const MCFragment &,
                                                 MCFixup &Fixup,
                                                 MCValue &Target, uint64_t &) {
  if (Fixup.isPCRel()) {
    // Bias by the fixup field width so the value is relative to the end of
    // the field (i.e. the start of the next instruction).
    switch (Fixup.getKind()) {
    case FK_Data_1:
      Target.setConstant(Target.getConstant() - 1);
      break;
    case FK_Data_2:
      Target.setConstant(Target.getConstant() - 2);
      break;
    default: {
      Target.setConstant(Target.getConstant() - 4);
      auto *Add = Target.getAddSym();
      // If this is a pc-relative load off _GLOBAL_OFFSET_TABLE_:
      //   leaq _GLOBAL_OFFSET_TABLE_(%rip), %r15
      // this needs to be a GOTPC32 relocation.
      if (Add && Add->getName() == GotSymName)
        Fixup = MCFixup::create(Fixup.getOffset(), Fixup.getValue(),
                                X86::reloc_global_offset_table);
    } break;
    }
  }
  // Use default handling for `Value` and `IsResolved`.
  return {};
}
711
/// Record a relocation if needed and patch the resolved fixup value into the
/// fragment contents (little-endian).
void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data, uint64_t Value,
                               bool IsResolved) {
  // Force relocation when there is a specifier. This might be too conservative
  // - GAS doesn't emit a relocation for call local@plt; local:.
  if (Target.getSpecifier())
    IsResolved = false;
  maybeAddReloc(F, Fixup, Target, Value, IsResolved);

  auto Kind = Fixup.getKind();
  // Raw relocation kinds (from .reloc directives) patch no bytes here.
  if (mc::isRelocation(Kind))
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if (IsResolved && Fixup.isPCRel()) {
    // check that PC relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      getContext().reportError(Fixup.getLoc(),
                               "value of " + Twine(SignedValue) +
                                   " is too large for field of " + Twine(Size) +
                                   ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  // Write the value out little-endian, byte by byte.
  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
749
mayNeedRelaxation(unsigned Opcode,ArrayRef<MCOperand> Operands,const MCSubtargetInfo & STI) const750 bool X86AsmBackend::mayNeedRelaxation(unsigned Opcode,
751 ArrayRef<MCOperand> Operands,
752 const MCSubtargetInfo &STI) const {
753 unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
754 return isRelaxableBranch(Opcode) ||
755 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
756 Operands[Operands.size() - 1 - SkipOperands].isExpr());
757 }
758
/// Decide whether a relaxable fragment must be widened given the current
/// fixup value / resolution state.
bool X86AsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
                                                 const MCValue &Target,
                                                 uint64_t Value,
                                                 bool Resolved) const {
  // If resolved, relax if the value is too big for a (signed) i8.
  //
  // Currently, `jmp local@plt` relaxes JMP even if the offset is small,
  // different from gas.
  if (Resolved)
    return !isInt<8>(Value) || Target.getSpecifier();

  // Otherwise, relax unless there is a @ABS8 specifier.
  if (Fixup.getKind() == FK_Data_1 && Target.getAddSym() &&
      Target.getSpecifier() == X86::S_ABS8)
    return false;
  return true;
}
776
777 // FIXME: Can tblgen help at all here to verify there aren't other instructions
778 // we can relax?
relaxInstruction(MCInst & Inst,const MCSubtargetInfo & STI) const779 void X86AsmBackend::relaxInstruction(MCInst &Inst,
780 const MCSubtargetInfo &STI) const {
781 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
782 bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
783 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
784 assert(RelaxedOp != Inst.getOpcode());
785 Inst.setOpcode(RelaxedOp);
786 }
787
/// Try to consume RemainingSize bytes of padding by prepending segment
/// override prefixes to the instruction in RF. Returns true if any prefix
/// bytes were added; RemainingSize is decremented accordingly.
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups then can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getOpcode(), RF.getOperands(),
                        *RF.getSubtargetInfo()))
    return false;

  // x86 instructions are limited to 15 bytes total; never grow past that.
  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine number of prefixes its safe to add. Various
    // targets (older chips mostly, but also Atom family) encounter decoder
    // stalls with too many prefixes. For testing purposes, we set the value
    // externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  // Prepend the chosen prefix bytes to the encoded instruction.
  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.setContents(Code);

  // Adjust the fixups for the change in offsets
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}
841
padInstructionViaRelaxation(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const842 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
843 MCCodeEmitter &Emitter,
844 unsigned &RemainingSize) const {
845 if (!mayNeedRelaxation(RF.getOpcode(), RF.getOperands(),
846 *RF.getSubtargetInfo()))
847 // TODO: There are lots of other tricks we could apply for increasing
848 // encoding size without impacting performance.
849 return false;
850
851 MCInst Relaxed = RF.getInst();
852 relaxInstruction(Relaxed, *RF.getSubtargetInfo());
853
854 SmallVector<MCFixup, 4> Fixups;
855 SmallString<15> Code;
856 Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
857 const unsigned OldSize = RF.getContents().size();
858 const unsigned NewSize = Code.size();
859 assert(NewSize >= OldSize && "size decrease during relaxation?");
860 unsigned Delta = NewSize - OldSize;
861 if (Delta > RemainingSize)
862 return false;
863 RF.setInst(Relaxed);
864 RF.setContents(Code);
865 RF.setFixups(Fixups);
866 RemainingSize -= Delta;
867 return true;
868 }
869
padInstructionEncoding(MCRelaxableFragment & RF,MCCodeEmitter & Emitter,unsigned & RemainingSize) const870 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
871 MCCodeEmitter &Emitter,
872 unsigned &RemainingSize) const {
873 bool Changed = false;
874 if (RemainingSize != 0)
875 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
876 if (RemainingSize != 0)
877 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
878 return Changed;
879 }
880
finishLayout(const MCAssembler & Asm) const881 bool X86AsmBackend::finishLayout(const MCAssembler &Asm) const {
882 // See if we can further relax some instructions to cut down on the number of
883 // nop bytes required for code alignment. The actual win is in reducing
884 // instruction count, not number of bytes. Modern X86-64 can easily end up
885 // decode limited. It is often better to reduce the number of instructions
886 // (i.e. eliminate nops) even at the cost of increasing the size and
887 // complexity of others.
888 if (!X86PadForAlign && !X86PadForBranchAlign)
889 return false;
890
891 // The processed regions are delimitered by LabeledFragments. -g may have more
892 // MCSymbols and therefore different relaxation results. X86PadForAlign is
893 // disabled by default to eliminate the -g vs non -g difference.
894 DenseSet<MCFragment *> LabeledFragments;
895 for (const MCSymbol &S : Asm.symbols())
896 LabeledFragments.insert(S.getFragment());
897
898 bool Changed = false;
899 for (MCSection &Sec : Asm) {
900 if (!Sec.isText())
901 continue;
902
903 SmallVector<MCRelaxableFragment *, 4> Relaxable;
904 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
905 MCFragment &F = *I;
906
907 if (LabeledFragments.count(&F))
908 Relaxable.clear();
909
910 if (F.getKind() == MCFragment::FT_Data) // Skip and ignore
911 continue;
912
913 if (F.getKind() == MCFragment::FT_Relaxable) {
914 auto &RF = cast<MCRelaxableFragment>(*I);
915 Relaxable.push_back(&RF);
916 continue;
917 }
918
919 auto canHandle = [](MCFragment &F) -> bool {
920 switch (F.getKind()) {
921 default:
922 return false;
923 case MCFragment::FT_Align:
924 return X86PadForAlign;
925 case MCFragment::FT_BoundaryAlign:
926 return X86PadForBranchAlign;
927 }
928 };
929 // For any unhandled kind, assume we can't change layout.
930 if (!canHandle(F)) {
931 Relaxable.clear();
932 continue;
933 }
934
935 const uint64_t OrigSize = Asm.computeFragmentSize(F);
936
937 // To keep the effects local, prefer to relax instructions closest to
938 // the align directive. This is purely about human understandability
939 // of the resulting code. If we later find a reason to expand
940 // particular instructions over others, we can adjust.
941 unsigned RemainingSize = OrigSize;
942 while (!Relaxable.empty() && RemainingSize != 0) {
943 auto &RF = *Relaxable.pop_back_val();
944 // Give the backend a chance to play any tricks it wishes to increase
945 // the encoding size of the given instruction. Target independent code
946 // will try further relaxation, but target's may play further tricks.
947 Changed |= padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize);
948
949 // If we have an instruction which hasn't been fully relaxed, we can't
950 // skip past it and insert bytes before it. Changing its starting
951 // offset might require a larger negative offset than it can encode.
952 // We don't need to worry about larger positive offsets as none of the
953 // possible offsets between this and our align are visible, and the
954 // ones afterwards aren't changing.
955 if (mayNeedRelaxation(RF.getOpcode(), RF.getOperands(),
956 *RF.getSubtargetInfo()))
957 break;
958 }
959 Relaxable.clear();
960
961 // If we're looking at a boundary align, make sure we don't try to pad
962 // its target instructions for some following directive. Doing so would
963 // break the alignment of the current boundary align.
964 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
965 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);
966 Changed = true;
967 const MCFragment *LastFragment = BF->getLastFragment();
968 if (!LastFragment)
969 continue;
970 while (&*I != LastFragment)
971 ++I;
972 }
973 }
974 }
975
976 return Changed;
977 }
978
/// Return the longest NOP this subtarget should receive as a single
/// instruction; longer padding is split into multiple NOPs by writeNopData.
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  // 16-bit mode: matches the longest entry in the 16-bit NOP table used by
  // writeNopData below.
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  // Without NOPL and outside 64-bit mode, only the 1-byte 0x90 NOP is safe.
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  // Tuning flags describe how long a NOP the CPU can decode efficiently.
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}
995
996 /// Write a sequence of optimal nops to the output, covering \p Count
997 /// bytes.
998 /// \return - true on success, false on failure
writeNopData(raw_ostream & OS,uint64_t Count,const MCSubtargetInfo * STI) const999 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
1000 const MCSubtargetInfo *STI) const {
1001 static const char Nops32Bit[10][11] = {
1002 // nop
1003 "\x90",
1004 // xchg %ax,%ax
1005 "\x66\x90",
1006 // nopl (%[re]ax)
1007 "\x0f\x1f\x00",
1008 // nopl 0(%[re]ax)
1009 "\x0f\x1f\x40\x00",
1010 // nopl 0(%[re]ax,%[re]ax,1)
1011 "\x0f\x1f\x44\x00\x00",
1012 // nopw 0(%[re]ax,%[re]ax,1)
1013 "\x66\x0f\x1f\x44\x00\x00",
1014 // nopl 0L(%[re]ax)
1015 "\x0f\x1f\x80\x00\x00\x00\x00",
1016 // nopl 0L(%[re]ax,%[re]ax,1)
1017 "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1018 // nopw 0L(%[re]ax,%[re]ax,1)
1019 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1020 // nopw %cs:0L(%[re]ax,%[re]ax,1)
1021 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1022 };
1023
1024 // 16-bit mode uses different nop patterns than 32-bit.
1025 static const char Nops16Bit[4][11] = {
1026 // nop
1027 "\x90",
1028 // xchg %eax,%eax
1029 "\x66\x90",
1030 // lea 0(%si),%si
1031 "\x8d\x74\x00",
1032 // lea 0w(%si),%si
1033 "\x8d\xb4\x00\x00",
1034 };
1035
1036 const char(*Nops)[11] =
1037 STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
1038
1039 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
1040
1041 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1042 // length.
1043 do {
1044 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1045 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1046 for (uint8_t i = 0; i < Prefixes; i++)
1047 OS << '\x66';
1048 const uint8_t Rest = ThisNopLength - Prefixes;
1049 if (Rest != 0)
1050 OS.write(Nops[Rest - 1], Rest);
1051 Count -= ThisNopLength;
1052 } while (Count != 0);
1053
1054 return true;
1055 }
1056
1057 /* *** */
1058
1059 namespace {
1060
/// Common base for the ELF flavours of the X86 backend; carries the OS ABI
/// byte that the derived classes forward to their ELF object writers.
class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};
1067
/// 32-bit x86 ELF backend (EM_386 machine type).
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};
1079
/// x32 ABI backend: x86-64 machine type in a 32-bit (ELF32) container.
class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};
1092
/// Intel MCU (IAMCU) ELF backend: 32-bit container with EM_IAMCU machine.
class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};
1105
/// 64-bit x86 ELF backend (ELF64 container, EM_X86_64 machine type).
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};
1117
1118 class WindowsX86AsmBackend : public X86AsmBackend {
1119 bool Is64Bit;
1120
1121 public:
WindowsX86AsmBackend(const Target & T,bool is64Bit,const MCSubtargetInfo & STI)1122 WindowsX86AsmBackend(const Target &T, bool is64Bit,
1123 const MCSubtargetInfo &STI)
1124 : X86AsmBackend(T, STI)
1125 , Is64Bit(is64Bit) {
1126 }
1127
getFixupKind(StringRef Name) const1128 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1129 return StringSwitch<std::optional<MCFixupKind>>(Name)
1130 .Case("dir32", FK_Data_4)
1131 .Case("secrel32", FK_SecRel_4)
1132 .Case("secidx", FK_SecRel_2)
1133 .Default(MCAsmBackend::getFixupKind(Name));
1134 }
1135
1136 std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const1137 createObjectTargetWriter() const override {
1138 return createX86WinCOFFObjectWriter(Is64Bit);
1139 }
1140 };
1141
namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available; fall back to DWARF CFI.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU
1167
/// Mach-O (Darwin) flavour of the X86 backend. Adds generation of Apple's
/// compact unwind encoding from a frame's CFI instructions.
class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  // Scratch list of callee-saved registers recorded while walking a frame's
  // CFI instructions; mutable because encoding generation is const.
  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize; ///< Byte size of a register slot pushed on the stack.
  unsigned MoveInstrSize; ///< Size of a "move" instruction.
  unsigned StackDivide; ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(MCRegister Reg) const {
    switch (Reg.id()) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      // Pushes of R12-R15 encode in two bytes (they need an extra prefix).
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    // Tables are 0-terminated; compact unwind numbers start at 1.
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    // Register has no compact unwind representation.
    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break; // 0 terminates the recorded register list.

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U; // Unencodable register: signal failure.

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    // Translate LLVM register ids to compact unwind register numbers first.
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U; // Unencodable register: signal failure.
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    // Re-number each register relative to how many smaller-numbered
    // registers precede it (a Lehmer-code style permutation index).
    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +    RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             +    RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    // Per-mode constants: 8-byte slots / larger instructions in 64-bit mode.
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions. Returns CU::UNWIND_MODE_DWARF whenever the
  /// frame cannot be represented compactly.
  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint64_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    uint64_t StackSize = 0;
    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();

    // Walk the frame's CFI instructions, tracking pushes and stack sizing.
    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int64_t>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        MCRegister Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved registers
      // aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};
1495
1496 } // end anonymous namespace
1497
createX86_32AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1498 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1499 const MCSubtargetInfo &STI,
1500 const MCRegisterInfo &MRI,
1501 const MCTargetOptions &Options) {
1502 const Triple &TheTriple = STI.getTargetTriple();
1503 if (TheTriple.isOSBinFormatMachO())
1504 return new DarwinX86AsmBackend(T, MRI, STI);
1505
1506 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1507 return new WindowsX86AsmBackend(T, false, STI);
1508
1509 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1510
1511 if (TheTriple.isOSIAMCU())
1512 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1513
1514 return new ELFX86_32AsmBackend(T, OSABI, STI);
1515 }
1516
createX86_64AsmBackend(const Target & T,const MCSubtargetInfo & STI,const MCRegisterInfo & MRI,const MCTargetOptions & Options)1517 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1518 const MCSubtargetInfo &STI,
1519 const MCRegisterInfo &MRI,
1520 const MCTargetOptions &Options) {
1521 const Triple &TheTriple = STI.getTargetTriple();
1522 if (TheTriple.isOSBinFormatMachO())
1523 return new DarwinX86AsmBackend(T, MRI, STI);
1524
1525 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1526 return new WindowsX86AsmBackend(T, true, STI);
1527
1528 if (TheTriple.isUEFI()) {
1529 assert(TheTriple.isOSBinFormatCOFF() &&
1530 "Only COFF format is supported in UEFI environment.");
1531 return new WindowsX86AsmBackend(T, true, STI);
1532 }
1533
1534 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1535
1536 if (TheTriple.isX32())
1537 return new ELFX86_X32AsmBackend(T, OSABI, STI);
1538 return new ELFX86_64AsmBackend(T, OSABI, STI);
1539 }
1540
1541 namespace {
/// ELF streamer that routes instruction emission through the X86 backend's
/// begin/end hooks (see X86_MC::emitInstruction below).
class X86ELFStreamer : public MCELFStreamer {
public:
  X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                 std::unique_ptr<MCObjectWriter> OW,
                 std::unique_ptr<MCCodeEmitter> Emitter)
      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                      std::move(Emitter)) {}

  void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
};
1552 } // end anonymous namespace
1553
// Emit \p Inst through the streamer, bracketed by the X86 backend's
// emitInstructionBegin/End hooks so the backend can act around each
// instruction (presumably for the branch-alignment support configured via
// X86AlignBranchKind — confirm in the hook implementations).
void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
                             const MCSubtargetInfo &STI) {
  auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
  Backend.emitInstructionBegin(S, Inst, STI);
  S.MCObjectStreamer::emitInstruction(Inst, STI);
  Backend.emitInstructionEnd(S, Inst);
}
1561
// Delegate to the shared helper so ELF emission gets the backend's
// begin/end instruction hooks.
void X86ELFStreamer::emitInstruction(const MCInst &Inst,
                                     const MCSubtargetInfo &STI) {
  X86_MC::emitInstruction(*this, Inst, STI);
}
1566
// Construct the X86-specific ELF streamer; ownership of the backend, writer,
// and emitter transfers to the streamer.
MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
                                       std::unique_ptr<MCAsmBackend> &&MAB,
                                       std::unique_ptr<MCObjectWriter> &&MOW,
                                       std::unique_ptr<MCCodeEmitter> &&MCE) {
  return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
                            std::move(MCE));
}
1574