1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86EncodingOptimization.h" 11 #include "MCTargetDesc/X86FixupKinds.h" 12 #include "llvm/ADT/StringSwitch.h" 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/BinaryFormat/MachO.h" 15 #include "llvm/MC/MCAsmBackend.h" 16 #include "llvm/MC/MCAssembler.h" 17 #include "llvm/MC/MCCodeEmitter.h" 18 #include "llvm/MC/MCContext.h" 19 #include "llvm/MC/MCDwarf.h" 20 #include "llvm/MC/MCELFObjectWriter.h" 21 #include "llvm/MC/MCELFStreamer.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCFixupKindInfo.h" 24 #include "llvm/MC/MCInst.h" 25 #include "llvm/MC/MCInstrInfo.h" 26 #include "llvm/MC/MCMachObjectWriter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionMachO.h" 31 #include "llvm/MC/MCSubtargetInfo.h" 32 #include "llvm/MC/MCValue.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 using namespace llvm; 39 40 namespace { 41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 42 class X86AlignBranchKind { 43 private: 44 uint8_t AlignBranchKind = 0; 45 46 public: 47 void operator=(const std::string &Val) { 48 if (Val.empty()) 49 return; 50 SmallVector<StringRef, 6> BranchTypes; 51 StringRef(Val).split(BranchTypes, '+', -1, false); 52 for (auto BranchType : BranchTypes) { 53 if (BranchType == "fused") 54 addKind(X86::AlignBranchFused); 55 else if (BranchType == "jcc") 56 addKind(X86::AlignBranchJcc); 57 else if (BranchType == "jmp") 58 addKind(X86::AlignBranchJmp); 59 else if (BranchType == "call") 60 addKind(X86::AlignBranchCall); 61 else if (BranchType == "ret") 62 addKind(X86::AlignBranchRet); 63 else if (BranchType == "indirect") 64 addKind(X86::AlignBranchIndirect); 65 else { 66 errs() << "invalid argument " << BranchType.str() 67 << " to -x86-align-branch=; each element must be one of: fused, " 68 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 69 } 70 } 71 } 72 73 operator uint8_t() const { return AlignBranchKind; } 74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 75 }; 76 77 X86AlignBranchKind X86AlignBranchKindLoc; 78 79 cl::opt<unsigned> X86AlignBranchBoundary( 80 "x86-align-branch-boundary", cl::init(0), 81 cl::desc( 82 "Control how the assembler should align branches with NOP. If the " 83 "boundary's size is not 0, it should be a power of 2 and no less " 84 "than 32. Branches will be aligned to prevent from being across or " 85 "against the boundary of specified size. The default value 0 does not " 86 "align branches.")); 87 88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( 89 "x86-align-branch", 90 cl::desc( 91 "Specify types of branches to align (plus separated list of types):" 92 "\njcc indicates conditional jumps" 93 "\nfused indicates fused conditional jumps" 94 "\njmp indicates direct unconditional jumps" 95 "\ncall indicates direct and indirect calls" 96 "\nret indicates rets" 97 "\nindirect indicates indirect unconditional jumps"), 98 cl::location(X86AlignBranchKindLoc)); 99 100 cl::opt<bool> X86AlignBranchWithin32BBoundaries( 101 "x86-branches-within-32B-boundaries", cl::init(false), 102 cl::desc( 103 "Align selected instructions to mitigate negative performance impact " 104 "of Intel's micro code update for errata skx102. May break " 105 "assumptions about labels corresponding to particular instructions, " 106 "and should be used with caution.")); 107 108 cl::opt<unsigned> X86PadMaxPrefixSize( 109 "x86-pad-max-prefix-size", cl::init(0), 110 cl::desc("Maximum number of prefixes to use for padding")); 111 112 cl::opt<bool> X86PadForAlign( 113 "x86-pad-for-align", cl::init(false), cl::Hidden, 114 cl::desc("Pad previous instructions to implement align directives")); 115 116 cl::opt<bool> X86PadForBranchAlign( 117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden, 118 cl::desc("Pad previous instructions to implement branch alignment")); 119 120 class X86AsmBackend : public MCAsmBackend { 121 const MCSubtargetInfo &STI; 122 std::unique_ptr<const MCInstrInfo> MCII; 123 X86AlignBranchKind AlignBranchType; 124 Align AlignBoundary; 125 unsigned TargetPrefixMax = 0; 126 127 MCInst PrevInst; 128 unsigned PrevInstOpcode = 0; 129 MCBoundaryAlignFragment *PendingBA = nullptr; 130 std::pair<MCFragment *, size_t> PrevInstPosition; 131 bool IsRightAfterData = false; 132 133 uint8_t determinePaddingPrefix(const MCInst &Inst) const; 134 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; 135 bool needAlign(const MCInst &Inst) const; 136 bool canPadBranches(MCObjectStreamer &OS) const; 137 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; 138 139 public: 140 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) 141 : MCAsmBackend(llvm::endianness::little), STI(STI), 142 MCII(T.createMCInstrInfo()) { 143 if (X86AlignBranchWithin32BBoundaries) { 144 // At the moment, this defaults to aligning fused branches, unconditional 145 // jumps, and (unfused) conditional jumps with nops. Both the 146 // instructions aligned and the alignment method (nop vs prefix) may 147 // change in the future. 148 AlignBoundary = assumeAligned(32); 149 AlignBranchType.addKind(X86::AlignBranchFused); 150 AlignBranchType.addKind(X86::AlignBranchJcc); 151 AlignBranchType.addKind(X86::AlignBranchJmp); 152 } 153 // Allow overriding defaults set by main flag 154 if (X86AlignBranchBoundary.getNumOccurrences()) 155 AlignBoundary = assumeAligned(X86AlignBranchBoundary); 156 if (X86AlignBranch.getNumOccurrences()) 157 AlignBranchType = X86AlignBranchKindLoc; 158 if (X86PadMaxPrefixSize.getNumOccurrences()) 159 TargetPrefixMax = X86PadMaxPrefixSize; 160 } 161 162 bool allowAutoPadding() const override; 163 bool allowEnhancedRelaxation() const override; 164 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, 165 const MCSubtargetInfo &STI); 166 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst); 167 168 unsigned getNumFixupKinds() const override { 169 return X86::NumTargetFixupKinds; 170 } 171 172 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; 173 174 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; 175 176 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, 177 const MCValue &Target, 178 const MCSubtargetInfo *STI) override; 179 180 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 181 const MCValue &Target, MutableArrayRef<char> Data, 182 uint64_t Value, bool IsResolved, 183 const MCSubtargetInfo *STI) const override; 184 185 bool mayNeedRelaxation(const MCInst &Inst, 186 const MCSubtargetInfo &STI) const override; 187 188 bool fixupNeedsRelaxation(const MCFixup &Fixup, 189 uint64_t Value) const override; 190 191 void relaxInstruction(MCInst &Inst, 192 const MCSubtargetInfo &STI) const override; 193 194 bool padInstructionViaRelaxation(MCRelaxableFragment &RF, 195 MCCodeEmitter &Emitter, 196 unsigned &RemainingSize) const; 197 198 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 199 unsigned &RemainingSize) const; 200 201 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 202 unsigned &RemainingSize) const; 203 204 void finishLayout(const MCAssembler &Asm) const override; 205 206 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; 207 208 bool writeNopData(raw_ostream &OS, uint64_t Count, 209 const MCSubtargetInfo *STI) const override; 210 }; 211 } // end anonymous namespace 212 213 static bool isRelaxableBranch(unsigned Opcode) { 214 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1; 215 } 216 217 static unsigned getRelaxedOpcodeBranch(unsigned Opcode, 218 bool Is16BitMode = false) { 219 switch (Opcode) { 220 default: 221 llvm_unreachable("invalid opcode for branch"); 222 case X86::JCC_1: 223 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; 224 case X86::JMP_1: 225 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; 226 } 227 } 228 229 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) { 230 unsigned Opcode = MI.getOpcode(); 231 return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode) 232 : X86::getOpcodeForLongImmediateForm(Opcode); 233 } 234 235 static X86::CondCode getCondFromBranch(const MCInst &MI, 236 const MCInstrInfo &MCII) { 237 unsigned Opcode = MI.getOpcode(); 238 switch (Opcode) { 239 default: 240 return X86::COND_INVALID; 241 case X86::JCC_1: { 242 const MCInstrDesc &Desc = MCII.get(Opcode); 243 return static_cast<X86::CondCode>( 244 MI.getOperand(Desc.getNumOperands() - 1).getImm()); 245 } 246 } 247 } 248 249 static X86::SecondMacroFusionInstKind 250 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { 251 X86::CondCode CC = getCondFromBranch(MI, MCII); 252 return classifySecondCondCodeInMacroFusion(CC); 253 } 254 255 /// Check if the instruction uses RIP relative addressing. 256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 257 unsigned Opcode = MI.getOpcode(); 258 const MCInstrDesc &Desc = MCII.get(Opcode); 259 uint64_t TSFlags = Desc.TSFlags; 260 unsigned CurOp = X86II::getOperandBias(Desc); 261 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 262 if (MemoryOperand < 0) 263 return false; 264 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 265 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 266 return (BaseReg == X86::RIP); 267 } 268 269 /// Check if the instruction is a prefix. 270 static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) { 271 return X86II::isPrefix(MCII.get(Opcode).TSFlags); 272 } 273 274 /// Check if the instruction is valid as the first instruction in macro fusion. 275 static bool isFirstMacroFusibleInst(const MCInst &Inst, 276 const MCInstrInfo &MCII) { 277 // An Intel instruction with RIP relative addressing is not macro fusible. 278 if (isRIPRelative(Inst, MCII)) 279 return false; 280 X86::FirstMacroFusionInstKind FIK = 281 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 282 return FIK != X86::FirstMacroFusionInstKind::Invalid; 283 } 284 285 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 286 /// get a better peformance in some cases. Here, we determine which prefix is 287 /// the most suitable. 288 /// 289 /// If the instruction has a segment override prefix, use the existing one. 290 /// If the target is 64-bit, use the CS. 291 /// If the target is 32-bit, 292 /// - If the instruction has a ESP/EBP base register, use SS. 293 /// - Otherwise use DS. 294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 295 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && 296 "Prefixes can be added only in 32-bit or 64-bit mode."); 297 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 298 uint64_t TSFlags = Desc.TSFlags; 299 300 // Determine where the memory operand starts, if present. 301 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 302 if (MemoryOperand != -1) 303 MemoryOperand += X86II::getOperandBias(Desc); 304 305 unsigned SegmentReg = 0; 306 if (MemoryOperand >= 0) { 307 // Check for explicit segment override on memory operand. 308 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 309 } 310 311 switch (TSFlags & X86II::FormMask) { 312 default: 313 break; 314 case X86II::RawFrmDstSrc: { 315 // Check segment override opcode prefix as needed (not for %ds). 316 if (Inst.getOperand(2).getReg() != X86::DS) 317 SegmentReg = Inst.getOperand(2).getReg(); 318 break; 319 } 320 case X86II::RawFrmSrc: { 321 // Check segment override opcode prefix as needed (not for %ds). 322 if (Inst.getOperand(1).getReg() != X86::DS) 323 SegmentReg = Inst.getOperand(1).getReg(); 324 break; 325 } 326 case X86II::RawFrmMemOffs: { 327 // Check segment override opcode prefix as needed. 328 SegmentReg = Inst.getOperand(1).getReg(); 329 break; 330 } 331 } 332 333 if (SegmentReg != 0) 334 return X86::getSegmentOverridePrefixForReg(SegmentReg); 335 336 if (STI.hasFeature(X86::Is64Bit)) 337 return X86::CS_Encoding; 338 339 if (MemoryOperand >= 0) { 340 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 341 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 342 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 343 return X86::SS_Encoding; 344 } 345 return X86::DS_Encoding; 346 } 347 348 /// Check if the two instructions will be macro-fused on the target cpu. 349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 350 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 351 if (!InstDesc.isConditionalBranch()) 352 return false; 353 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 354 return false; 355 const X86::FirstMacroFusionInstKind CmpKind = 356 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 357 const X86::SecondMacroFusionInstKind BranchKind = 358 classifySecondInstInMacroFusion(Jcc, *MCII); 359 return X86::isMacroFused(CmpKind, BranchKind); 360 } 361 362 /// Check if the instruction has a variant symbol operand. 363 static bool hasVariantSymbol(const MCInst &MI) { 364 for (auto &Operand : MI) { 365 if (!Operand.isExpr()) 366 continue; 367 const MCExpr &Expr = *Operand.getExpr(); 368 if (Expr.getKind() == MCExpr::SymbolRef && 369 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 370 return true; 371 } 372 return false; 373 } 374 375 bool X86AsmBackend::allowAutoPadding() const { 376 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 377 } 378 379 bool X86AsmBackend::allowEnhancedRelaxation() const { 380 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 381 } 382 383 /// X86 has certain instructions which enable interrupts exactly one 384 /// instruction *after* the instruction which stores to SS. Return true if the 385 /// given instruction may have such an interrupt delay slot. 386 static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) { 387 switch (InstOpcode) { 388 case X86::POPSS16: 389 case X86::POPSS32: 390 case X86::STI: 391 return true; 392 393 case X86::MOV16sr: 394 case X86::MOV32sr: 395 case X86::MOV64sr: 396 case X86::MOV16sm: 397 // In fact, this is only the case if the first operand is SS. However, as 398 // segment moves occur extremely rarely, this is just a minor pessimization. 399 return true; 400 } 401 return false; 402 } 403 404 /// Check if the instruction to be emitted is right after any data. 405 static bool 406 isRightAfterData(MCFragment *CurrentFragment, 407 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 408 MCFragment *F = CurrentFragment; 409 // Since data is always emitted into a DataFragment, our check strategy is 410 // simple here. 411 // - If the fragment is a DataFragment 412 // - If it's empty (section start or data after align), return false. 413 // - If it's not the fragment where the previous instruction is, 414 // returns true. 415 // - If it's the fragment holding the previous instruction but its 416 // size changed since the previous instruction was emitted into 417 // it, returns true. 418 // - Otherwise returns false. 419 // - If the fragment is not a DataFragment, returns false. 420 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) 421 return DF->getContents().size() && 422 (DF != PrevInstPosition.first || 423 DF->getContents().size() != PrevInstPosition.second); 424 425 return false; 426 } 427 428 /// \returns the fragment size if it has instructions, otherwise returns 0. 429 static size_t getSizeForInstFragment(const MCFragment *F) { 430 if (!F || !F->hasInstructions()) 431 return 0; 432 // MCEncodedFragmentWithContents being templated makes this tricky. 433 switch (F->getKind()) { 434 default: 435 llvm_unreachable("Unknown fragment with instructions!"); 436 case MCFragment::FT_Data: 437 return cast<MCDataFragment>(*F).getContents().size(); 438 case MCFragment::FT_Relaxable: 439 return cast<MCRelaxableFragment>(*F).getContents().size(); 440 case MCFragment::FT_CompactEncodedInst: 441 return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); 442 } 443 } 444 445 /// Return true if we can insert NOP or prefixes automatically before the 446 /// the instruction to be emitted. 447 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { 448 if (hasVariantSymbol(Inst)) 449 // Linker may rewrite the instruction with variant symbol operand(e.g. 450 // TLSCALL). 451 return false; 452 453 if (mayHaveInterruptDelaySlot(PrevInstOpcode)) 454 // If this instruction follows an interrupt enabling instruction with a one 455 // instruction delay, inserting a nop would change behavior. 456 return false; 457 458 if (isPrefix(PrevInstOpcode, *MCII)) 459 // If this instruction follows a prefix, inserting a nop/prefix would change 460 // semantic. 461 return false; 462 463 if (isPrefix(Inst.getOpcode(), *MCII)) 464 // If this instruction is a prefix, inserting a prefix would change 465 // semantic. 466 return false; 467 468 if (IsRightAfterData) 469 // If this instruction follows any data, there is no clear 470 // instruction boundary, inserting a nop/prefix would change semantic. 471 return false; 472 473 return true; 474 } 475 476 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { 477 if (!OS.getAllowAutoPadding()) 478 return false; 479 assert(allowAutoPadding() && "incorrect initialization!"); 480 481 // We only pad in text section. 482 if (!OS.getCurrentSectionOnly()->isText()) 483 return false; 484 485 // To be Done: Currently don't deal with Bundle cases. 486 if (OS.getAssembler().isBundlingEnabled()) 487 return false; 488 489 // Branches only need to be aligned in 32-bit or 64-bit mode. 490 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit))) 491 return false; 492 493 return true; 494 } 495 496 /// Check if the instruction operand needs to be aligned. 497 bool X86AsmBackend::needAlign(const MCInst &Inst) const { 498 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 499 return (Desc.isConditionalBranch() && 500 (AlignBranchType & X86::AlignBranchJcc)) || 501 (Desc.isUnconditionalBranch() && 502 (AlignBranchType & X86::AlignBranchJmp)) || 503 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || 504 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || 505 (Desc.isIndirectBranch() && 506 (AlignBranchType & X86::AlignBranchIndirect)); 507 } 508 509 /// Insert BoundaryAlignFragment before instructions to align branches. 510 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, 511 const MCInst &Inst, const MCSubtargetInfo &STI) { 512 // Used by canPadInst. Done here, because in emitInstructionEnd, the current 513 // fragment will have changed. 514 IsRightAfterData = 515 isRightAfterData(OS.getCurrentFragment(), PrevInstPosition); 516 517 if (!canPadBranches(OS)) 518 return; 519 520 // NB: PrevInst only valid if canPadBranches is true. 521 if (!isMacroFused(PrevInst, Inst)) 522 // Macro fusion doesn't happen indeed, clear the pending. 523 PendingBA = nullptr; 524 525 // When branch padding is enabled (basically the skx102 erratum => unlikely), 526 // we call canPadInst (not cheap) twice. However, in the common case, we can 527 // avoid unnecessary calls to that, as this is otherwise only used for 528 // relaxable fragments. 529 if (!canPadInst(Inst, OS)) 530 return; 531 532 if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) { 533 // Macro fusion actually happens and there is no other fragment inserted 534 // after the previous instruction. 535 // 536 // Do nothing here since we already inserted a BoudaryAlign fragment when 537 // we met the first instruction in the fused pair and we'll tie them 538 // together in emitInstructionEnd. 539 // 540 // Note: When there is at least one fragment, such as MCAlignFragment, 541 // inserted after the previous instruction, e.g. 542 // 543 // \code 544 // cmp %rax %rcx 545 // .align 16 546 // je .Label0 547 // \ endcode 548 // 549 // We will treat the JCC as a unfused branch although it may be fused 550 // with the CMP. 551 return; 552 } 553 554 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && 555 isFirstMacroFusibleInst(Inst, *MCII))) { 556 // If we meet a unfused branch or the first instuction in a fusiable pair, 557 // insert a BoundaryAlign fragment. 558 PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>( 559 AlignBoundary, STI); 560 OS.insert(PendingBA); 561 } 562 } 563 564 /// Set the last fragment to be aligned for the BoundaryAlignFragment. 565 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, 566 const MCInst &Inst) { 567 MCFragment *CF = OS.getCurrentFragment(); 568 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) 569 F->setAllowAutoPadding(canPadInst(Inst, OS)); 570 571 // Update PrevInstOpcode here, canPadInst() reads that. 572 PrevInstOpcode = Inst.getOpcode(); 573 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); 574 575 if (!canPadBranches(OS)) 576 return; 577 578 // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap. 579 PrevInst = Inst; 580 581 if (!needAlign(Inst) || !PendingBA) 582 return; 583 584 // Tie the aligned instructions into a pending BoundaryAlign. 585 PendingBA->setLastFragment(CF); 586 PendingBA = nullptr; 587 588 // We need to ensure that further data isn't added to the current 589 // DataFragment, so that we can get the size of instructions later in 590 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty 591 // DataFragment. 592 if (isa_and_nonnull<MCDataFragment>(CF)) 593 OS.insert(OS.getContext().allocFragment<MCDataFragment>()); 594 595 // Update the maximum alignment on the current section if necessary. 596 MCSection *Sec = OS.getCurrentSectionOnly(); 597 Sec->ensureMinAlignment(AlignBoundary); 598 } 599 600 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 601 if (STI.getTargetTriple().isOSBinFormatELF()) { 602 unsigned Type; 603 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 604 Type = llvm::StringSwitch<unsigned>(Name) 605 #define ELF_RELOC(X, Y) .Case(#X, Y) 606 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 607 #undef ELF_RELOC 608 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE) 609 .Case("BFD_RELOC_8", ELF::R_X86_64_8) 610 .Case("BFD_RELOC_16", ELF::R_X86_64_16) 611 .Case("BFD_RELOC_32", ELF::R_X86_64_32) 612 .Case("BFD_RELOC_64", ELF::R_X86_64_64) 613 .Default(-1u); 614 } else { 615 Type = llvm::StringSwitch<unsigned>(Name) 616 #define ELF_RELOC(X, Y) .Case(#X, Y) 617 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 618 #undef ELF_RELOC 619 .Case("BFD_RELOC_NONE", ELF::R_386_NONE) 620 .Case("BFD_RELOC_8", ELF::R_386_8) 621 .Case("BFD_RELOC_16", ELF::R_386_16) 622 .Case("BFD_RELOC_32", ELF::R_386_32) 623 .Default(-1u); 624 } 625 if (Type == -1u) 626 return std::nullopt; 627 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 628 } 629 return MCAsmBackend::getFixupKind(Name); 630 } 631 632 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 633 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 634 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 635 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 636 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 637 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 638 {"reloc_signed_4byte", 0, 32, 0}, 639 {"reloc_signed_4byte_relax", 0, 32, 0}, 640 {"reloc_global_offset_table", 0, 32, 0}, 641 {"reloc_global_offset_table8", 0, 64, 0}, 642 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 643 }; 644 645 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 646 // do not require any extra processing. 647 if (Kind >= FirstLiteralRelocationKind) 648 return MCAsmBackend::getFixupKindInfo(FK_NONE); 649 650 if (Kind < FirstTargetFixupKind) 651 return MCAsmBackend::getFixupKindInfo(Kind); 652 653 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 654 "Invalid kind!"); 655 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 656 return Infos[Kind - FirstTargetFixupKind]; 657 } 658 659 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 660 const MCFixup &Fixup, const MCValue &, 661 const MCSubtargetInfo *STI) { 662 return Fixup.getKind() >= FirstLiteralRelocationKind; 663 } 664 665 static unsigned getFixupKindSize(unsigned Kind) { 666 switch (Kind) { 667 default: 668 llvm_unreachable("invalid fixup kind!"); 669 case FK_NONE: 670 return 0; 671 case FK_PCRel_1: 672 case FK_SecRel_1: 673 case FK_Data_1: 674 return 1; 675 case FK_PCRel_2: 676 case FK_SecRel_2: 677 case FK_Data_2: 678 return 2; 679 case FK_PCRel_4: 680 case X86::reloc_riprel_4byte: 681 case X86::reloc_riprel_4byte_relax: 682 case X86::reloc_riprel_4byte_relax_rex: 683 case X86::reloc_riprel_4byte_movq_load: 684 case X86::reloc_signed_4byte: 685 case X86::reloc_signed_4byte_relax: 686 case X86::reloc_global_offset_table: 687 case X86::reloc_branch_4byte_pcrel: 688 case FK_SecRel_4: 689 case FK_Data_4: 690 return 4; 691 case FK_PCRel_8: 692 case FK_SecRel_8: 693 case FK_Data_8: 694 case X86::reloc_global_offset_table8: 695 return 8; 696 } 697 } 698 699 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 700 const MCValue &Target, 701 MutableArrayRef<char> Data, 702 uint64_t Value, bool IsResolved, 703 const MCSubtargetInfo *STI) const { 704 unsigned Kind = Fixup.getKind(); 705 if (Kind >= FirstLiteralRelocationKind) 706 return; 707 unsigned Size = getFixupKindSize(Kind); 708 709 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); 710 711 int64_t SignedValue = static_cast<int64_t>(Value); 712 if ((Target.isAbsolute() || IsResolved) && 713 getFixupKindInfo(Fixup.getKind()).Flags & 714 MCFixupKindInfo::FKF_IsPCRel) { 715 // check that PC relative fixup fits into the fixup size. 716 if (Size > 0 && !isIntN(Size * 8, SignedValue)) 717 Asm.getContext().reportError( 718 Fixup.getLoc(), "value of " + Twine(SignedValue) + 719 " is too large for field of " + Twine(Size) + 720 ((Size == 1) ? " byte." : " bytes.")); 721 } else { 722 // Check that uppper bits are either all zeros or all ones. 723 // Specifically ignore overflow/underflow as long as the leakage is 724 // limited to the lower bits. This is to remain compatible with 725 // other assemblers. 726 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && 727 "Value does not fit in the Fixup field"); 728 } 729 730 for (unsigned i = 0; i != Size; ++i) 731 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); 732 } 733 734 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI, 735 const MCSubtargetInfo &STI) const { 736 unsigned Opcode = MI.getOpcode(); 737 unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0; 738 return isRelaxableBranch(Opcode) || 739 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode && 740 MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr()); 741 } 742 743 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, 744 uint64_t Value) const { 745 // Relax if the value is too big for a (signed) i8. 746 return !isInt<8>(Value); 747 } 748 749 // FIXME: Can tblgen help at all here to verify there aren't other instructions 750 // we can relax? 751 void X86AsmBackend::relaxInstruction(MCInst &Inst, 752 const MCSubtargetInfo &STI) const { 753 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 754 bool Is16BitMode = STI.hasFeature(X86::Is16Bit); 755 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); 756 757 if (RelaxedOp == Inst.getOpcode()) { 758 SmallString<256> Tmp; 759 raw_svector_ostream OS(Tmp); 760 Inst.dump_pretty(OS); 761 OS << "\n"; 762 report_fatal_error("unexpected instruction to relax: " + OS.str()); 763 } 764 765 Inst.setOpcode(RelaxedOp); 766 } 767 768 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, 769 MCCodeEmitter &Emitter, 770 unsigned &RemainingSize) const { 771 if (!RF.getAllowAutoPadding()) 772 return false; 773 // If the instruction isn't fully relaxed, shifting it around might require a 774 // larger value for one of the fixups then can be encoded. The outer loop 775 // will also catch this before moving to the next instruction, but we need to 776 // prevent padding this single instruction as well. 777 if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo())) 778 return false; 779 780 const unsigned OldSize = RF.getContents().size(); 781 if (OldSize == 15) 782 return false; 783 784 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); 785 const unsigned RemainingPrefixSize = [&]() -> unsigned { 786 SmallString<15> Code; 787 X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI); 788 assert(Code.size() < 15 && "The number of prefixes must be less than 15."); 789 790 // TODO: It turns out we need a decent amount of plumbing for the target 791 // specific bits to determine number of prefixes its safe to add. Various 792 // targets (older chips mostly, but also Atom family) encounter decoder 793 // stalls with too many prefixes. For testing purposes, we set the value 794 // externally for the moment. 795 unsigned ExistingPrefixSize = Code.size(); 796 if (TargetPrefixMax <= ExistingPrefixSize) 797 return 0; 798 return TargetPrefixMax - ExistingPrefixSize; 799 }(); 800 const unsigned PrefixBytesToAdd = 801 std::min(MaxPossiblePad, RemainingPrefixSize); 802 if (PrefixBytesToAdd == 0) 803 return false; 804 805 const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); 806 807 SmallString<256> Code; 808 Code.append(PrefixBytesToAdd, Prefix); 809 Code.append(RF.getContents().begin(), RF.getContents().end()); 810 RF.getContents() = Code; 811 812 // Adjust the fixups for the change in offsets 813 for (auto &F : RF.getFixups()) { 814 F.setOffset(F.getOffset() + PrefixBytesToAdd); 815 } 816 817 RemainingSize -= PrefixBytesToAdd; 818 return true; 819 } 820 821 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, 822 MCCodeEmitter &Emitter, 823 unsigned &RemainingSize) const { 824 if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo())) 825 // TODO: There are lots of other tricks we could apply for increasing 826 // encoding size without impacting performance. 827 return false; 828 829 MCInst Relaxed = RF.getInst(); 830 relaxInstruction(Relaxed, *RF.getSubtargetInfo()); 831 832 SmallVector<MCFixup, 4> Fixups; 833 SmallString<15> Code; 834 Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo()); 835 const unsigned OldSize = RF.getContents().size(); 836 const unsigned NewSize = Code.size(); 837 assert(NewSize >= OldSize && "size decrease during relaxation?"); 838 unsigned Delta = NewSize - OldSize; 839 if (Delta > RemainingSize) 840 return false; 841 RF.setInst(Relaxed); 842 RF.getContents() = Code; 843 RF.getFixups() = Fixups; 844 RemainingSize -= Delta; 845 return true; 846 } 847 848 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, 849 MCCodeEmitter &Emitter, 850 unsigned &RemainingSize) const { 851 bool Changed = false; 852 if (RemainingSize != 0) 853 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); 854 if (RemainingSize != 0) 855 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); 856 return Changed; 857 } 858 859 void X86AsmBackend::finishLayout(MCAssembler const &Asm) const { 860 // See if we can further relax some instructions to cut down on the number of 861 // nop bytes required for code alignment. The actual win is in reducing 862 // instruction count, not number of bytes. Modern X86-64 can easily end up 863 // decode limited. It is often better to reduce the number of instructions 864 // (i.e. eliminate nops) even at the cost of increasing the size and 865 // complexity of others. 866 if (!X86PadForAlign && !X86PadForBranchAlign) 867 return; 868 869 // The processed regions are delimitered by LabeledFragments. -g may have more 870 // MCSymbols and therefore different relaxation results. X86PadForAlign is 871 // disabled by default to eliminate the -g vs non -g difference. 872 DenseSet<MCFragment *> LabeledFragments; 873 for (const MCSymbol &S : Asm.symbols()) 874 LabeledFragments.insert(S.getFragment(false)); 875 876 for (MCSection &Sec : Asm) { 877 if (!Sec.isText()) 878 continue; 879 880 SmallVector<MCRelaxableFragment *, 4> Relaxable; 881 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { 882 MCFragment &F = *I; 883 884 if (LabeledFragments.count(&F)) 885 Relaxable.clear(); 886 887 if (F.getKind() == MCFragment::FT_Data || 888 F.getKind() == MCFragment::FT_CompactEncodedInst) 889 // Skip and ignore 890 continue; 891 892 if (F.getKind() == MCFragment::FT_Relaxable) { 893 auto &RF = cast<MCRelaxableFragment>(*I); 894 Relaxable.push_back(&RF); 895 continue; 896 } 897 898 auto canHandle = [](MCFragment &F) -> bool { 899 switch (F.getKind()) { 900 default: 901 return false; 902 case MCFragment::FT_Align: 903 return X86PadForAlign; 904 case MCFragment::FT_BoundaryAlign: 905 return X86PadForBranchAlign; 906 } 907 }; 908 // For any unhandled kind, assume we can't change layout. 909 if (!canHandle(F)) { 910 Relaxable.clear(); 911 continue; 912 } 913 914 #ifndef NDEBUG 915 const uint64_t OrigOffset = Asm.getFragmentOffset(F); 916 #endif 917 const uint64_t OrigSize = Asm.computeFragmentSize(F); 918 919 // To keep the effects local, prefer to relax instructions closest to 920 // the align directive. This is purely about human understandability 921 // of the resulting code. If we later find a reason to expand 922 // particular instructions over others, we can adjust. 923 unsigned RemainingSize = OrigSize; 924 while (!Relaxable.empty() && RemainingSize != 0) { 925 auto &RF = *Relaxable.pop_back_val(); 926 // Give the backend a chance to play any tricks it wishes to increase 927 // the encoding size of the given instruction. Target independent code 928 // will try further relaxation, but target's may play further tricks. 929 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) 930 Sec.setHasLayout(false); 931 932 // If we have an instruction which hasn't been fully relaxed, we can't 933 // skip past it and insert bytes before it. Changing its starting 934 // offset might require a larger negative offset than it can encode. 935 // We don't need to worry about larger positive offsets as none of the 936 // possible offsets between this and our align are visible, and the 937 // ones afterwards aren't changing. 938 if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo())) 939 break; 940 } 941 Relaxable.clear(); 942 943 // BoundaryAlign explicitly tracks it's size (unlike align) 944 if (F.getKind() == MCFragment::FT_BoundaryAlign) 945 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 946 947 #ifndef NDEBUG 948 const uint64_t FinalOffset = Asm.getFragmentOffset(F); 949 const uint64_t FinalSize = Asm.computeFragmentSize(F); 950 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 951 "can't move start of next fragment!"); 952 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 953 #endif 954 955 // If we're looking at a boundary align, make sure we don't try to pad 956 // its target instructions for some following directive. Doing so would 957 // break the alignment of the current boundary align. 958 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 959 const MCFragment *LastFragment = BF->getLastFragment(); 960 if (!LastFragment) 961 continue; 962 while (&*I != LastFragment) 963 ++I; 964 } 965 } 966 } 967 968 // The layout is done. Mark every fragment as valid. 969 for (MCSection &Section : Asm) { 970 Asm.getFragmentOffset(*Section.curFragList()->Tail); 971 Asm.computeFragmentSize(*Section.curFragList()->Tail); 972 } 973 } 974 975 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { 976 if (STI.hasFeature(X86::Is16Bit)) 977 return 4; 978 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit)) 979 return 1; 980 if (STI.hasFeature(X86::TuningFast7ByteNOP)) 981 return 7; 982 if (STI.hasFeature(X86::TuningFast15ByteNOP)) 983 return 15; 984 if (STI.hasFeature(X86::TuningFast11ByteNOP)) 985 return 11; 986 // FIXME: handle 32-bit mode 987 // 15-bytes is the longest single NOP instruction, but 10-bytes is 988 // commonly the longest that can be efficiently decoded. 989 return 10; 990 } 991 992 /// Write a sequence of optimal nops to the output, covering \p Count 993 /// bytes. 994 /// \return - true on success, false on failure 995 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 996 const MCSubtargetInfo *STI) const { 997 static const char Nops32Bit[10][11] = { 998 // nop 999 "\x90", 1000 // xchg %ax,%ax 1001 "\x66\x90", 1002 // nopl (%[re]ax) 1003 "\x0f\x1f\x00", 1004 // nopl 0(%[re]ax) 1005 "\x0f\x1f\x40\x00", 1006 // nopl 0(%[re]ax,%[re]ax,1) 1007 "\x0f\x1f\x44\x00\x00", 1008 // nopw 0(%[re]ax,%[re]ax,1) 1009 "\x66\x0f\x1f\x44\x00\x00", 1010 // nopl 0L(%[re]ax) 1011 "\x0f\x1f\x80\x00\x00\x00\x00", 1012 // nopl 0L(%[re]ax,%[re]ax,1) 1013 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1014 // nopw 0L(%[re]ax,%[re]ax,1) 1015 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1016 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1017 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1018 }; 1019 1020 // 16-bit mode uses different nop patterns than 32-bit. 1021 static const char Nops16Bit[4][11] = { 1022 // nop 1023 "\x90", 1024 // xchg %eax,%eax 1025 "\x66\x90", 1026 // lea 0(%si),%si 1027 "\x8d\x74\x00", 1028 // lea 0w(%si),%si 1029 "\x8d\xb4\x00\x00", 1030 }; 1031 1032 const char(*Nops)[11] = 1033 STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit; 1034 1035 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1036 1037 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1038 // length. 1039 do { 1040 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1041 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1042 for (uint8_t i = 0; i < Prefixes; i++) 1043 OS << '\x66'; 1044 const uint8_t Rest = ThisNopLength - Prefixes; 1045 if (Rest != 0) 1046 OS.write(Nops[Rest - 1], Rest); 1047 Count -= ThisNopLength; 1048 } while (Count != 0); 1049 1050 return true; 1051 } 1052 1053 /* *** */ 1054 1055 namespace { 1056 1057 class ELFX86AsmBackend : public X86AsmBackend { 1058 public: 1059 uint8_t OSABI; 1060 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1061 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1062 }; 1063 1064 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1065 public: 1066 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1067 const MCSubtargetInfo &STI) 1068 : ELFX86AsmBackend(T, OSABI, STI) {} 1069 1070 std::unique_ptr<MCObjectTargetWriter> 1071 createObjectTargetWriter() const override { 1072 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1073 } 1074 }; 1075 1076 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1077 public: 1078 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1079 const MCSubtargetInfo &STI) 1080 : ELFX86AsmBackend(T, OSABI, STI) {} 1081 1082 std::unique_ptr<MCObjectTargetWriter> 1083 createObjectTargetWriter() const override { 1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1085 ELF::EM_X86_64); 1086 } 1087 }; 1088 1089 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1090 public: 1091 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1092 const MCSubtargetInfo &STI) 1093 : ELFX86AsmBackend(T, OSABI, STI) {} 1094 1095 std::unique_ptr<MCObjectTargetWriter> 1096 createObjectTargetWriter() const override { 1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1098 ELF::EM_IAMCU); 1099 } 1100 }; 1101 1102 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1103 public: 1104 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1105 const MCSubtargetInfo &STI) 1106 : ELFX86AsmBackend(T, OSABI, STI) {} 1107 1108 std::unique_ptr<MCObjectTargetWriter> 1109 createObjectTargetWriter() const override { 1110 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1111 } 1112 }; 1113 1114 class WindowsX86AsmBackend : public X86AsmBackend { 1115 bool Is64Bit; 1116 1117 public: 1118 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1119 const MCSubtargetInfo &STI) 1120 : X86AsmBackend(T, STI) 1121 , Is64Bit(is64Bit) { 1122 } 1123 1124 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1125 return StringSwitch<std::optional<MCFixupKind>>(Name) 1126 .Case("dir32", FK_Data_4) 1127 .Case("secrel32", FK_SecRel_4) 1128 .Case("secidx", FK_SecRel_2) 1129 .Default(MCAsmBackend::getFixupKind(Name)); 1130 } 1131 1132 std::unique_ptr<MCObjectTargetWriter> 1133 createObjectTargetWriter() const override { 1134 return createX86WinCOFFObjectWriter(Is64Bit); 1135 } 1136 }; 1137 1138 namespace CU { 1139 1140 /// Compact unwind encoding values. 1141 enum CompactUnwindEncodings { 1142 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1143 /// the return address, then [RE]SP is moved to [RE]BP. 1144 UNWIND_MODE_BP_FRAME = 0x01000000, 1145 1146 /// A frameless function with a small constant stack size. 1147 UNWIND_MODE_STACK_IMMD = 0x02000000, 1148 1149 /// A frameless function with a large constant stack size. 1150 UNWIND_MODE_STACK_IND = 0x03000000, 1151 1152 /// No compact unwind encoding is available. 1153 UNWIND_MODE_DWARF = 0x04000000, 1154 1155 /// Mask for encoding the frame registers. 1156 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1157 1158 /// Mask for encoding the frameless registers. 1159 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1160 }; 1161 1162 } // namespace CU 1163 1164 class DarwinX86AsmBackend : public X86AsmBackend { 1165 const MCRegisterInfo &MRI; 1166 1167 /// Number of registers that can be saved in a compact unwind encoding. 1168 enum { CU_NUM_SAVED_REGS = 6 }; 1169 1170 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1171 Triple TT; 1172 bool Is64Bit; 1173 1174 unsigned OffsetSize; ///< Offset of a "push" instruction. 1175 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1176 unsigned StackDivide; ///< Amount to adjust stack size by. 1177 protected: 1178 /// Size of a "push" instruction for the given register. 1179 unsigned PushInstrSize(unsigned Reg) const { 1180 switch (Reg) { 1181 case X86::EBX: 1182 case X86::ECX: 1183 case X86::EDX: 1184 case X86::EDI: 1185 case X86::ESI: 1186 case X86::EBP: 1187 case X86::RBX: 1188 case X86::RBP: 1189 return 1; 1190 case X86::R12: 1191 case X86::R13: 1192 case X86::R14: 1193 case X86::R15: 1194 return 2; 1195 } 1196 return 1; 1197 } 1198 1199 private: 1200 /// Get the compact unwind number for a given register. The number 1201 /// corresponds to the enum lists in compact_unwind_encoding.h. 1202 int getCompactUnwindRegNum(unsigned Reg) const { 1203 static const MCPhysReg CU32BitRegs[7] = { 1204 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1205 }; 1206 static const MCPhysReg CU64BitRegs[] = { 1207 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1208 }; 1209 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1210 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1211 if (*CURegs == Reg) 1212 return Idx; 1213 1214 return -1; 1215 } 1216 1217 /// Return the registers encoded for a compact encoding with a frame 1218 /// pointer. 1219 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1220 // Encode the registers in the order they were saved --- 3-bits per 1221 // register. The list of saved registers is assumed to be in reverse 1222 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1223 uint32_t RegEnc = 0; 1224 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1225 unsigned Reg = SavedRegs[i]; 1226 if (Reg == 0) break; 1227 1228 int CURegNum = getCompactUnwindRegNum(Reg); 1229 if (CURegNum == -1) return ~0U; 1230 1231 // Encode the 3-bit register number in order, skipping over 3-bits for 1232 // each register. 1233 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1234 } 1235 1236 assert((RegEnc & 0x3FFFF) == RegEnc && 1237 "Invalid compact register encoding!"); 1238 return RegEnc; 1239 } 1240 1241 /// Create the permutation encoding used with frameless stacks. It is 1242 /// passed the number of registers to be saved and an array of the registers 1243 /// saved. 1244 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1245 // The saved registers are numbered from 1 to 6. In order to encode the 1246 // order in which they were saved, we re-number them according to their 1247 // place in the register order. The re-numbering is relative to the last 1248 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in 1249 // that order: 1250 // 1251 // Orig Re-Num 1252 // ---- ------ 1253 // 6 6 1254 // 2 2 1255 // 4 3 1256 // 5 3 1257 // 1258 for (unsigned i = 0; i < RegCount; ++i) { 1259 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1260 if (CUReg == -1) return ~0U; 1261 SavedRegs[i] = CUReg; 1262 } 1263 1264 // Reverse the list. 1265 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1266 1267 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1268 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1269 unsigned Countless = 0; 1270 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1271 if (SavedRegs[j] < SavedRegs[i]) 1272 ++Countless; 1273 1274 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1275 } 1276 1277 // Take the renumbered values and encode them into a 10-bit number. 1278 uint32_t permutationEncoding = 0; 1279 switch (RegCount) { 1280 case 6: 1281 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1282 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1283 + RenumRegs[4]; 1284 break; 1285 case 5: 1286 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1287 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1288 + RenumRegs[5]; 1289 break; 1290 case 4: 1291 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1292 + 3 * RenumRegs[4] + RenumRegs[5]; 1293 break; 1294 case 3: 1295 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1296 + RenumRegs[5]; 1297 break; 1298 case 2: 1299 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1300 break; 1301 case 1: 1302 permutationEncoding |= RenumRegs[5]; 1303 break; 1304 } 1305 1306 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1307 "Invalid compact register encoding!"); 1308 return permutationEncoding; 1309 } 1310 1311 public: 1312 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1313 const MCSubtargetInfo &STI) 1314 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1315 Is64Bit(TT.isArch64Bit()) { 1316 memset(SavedRegs, 0, sizeof(SavedRegs)); 1317 OffsetSize = Is64Bit ? 8 : 4; 1318 MoveInstrSize = Is64Bit ? 3 : 2; 1319 StackDivide = Is64Bit ? 8 : 4; 1320 } 1321 1322 std::unique_ptr<MCObjectTargetWriter> 1323 createObjectTargetWriter() const override { 1324 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1325 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1326 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1327 } 1328 1329 /// Implementation of algorithm to generate the compact unwind encoding 1330 /// for the CFI instructions. 1331 uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI, 1332 const MCContext *Ctxt) const override { 1333 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions; 1334 if (Instrs.empty()) return 0; 1335 if (!isDarwinCanonicalPersonality(FI->Personality) && 1336 !Ctxt->emitCompactUnwindNonCanonical()) 1337 return CU::UNWIND_MODE_DWARF; 1338 1339 // Reset the saved registers. 1340 unsigned SavedRegIdx = 0; 1341 memset(SavedRegs, 0, sizeof(SavedRegs)); 1342 1343 bool HasFP = false; 1344 1345 // Encode that we are using EBP/RBP as the frame pointer. 1346 uint64_t CompactUnwindEncoding = 0; 1347 1348 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1349 unsigned InstrOffset = 0; 1350 unsigned StackAdjust = 0; 1351 uint64_t StackSize = 0; 1352 int64_t MinAbsOffset = std::numeric_limits<int64_t>::max(); 1353 1354 for (const MCCFIInstruction &Inst : Instrs) { 1355 switch (Inst.getOperation()) { 1356 default: 1357 // Any other CFI directives indicate a frame that we aren't prepared 1358 // to represent via compact unwind, so just bail out. 1359 return CU::UNWIND_MODE_DWARF; 1360 case MCCFIInstruction::OpDefCfaRegister: { 1361 // Defines a frame pointer. E.g. 1362 // 1363 // movq %rsp, %rbp 1364 // L0: 1365 // .cfi_def_cfa_register %rbp 1366 // 1367 HasFP = true; 1368 1369 // If the frame pointer is other than esp/rsp, we do not have a way to 1370 // generate a compact unwinding representation, so bail out. 1371 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1372 (Is64Bit ? X86::RBP : X86::EBP)) 1373 return CU::UNWIND_MODE_DWARF; 1374 1375 // Reset the counts. 1376 memset(SavedRegs, 0, sizeof(SavedRegs)); 1377 StackAdjust = 0; 1378 SavedRegIdx = 0; 1379 MinAbsOffset = std::numeric_limits<int64_t>::max(); 1380 InstrOffset += MoveInstrSize; 1381 break; 1382 } 1383 case MCCFIInstruction::OpDefCfaOffset: { 1384 // Defines a new offset for the CFA. E.g. 1385 // 1386 // With frame: 1387 // 1388 // pushq %rbp 1389 // L0: 1390 // .cfi_def_cfa_offset 16 1391 // 1392 // Without frame: 1393 // 1394 // subq $72, %rsp 1395 // L0: 1396 // .cfi_def_cfa_offset 80 1397 // 1398 StackSize = Inst.getOffset() / StackDivide; 1399 break; 1400 } 1401 case MCCFIInstruction::OpOffset: { 1402 // Defines a "push" of a callee-saved register. E.g. 1403 // 1404 // pushq %r15 1405 // pushq %r14 1406 // pushq %rbx 1407 // L0: 1408 // subq $120, %rsp 1409 // L1: 1410 // .cfi_offset %rbx, -40 1411 // .cfi_offset %r14, -32 1412 // .cfi_offset %r15, -24 1413 // 1414 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1415 // If there are too many saved registers, we cannot use a compact 1416 // unwind encoding. 1417 return CU::UNWIND_MODE_DWARF; 1418 1419 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1420 SavedRegs[SavedRegIdx++] = Reg; 1421 StackAdjust += OffsetSize; 1422 MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset())); 1423 InstrOffset += PushInstrSize(Reg); 1424 break; 1425 } 1426 } 1427 } 1428 1429 StackAdjust /= StackDivide; 1430 1431 if (HasFP) { 1432 if ((StackAdjust & 0xFF) != StackAdjust) 1433 // Offset was too big for a compact unwind encoding. 1434 return CU::UNWIND_MODE_DWARF; 1435 1436 // We don't attempt to track a real StackAdjust, so if the saved registers 1437 // aren't adjacent to rbp we can't cope. 1438 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1439 return CU::UNWIND_MODE_DWARF; 1440 1441 // Get the encoding of the saved registers when we have a frame pointer. 1442 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1443 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1444 1445 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1446 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1447 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1448 } else { 1449 SubtractInstrIdx += InstrOffset; 1450 ++StackAdjust; 1451 1452 if ((StackSize & 0xFF) == StackSize) { 1453 // Frameless stack with a small stack size. 1454 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1455 1456 // Encode the stack size. 1457 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1458 } else { 1459 if ((StackAdjust & 0x7) != StackAdjust) 1460 // The extra stack adjustments are too big for us to handle. 1461 return CU::UNWIND_MODE_DWARF; 1462 1463 // Frameless stack with an offset too large for us to encode compactly. 1464 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1465 1466 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1467 // instruction. 1468 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1469 1470 // Encode any extra stack adjustments (done via push instructions). 1471 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1472 } 1473 1474 // Encode the number of registers saved. (Reverse the list first.) 1475 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1476 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1477 1478 // Get the encoding of the saved registers when we don't have a frame 1479 // pointer. 1480 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1481 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1482 1483 // Encode the register encoding. 1484 CompactUnwindEncoding |= 1485 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1486 } 1487 1488 return CompactUnwindEncoding; 1489 } 1490 }; 1491 1492 } // end anonymous namespace 1493 1494 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1495 const MCSubtargetInfo &STI, 1496 const MCRegisterInfo &MRI, 1497 const MCTargetOptions &Options) { 1498 const Triple &TheTriple = STI.getTargetTriple(); 1499 if (TheTriple.isOSBinFormatMachO()) 1500 return new DarwinX86AsmBackend(T, MRI, STI); 1501 1502 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1503 return new WindowsX86AsmBackend(T, false, STI); 1504 1505 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1506 1507 if (TheTriple.isOSIAMCU()) 1508 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1509 1510 return new ELFX86_32AsmBackend(T, OSABI, STI); 1511 } 1512 1513 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1514 const MCSubtargetInfo &STI, 1515 const MCRegisterInfo &MRI, 1516 const MCTargetOptions &Options) { 1517 const Triple &TheTriple = STI.getTargetTriple(); 1518 if (TheTriple.isOSBinFormatMachO()) 1519 return new DarwinX86AsmBackend(T, MRI, STI); 1520 1521 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1522 return new WindowsX86AsmBackend(T, true, STI); 1523 1524 if (TheTriple.isUEFI()) { 1525 assert(TheTriple.isOSBinFormatCOFF() && 1526 "Only COFF format is supported in UEFI environment."); 1527 return new WindowsX86AsmBackend(T, true, STI); 1528 } 1529 1530 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1531 1532 if (TheTriple.isX32()) 1533 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1534 return new ELFX86_64AsmBackend(T, OSABI, STI); 1535 } 1536 1537 namespace { 1538 class X86ELFStreamer : public MCELFStreamer { 1539 public: 1540 X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, 1541 std::unique_ptr<MCObjectWriter> OW, 1542 std::unique_ptr<MCCodeEmitter> Emitter) 1543 : MCELFStreamer(Context, std::move(TAB), std::move(OW), 1544 std::move(Emitter)) {} 1545 1546 void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; 1547 }; 1548 } // end anonymous namespace 1549 1550 void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst, 1551 const MCSubtargetInfo &STI) { 1552 auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend()); 1553 Backend.emitInstructionBegin(S, Inst, STI); 1554 S.MCObjectStreamer::emitInstruction(Inst, STI); 1555 Backend.emitInstructionEnd(S, Inst); 1556 } 1557 1558 void X86ELFStreamer::emitInstruction(const MCInst &Inst, 1559 const MCSubtargetInfo &STI) { 1560 X86_MC::emitInstruction(*this, Inst, STI); 1561 } 1562 1563 MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context, 1564 std::unique_ptr<MCAsmBackend> &&MAB, 1565 std::unique_ptr<MCObjectWriter> &&MOW, 1566 std::unique_ptr<MCCodeEmitter> &&MCE) { 1567 return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW), 1568 std::move(MCE)); 1569 } 1570