1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86FixupKinds.h" 11 #include "MCTargetDesc/X86InstrRelaxTables.h" 12 #include "llvm/ADT/StringSwitch.h" 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/BinaryFormat/MachO.h" 15 #include "llvm/MC/MCAsmBackend.h" 16 #include "llvm/MC/MCAsmLayout.h" 17 #include "llvm/MC/MCAssembler.h" 18 #include "llvm/MC/MCCodeEmitter.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDwarf.h" 21 #include "llvm/MC/MCELFObjectWriter.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCFixupKindInfo.h" 24 #include "llvm/MC/MCInst.h" 25 #include "llvm/MC/MCInstrInfo.h" 26 #include "llvm/MC/MCMachObjectWriter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionMachO.h" 31 #include "llvm/MC/MCSubtargetInfo.h" 32 #include "llvm/MC/MCValue.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 using namespace llvm; 39 40 namespace { 41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 42 class X86AlignBranchKind { 43 private: 44 uint8_t AlignBranchKind = 0; 45 46 public: 47 void operator=(const std::string &Val) { 48 if (Val.empty()) 49 return; 50 SmallVector<StringRef, 6> BranchTypes; 51 StringRef(Val).split(BranchTypes, '+', -1, false); 52 for (auto BranchType : BranchTypes) { 53 if (BranchType == "fused") 54 addKind(X86::AlignBranchFused); 55 else if (BranchType == "jcc") 56 addKind(X86::AlignBranchJcc); 57 else if (BranchType == "jmp") 58 addKind(X86::AlignBranchJmp); 59 else if (BranchType == "call") 60 addKind(X86::AlignBranchCall); 61 else if (BranchType == "ret") 62 addKind(X86::AlignBranchRet); 63 else if (BranchType == "indirect") 64 addKind(X86::AlignBranchIndirect); 65 else { 66 errs() << "invalid argument " << BranchType.str() 67 << " to -x86-align-branch=; each element must be one of: fused, " 68 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 69 } 70 } 71 } 72 73 operator uint8_t() const { return AlignBranchKind; } 74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 75 }; 76 77 X86AlignBranchKind X86AlignBranchKindLoc; 78 79 cl::opt<unsigned> X86AlignBranchBoundary( 80 "x86-align-branch-boundary", cl::init(0), 81 cl::desc( 82 "Control how the assembler should align branches with NOP. If the " 83 "boundary's size is not 0, it should be a power of 2 and no less " 84 "than 32. Branches will be aligned to prevent from being across or " 85 "against the boundary of specified size. The default value 0 does not " 86 "align branches.")); 87 88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( 89 "x86-align-branch", 90 cl::desc( 91 "Specify types of branches to align (plus separated list of types):" 92 "\njcc indicates conditional jumps" 93 "\nfused indicates fused conditional jumps" 94 "\njmp indicates direct unconditional jumps" 95 "\ncall indicates direct and indirect calls" 96 "\nret indicates rets" 97 "\nindirect indicates indirect unconditional jumps"), 98 cl::location(X86AlignBranchKindLoc)); 99 100 cl::opt<bool> X86AlignBranchWithin32BBoundaries( 101 "x86-branches-within-32B-boundaries", cl::init(false), 102 cl::desc( 103 "Align selected instructions to mitigate negative performance impact " 104 "of Intel's micro code update for errata skx102. May break " 105 "assumptions about labels corresponding to particular instructions, " 106 "and should be used with caution.")); 107 108 cl::opt<unsigned> X86PadMaxPrefixSize( 109 "x86-pad-max-prefix-size", cl::init(0), 110 cl::desc("Maximum number of prefixes to use for padding")); 111 112 cl::opt<bool> X86PadForAlign( 113 "x86-pad-for-align", cl::init(false), cl::Hidden, 114 cl::desc("Pad previous instructions to implement align directives")); 115 116 cl::opt<bool> X86PadForBranchAlign( 117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden, 118 cl::desc("Pad previous instructions to implement branch alignment")); 119 120 class X86AsmBackend : public MCAsmBackend { 121 const MCSubtargetInfo &STI; 122 std::unique_ptr<const MCInstrInfo> MCII; 123 X86AlignBranchKind AlignBranchType; 124 Align AlignBoundary; 125 unsigned TargetPrefixMax = 0; 126 127 MCInst PrevInst; 128 MCBoundaryAlignFragment *PendingBA = nullptr; 129 std::pair<MCFragment *, size_t> PrevInstPosition; 130 bool CanPadInst; 131 132 uint8_t determinePaddingPrefix(const MCInst &Inst) const; 133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; 134 bool needAlign(const MCInst &Inst) const; 135 bool canPadBranches(MCObjectStreamer &OS) const; 136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; 137 138 public: 139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) 140 : MCAsmBackend(support::little), STI(STI), 141 MCII(T.createMCInstrInfo()) { 142 if (X86AlignBranchWithin32BBoundaries) { 143 // At the moment, this defaults to aligning fused branches, unconditional 144 // jumps, and (unfused) conditional jumps with nops. Both the 145 // instructions aligned and the alignment method (nop vs prefix) may 146 // change in the future. 147 AlignBoundary = assumeAligned(32);; 148 AlignBranchType.addKind(X86::AlignBranchFused); 149 AlignBranchType.addKind(X86::AlignBranchJcc); 150 AlignBranchType.addKind(X86::AlignBranchJmp); 151 } 152 // Allow overriding defaults set by main flag 153 if (X86AlignBranchBoundary.getNumOccurrences()) 154 AlignBoundary = assumeAligned(X86AlignBranchBoundary); 155 if (X86AlignBranch.getNumOccurrences()) 156 AlignBranchType = X86AlignBranchKindLoc; 157 if (X86PadMaxPrefixSize.getNumOccurrences()) 158 TargetPrefixMax = X86PadMaxPrefixSize; 159 } 160 161 bool allowAutoPadding() const override; 162 bool allowEnhancedRelaxation() const override; 163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, 164 const MCSubtargetInfo &STI) override; 165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; 166 167 unsigned getNumFixupKinds() const override { 168 return X86::NumTargetFixupKinds; 169 } 170 171 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; 172 173 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; 174 175 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, 176 const MCValue &Target) override; 177 178 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 179 const MCValue &Target, MutableArrayRef<char> Data, 180 uint64_t Value, bool IsResolved, 181 const MCSubtargetInfo *STI) const override; 182 183 bool mayNeedRelaxation(const MCInst &Inst, 184 const MCSubtargetInfo &STI) const override; 185 186 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, 187 const MCRelaxableFragment *DF, 188 const MCAsmLayout &Layout) const override; 189 190 void relaxInstruction(MCInst &Inst, 191 const MCSubtargetInfo &STI) const override; 192 193 bool padInstructionViaRelaxation(MCRelaxableFragment &RF, 194 MCCodeEmitter &Emitter, 195 unsigned &RemainingSize) const; 196 197 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 198 unsigned &RemainingSize) const; 199 200 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 201 unsigned &RemainingSize) const; 202 203 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; 204 205 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; 206 207 bool writeNopData(raw_ostream &OS, uint64_t Count, 208 const MCSubtargetInfo *STI) const override; 209 }; 210 } // end anonymous namespace 211 212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { 213 unsigned Op = Inst.getOpcode(); 214 switch (Op) { 215 default: 216 return Op; 217 case X86::JCC_1: 218 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; 219 case X86::JMP_1: 220 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; 221 } 222 } 223 224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { 225 unsigned Op = Inst.getOpcode(); 226 return X86::getRelaxedOpcodeArith(Op); 227 } 228 229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { 230 unsigned R = getRelaxedOpcodeArith(Inst); 231 if (R != Inst.getOpcode()) 232 return R; 233 return getRelaxedOpcodeBranch(Inst, Is16BitMode); 234 } 235 236 static X86::CondCode getCondFromBranch(const MCInst &MI, 237 const MCInstrInfo &MCII) { 238 unsigned Opcode = MI.getOpcode(); 239 switch (Opcode) { 240 default: 241 return X86::COND_INVALID; 242 case X86::JCC_1: { 243 const MCInstrDesc &Desc = MCII.get(Opcode); 244 return static_cast<X86::CondCode>( 245 MI.getOperand(Desc.getNumOperands() - 1).getImm()); 246 } 247 } 248 } 249 250 static X86::SecondMacroFusionInstKind 251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { 252 X86::CondCode CC = getCondFromBranch(MI, MCII); 253 return classifySecondCondCodeInMacroFusion(CC); 254 } 255 256 /// Check if the instruction uses RIP relative addressing. 257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 258 unsigned Opcode = MI.getOpcode(); 259 const MCInstrDesc &Desc = MCII.get(Opcode); 260 uint64_t TSFlags = Desc.TSFlags; 261 unsigned CurOp = X86II::getOperandBias(Desc); 262 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 263 if (MemoryOperand < 0) 264 return false; 265 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 266 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 267 return (BaseReg == X86::RIP); 268 } 269 270 /// Check if the instruction is a prefix. 271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { 272 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); 273 } 274 275 /// Check if the instruction is valid as the first instruction in macro fusion. 276 static bool isFirstMacroFusibleInst(const MCInst &Inst, 277 const MCInstrInfo &MCII) { 278 // An Intel instruction with RIP relative addressing is not macro fusible. 279 if (isRIPRelative(Inst, MCII)) 280 return false; 281 X86::FirstMacroFusionInstKind FIK = 282 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 283 return FIK != X86::FirstMacroFusionInstKind::Invalid; 284 } 285 286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 287 /// get a better peformance in some cases. Here, we determine which prefix is 288 /// the most suitable. 289 /// 290 /// If the instruction has a segment override prefix, use the existing one. 291 /// If the target is 64-bit, use the CS. 292 /// If the target is 32-bit, 293 /// - If the instruction has a ESP/EBP base register, use SS. 294 /// - Otherwise use DS. 295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 296 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && 297 "Prefixes can be added only in 32-bit or 64-bit mode."); 298 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 299 uint64_t TSFlags = Desc.TSFlags; 300 301 // Determine where the memory operand starts, if present. 302 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 303 if (MemoryOperand != -1) 304 MemoryOperand += X86II::getOperandBias(Desc); 305 306 unsigned SegmentReg = 0; 307 if (MemoryOperand >= 0) { 308 // Check for explicit segment override on memory operand. 309 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 310 } 311 312 switch (TSFlags & X86II::FormMask) { 313 default: 314 break; 315 case X86II::RawFrmDstSrc: { 316 // Check segment override opcode prefix as needed (not for %ds). 317 if (Inst.getOperand(2).getReg() != X86::DS) 318 SegmentReg = Inst.getOperand(2).getReg(); 319 break; 320 } 321 case X86II::RawFrmSrc: { 322 // Check segment override opcode prefix as needed (not for %ds). 323 if (Inst.getOperand(1).getReg() != X86::DS) 324 SegmentReg = Inst.getOperand(1).getReg(); 325 break; 326 } 327 case X86II::RawFrmMemOffs: { 328 // Check segment override opcode prefix as needed. 329 SegmentReg = Inst.getOperand(1).getReg(); 330 break; 331 } 332 } 333 334 if (SegmentReg != 0) 335 return X86::getSegmentOverridePrefixForReg(SegmentReg); 336 337 if (STI.hasFeature(X86::Is64Bit)) 338 return X86::CS_Encoding; 339 340 if (MemoryOperand >= 0) { 341 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 342 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 343 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 344 return X86::SS_Encoding; 345 } 346 return X86::DS_Encoding; 347 } 348 349 /// Check if the two instructions will be macro-fused on the target cpu. 350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 351 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 352 if (!InstDesc.isConditionalBranch()) 353 return false; 354 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 355 return false; 356 const X86::FirstMacroFusionInstKind CmpKind = 357 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 358 const X86::SecondMacroFusionInstKind BranchKind = 359 classifySecondInstInMacroFusion(Jcc, *MCII); 360 return X86::isMacroFused(CmpKind, BranchKind); 361 } 362 363 /// Check if the instruction has a variant symbol operand. 364 static bool hasVariantSymbol(const MCInst &MI) { 365 for (auto &Operand : MI) { 366 if (!Operand.isExpr()) 367 continue; 368 const MCExpr &Expr = *Operand.getExpr(); 369 if (Expr.getKind() == MCExpr::SymbolRef && 370 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 371 return true; 372 } 373 return false; 374 } 375 376 bool X86AsmBackend::allowAutoPadding() const { 377 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 378 } 379 380 bool X86AsmBackend::allowEnhancedRelaxation() const { 381 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 382 } 383 384 /// X86 has certain instructions which enable interrupts exactly one 385 /// instruction *after* the instruction which stores to SS. Return true if the 386 /// given instruction has such an interrupt delay slot. 387 static bool hasInterruptDelaySlot(const MCInst &Inst) { 388 switch (Inst.getOpcode()) { 389 case X86::POPSS16: 390 case X86::POPSS32: 391 case X86::STI: 392 return true; 393 394 case X86::MOV16sr: 395 case X86::MOV32sr: 396 case X86::MOV64sr: 397 case X86::MOV16sm: 398 if (Inst.getOperand(0).getReg() == X86::SS) 399 return true; 400 break; 401 } 402 return false; 403 } 404 405 /// Check if the instruction to be emitted is right after any data. 406 static bool 407 isRightAfterData(MCFragment *CurrentFragment, 408 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 409 MCFragment *F = CurrentFragment; 410 // Empty data fragments may be created to prevent further data being 411 // added into the previous fragment, we need to skip them since they 412 // have no contents. 413 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) 414 if (cast<MCDataFragment>(F)->getContents().size() != 0) 415 break; 416 417 // Since data is always emitted into a DataFragment, our check strategy is 418 // simple here. 419 // - If the fragment is a DataFragment 420 // - If it's not the fragment where the previous instruction is, 421 // returns true. 422 // - If it's the fragment holding the previous instruction but its 423 // size changed since the the previous instruction was emitted into 424 // it, returns true. 425 // - Otherwise returns false. 426 // - If the fragment is not a DataFragment, returns false. 427 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) 428 return DF != PrevInstPosition.first || 429 DF->getContents().size() != PrevInstPosition.second; 430 431 return false; 432 } 433 434 /// \returns the fragment size if it has instructions, otherwise returns 0. 435 static size_t getSizeForInstFragment(const MCFragment *F) { 436 if (!F || !F->hasInstructions()) 437 return 0; 438 // MCEncodedFragmentWithContents being templated makes this tricky. 439 switch (F->getKind()) { 440 default: 441 llvm_unreachable("Unknown fragment with instructions!"); 442 case MCFragment::FT_Data: 443 return cast<MCDataFragment>(*F).getContents().size(); 444 case MCFragment::FT_Relaxable: 445 return cast<MCRelaxableFragment>(*F).getContents().size(); 446 case MCFragment::FT_CompactEncodedInst: 447 return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); 448 } 449 } 450 451 /// Return true if we can insert NOP or prefixes automatically before the 452 /// the instruction to be emitted. 453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { 454 if (hasVariantSymbol(Inst)) 455 // Linker may rewrite the instruction with variant symbol operand(e.g. 456 // TLSCALL). 457 return false; 458 459 if (hasInterruptDelaySlot(PrevInst)) 460 // If this instruction follows an interrupt enabling instruction with a one 461 // instruction delay, inserting a nop would change behavior. 462 return false; 463 464 if (isPrefix(PrevInst, *MCII)) 465 // If this instruction follows a prefix, inserting a nop/prefix would change 466 // semantic. 467 return false; 468 469 if (isPrefix(Inst, *MCII)) 470 // If this instruction is a prefix, inserting a prefix would change 471 // semantic. 472 return false; 473 474 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) 475 // If this instruction follows any data, there is no clear 476 // instruction boundary, inserting a nop/prefix would change semantic. 477 return false; 478 479 return true; 480 } 481 482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { 483 if (!OS.getAllowAutoPadding()) 484 return false; 485 assert(allowAutoPadding() && "incorrect initialization!"); 486 487 // We only pad in text section. 488 if (!OS.getCurrentSectionOnly()->getKind().isText()) 489 return false; 490 491 // To be Done: Currently don't deal with Bundle cases. 492 if (OS.getAssembler().isBundlingEnabled()) 493 return false; 494 495 // Branches only need to be aligned in 32-bit or 64-bit mode. 496 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit))) 497 return false; 498 499 return true; 500 } 501 502 /// Check if the instruction operand needs to be aligned. 503 bool X86AsmBackend::needAlign(const MCInst &Inst) const { 504 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 505 return (Desc.isConditionalBranch() && 506 (AlignBranchType & X86::AlignBranchJcc)) || 507 (Desc.isUnconditionalBranch() && 508 (AlignBranchType & X86::AlignBranchJmp)) || 509 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || 510 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || 511 (Desc.isIndirectBranch() && 512 (AlignBranchType & X86::AlignBranchIndirect)); 513 } 514 515 /// Insert BoundaryAlignFragment before instructions to align branches. 516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, 517 const MCInst &Inst, const MCSubtargetInfo &STI) { 518 CanPadInst = canPadInst(Inst, OS); 519 520 if (!canPadBranches(OS)) 521 return; 522 523 if (!isMacroFused(PrevInst, Inst)) 524 // Macro fusion doesn't happen indeed, clear the pending. 525 PendingBA = nullptr; 526 527 if (!CanPadInst) 528 return; 529 530 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { 531 // Macro fusion actually happens and there is no other fragment inserted 532 // after the previous instruction. 533 // 534 // Do nothing here since we already inserted a BoudaryAlign fragment when 535 // we met the first instruction in the fused pair and we'll tie them 536 // together in emitInstructionEnd. 537 // 538 // Note: When there is at least one fragment, such as MCAlignFragment, 539 // inserted after the previous instruction, e.g. 540 // 541 // \code 542 // cmp %rax %rcx 543 // .align 16 544 // je .Label0 545 // \ endcode 546 // 547 // We will treat the JCC as a unfused branch although it may be fused 548 // with the CMP. 549 return; 550 } 551 552 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && 553 isFirstMacroFusibleInst(Inst, *MCII))) { 554 // If we meet a unfused branch or the first instuction in a fusiable pair, 555 // insert a BoundaryAlign fragment. 556 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI)); 557 } 558 } 559 560 /// Set the last fragment to be aligned for the BoundaryAlignFragment. 561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { 562 PrevInst = Inst; 563 MCFragment *CF = OS.getCurrentFragment(); 564 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); 565 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) 566 F->setAllowAutoPadding(CanPadInst); 567 568 if (!canPadBranches(OS)) 569 return; 570 571 if (!needAlign(Inst) || !PendingBA) 572 return; 573 574 // Tie the aligned instructions into a a pending BoundaryAlign. 575 PendingBA->setLastFragment(CF); 576 PendingBA = nullptr; 577 578 // We need to ensure that further data isn't added to the current 579 // DataFragment, so that we can get the size of instructions later in 580 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty 581 // DataFragment. 582 if (isa_and_nonnull<MCDataFragment>(CF)) 583 OS.insert(new MCDataFragment()); 584 585 // Update the maximum alignment on the current section if necessary. 586 MCSection *Sec = OS.getCurrentSectionOnly(); 587 Sec->ensureMinAlignment(AlignBoundary); 588 } 589 590 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 591 if (STI.getTargetTriple().isOSBinFormatELF()) { 592 unsigned Type; 593 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 594 Type = llvm::StringSwitch<unsigned>(Name) 595 #define ELF_RELOC(X, Y) .Case(#X, Y) 596 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 597 #undef ELF_RELOC 598 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE) 599 .Case("BFD_RELOC_8", ELF::R_X86_64_8) 600 .Case("BFD_RELOC_16", ELF::R_X86_64_16) 601 .Case("BFD_RELOC_32", ELF::R_X86_64_32) 602 .Case("BFD_RELOC_64", ELF::R_X86_64_64) 603 .Default(-1u); 604 } else { 605 Type = llvm::StringSwitch<unsigned>(Name) 606 #define ELF_RELOC(X, Y) .Case(#X, Y) 607 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 608 #undef ELF_RELOC 609 .Case("BFD_RELOC_NONE", ELF::R_386_NONE) 610 .Case("BFD_RELOC_8", ELF::R_386_8) 611 .Case("BFD_RELOC_16", ELF::R_386_16) 612 .Case("BFD_RELOC_32", ELF::R_386_32) 613 .Default(-1u); 614 } 615 if (Type == -1u) 616 return std::nullopt; 617 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 618 } 619 return MCAsmBackend::getFixupKind(Name); 620 } 621 622 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 623 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 624 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 625 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 626 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 627 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 628 {"reloc_signed_4byte", 0, 32, 0}, 629 {"reloc_signed_4byte_relax", 0, 32, 0}, 630 {"reloc_global_offset_table", 0, 32, 0}, 631 {"reloc_global_offset_table8", 0, 64, 0}, 632 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 633 }; 634 635 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 636 // do not require any extra processing. 637 if (Kind >= FirstLiteralRelocationKind) 638 return MCAsmBackend::getFixupKindInfo(FK_NONE); 639 640 if (Kind < FirstTargetFixupKind) 641 return MCAsmBackend::getFixupKindInfo(Kind); 642 643 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 644 "Invalid kind!"); 645 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 646 return Infos[Kind - FirstTargetFixupKind]; 647 } 648 649 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 650 const MCFixup &Fixup, 651 const MCValue &) { 652 return Fixup.getKind() >= FirstLiteralRelocationKind; 653 } 654 655 static unsigned getFixupKindSize(unsigned Kind) { 656 switch (Kind) { 657 default: 658 llvm_unreachable("invalid fixup kind!"); 659 case FK_NONE: 660 return 0; 661 case FK_PCRel_1: 662 case FK_SecRel_1: 663 case FK_Data_1: 664 return 1; 665 case FK_PCRel_2: 666 case FK_SecRel_2: 667 case FK_Data_2: 668 return 2; 669 case FK_PCRel_4: 670 case X86::reloc_riprel_4byte: 671 case X86::reloc_riprel_4byte_relax: 672 case X86::reloc_riprel_4byte_relax_rex: 673 case X86::reloc_riprel_4byte_movq_load: 674 case X86::reloc_signed_4byte: 675 case X86::reloc_signed_4byte_relax: 676 case X86::reloc_global_offset_table: 677 case X86::reloc_branch_4byte_pcrel: 678 case FK_SecRel_4: 679 case FK_Data_4: 680 return 4; 681 case FK_PCRel_8: 682 case FK_SecRel_8: 683 case FK_Data_8: 684 case X86::reloc_global_offset_table8: 685 return 8; 686 } 687 } 688 689 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 690 const MCValue &Target, 691 MutableArrayRef<char> Data, 692 uint64_t Value, bool IsResolved, 693 const MCSubtargetInfo *STI) const { 694 unsigned Kind = Fixup.getKind(); 695 if (Kind >= FirstLiteralRelocationKind) 696 return; 697 unsigned Size = getFixupKindSize(Kind); 698 699 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); 700 701 int64_t SignedValue = static_cast<int64_t>(Value); 702 if ((Target.isAbsolute() || IsResolved) && 703 getFixupKindInfo(Fixup.getKind()).Flags & 704 MCFixupKindInfo::FKF_IsPCRel) { 705 // check that PC relative fixup fits into the fixup size. 706 if (Size > 0 && !isIntN(Size * 8, SignedValue)) 707 Asm.getContext().reportError( 708 Fixup.getLoc(), "value of " + Twine(SignedValue) + 709 " is too large for field of " + Twine(Size) + 710 ((Size == 1) ? " byte." : " bytes.")); 711 } else { 712 // Check that uppper bits are either all zeros or all ones. 713 // Specifically ignore overflow/underflow as long as the leakage is 714 // limited to the lower bits. This is to remain compatible with 715 // other assemblers. 716 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && 717 "Value does not fit in the Fixup field"); 718 } 719 720 for (unsigned i = 0; i != Size; ++i) 721 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); 722 } 723 724 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst, 725 const MCSubtargetInfo &STI) const { 726 // Branches can always be relaxed in either mode. 727 if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode()) 728 return true; 729 730 // Check if this instruction is ever relaxable. 731 if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode()) 732 return false; 733 734 735 // Check if the relaxable operand has an expression. For the current set of 736 // relaxable instructions, the relaxable operand is always the last operand. 737 unsigned RelaxableOp = Inst.getNumOperands() - 1; 738 if (Inst.getOperand(RelaxableOp).isExpr()) 739 return true; 740 741 return false; 742 } 743 744 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, 745 uint64_t Value, 746 const MCRelaxableFragment *DF, 747 const MCAsmLayout &Layout) const { 748 // Relax if the value is too big for a (signed) i8. 749 return !isInt<8>(Value); 750 } 751 752 // FIXME: Can tblgen help at all here to verify there aren't other instructions 753 // we can relax? 754 void X86AsmBackend::relaxInstruction(MCInst &Inst, 755 const MCSubtargetInfo &STI) const { 756 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 757 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; 758 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); 759 760 if (RelaxedOp == Inst.getOpcode()) { 761 SmallString<256> Tmp; 762 raw_svector_ostream OS(Tmp); 763 Inst.dump_pretty(OS); 764 OS << "\n"; 765 report_fatal_error("unexpected instruction to relax: " + OS.str()); 766 } 767 768 Inst.setOpcode(RelaxedOp); 769 } 770 771 /// Return true if this instruction has been fully relaxed into it's most 772 /// general available form. 773 static bool isFullyRelaxed(const MCRelaxableFragment &RF) { 774 auto &Inst = RF.getInst(); 775 auto &STI = *RF.getSubtargetInfo(); 776 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; 777 return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); 778 } 779 780 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, 781 MCCodeEmitter &Emitter, 782 unsigned &RemainingSize) const { 783 if (!RF.getAllowAutoPadding()) 784 return false; 785 // If the instruction isn't fully relaxed, shifting it around might require a 786 // larger value for one of the fixups then can be encoded. The outer loop 787 // will also catch this before moving to the next instruction, but we need to 788 // prevent padding this single instruction as well. 789 if (!isFullyRelaxed(RF)) 790 return false; 791 792 const unsigned OldSize = RF.getContents().size(); 793 if (OldSize == 15) 794 return false; 795 796 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); 797 const unsigned RemainingPrefixSize = [&]() -> unsigned { 798 SmallString<15> Code; 799 raw_svector_ostream VecOS(Code); 800 Emitter.emitPrefix(RF.getInst(), VecOS, STI); 801 assert(Code.size() < 15 && "The number of prefixes must be less than 15."); 802 803 // TODO: It turns out we need a decent amount of plumbing for the target 804 // specific bits to determine number of prefixes its safe to add. Various 805 // targets (older chips mostly, but also Atom family) encounter decoder 806 // stalls with too many prefixes. For testing purposes, we set the value 807 // externally for the moment. 808 unsigned ExistingPrefixSize = Code.size(); 809 if (TargetPrefixMax <= ExistingPrefixSize) 810 return 0; 811 return TargetPrefixMax - ExistingPrefixSize; 812 }(); 813 const unsigned PrefixBytesToAdd = 814 std::min(MaxPossiblePad, RemainingPrefixSize); 815 if (PrefixBytesToAdd == 0) 816 return false; 817 818 const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); 819 820 SmallString<256> Code; 821 Code.append(PrefixBytesToAdd, Prefix); 822 Code.append(RF.getContents().begin(), RF.getContents().end()); 823 RF.getContents() = Code; 824 825 // Adjust the fixups for the change in offsets 826 for (auto &F : RF.getFixups()) { 827 F.setOffset(F.getOffset() + PrefixBytesToAdd); 828 } 829 830 RemainingSize -= PrefixBytesToAdd; 831 return true; 832 } 833 834 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, 835 MCCodeEmitter &Emitter, 836 unsigned &RemainingSize) const { 837 if (isFullyRelaxed(RF)) 838 // TODO: There are lots of other tricks we could apply for increasing 839 // encoding size without impacting performance. 840 return false; 841 842 MCInst Relaxed = RF.getInst(); 843 relaxInstruction(Relaxed, *RF.getSubtargetInfo()); 844 845 SmallVector<MCFixup, 4> Fixups; 846 SmallString<15> Code; 847 raw_svector_ostream VecOS(Code); 848 Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo()); 849 const unsigned OldSize = RF.getContents().size(); 850 const unsigned NewSize = Code.size(); 851 assert(NewSize >= OldSize && "size decrease during relaxation?"); 852 unsigned Delta = NewSize - OldSize; 853 if (Delta > RemainingSize) 854 return false; 855 RF.setInst(Relaxed); 856 RF.getContents() = Code; 857 RF.getFixups() = Fixups; 858 RemainingSize -= Delta; 859 return true; 860 } 861 862 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, 863 MCCodeEmitter &Emitter, 864 unsigned &RemainingSize) const { 865 bool Changed = false; 866 if (RemainingSize != 0) 867 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); 868 if (RemainingSize != 0) 869 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); 870 return Changed; 871 } 872 873 void X86AsmBackend::finishLayout(MCAssembler const &Asm, 874 MCAsmLayout &Layout) const { 875 // See if we can further relax some instructions to cut down on the number of 876 // nop bytes required for code alignment. The actual win is in reducing 877 // instruction count, not number of bytes. Modern X86-64 can easily end up 878 // decode limited. It is often better to reduce the number of instructions 879 // (i.e. eliminate nops) even at the cost of increasing the size and 880 // complexity of others. 881 if (!X86PadForAlign && !X86PadForBranchAlign) 882 return; 883 884 // The processed regions are delimitered by LabeledFragments. -g may have more 885 // MCSymbols and therefore different relaxation results. X86PadForAlign is 886 // disabled by default to eliminate the -g vs non -g difference. 887 DenseSet<MCFragment *> LabeledFragments; 888 for (const MCSymbol &S : Asm.symbols()) 889 LabeledFragments.insert(S.getFragment(false)); 890 891 for (MCSection &Sec : Asm) { 892 if (!Sec.getKind().isText()) 893 continue; 894 895 SmallVector<MCRelaxableFragment *, 4> Relaxable; 896 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { 897 MCFragment &F = *I; 898 899 if (LabeledFragments.count(&F)) 900 Relaxable.clear(); 901 902 if (F.getKind() == MCFragment::FT_Data || 903 F.getKind() == MCFragment::FT_CompactEncodedInst) 904 // Skip and ignore 905 continue; 906 907 if (F.getKind() == MCFragment::FT_Relaxable) { 908 auto &RF = cast<MCRelaxableFragment>(*I); 909 Relaxable.push_back(&RF); 910 continue; 911 } 912 913 auto canHandle = [](MCFragment &F) -> bool { 914 switch (F.getKind()) { 915 default: 916 return false; 917 case MCFragment::FT_Align: 918 return X86PadForAlign; 919 case MCFragment::FT_BoundaryAlign: 920 return X86PadForBranchAlign; 921 } 922 }; 923 // For any unhandled kind, assume we can't change layout. 924 if (!canHandle(F)) { 925 Relaxable.clear(); 926 continue; 927 } 928 929 #ifndef NDEBUG 930 const uint64_t OrigOffset = Layout.getFragmentOffset(&F); 931 #endif 932 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); 933 934 // To keep the effects local, prefer to relax instructions closest to 935 // the align directive. This is purely about human understandability 936 // of the resulting code. If we later find a reason to expand 937 // particular instructions over others, we can adjust. 938 MCFragment *FirstChangedFragment = nullptr; 939 unsigned RemainingSize = OrigSize; 940 while (!Relaxable.empty() && RemainingSize != 0) { 941 auto &RF = *Relaxable.pop_back_val(); 942 // Give the backend a chance to play any tricks it wishes to increase 943 // the encoding size of the given instruction. Target independent code 944 // will try further relaxation, but target's may play further tricks. 945 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) 946 FirstChangedFragment = &RF; 947 948 // If we have an instruction which hasn't been fully relaxed, we can't 949 // skip past it and insert bytes before it. Changing its starting 950 // offset might require a larger negative offset than it can encode. 951 // We don't need to worry about larger positive offsets as none of the 952 // possible offsets between this and our align are visible, and the 953 // ones afterwards aren't changing. 954 if (!isFullyRelaxed(RF)) 955 break; 956 } 957 Relaxable.clear(); 958 959 if (FirstChangedFragment) { 960 // Make sure the offsets for any fragments in the effected range get 961 // updated. Note that this (conservatively) invalidates the offsets of 962 // those following, but this is not required. 963 Layout.invalidateFragmentsFrom(FirstChangedFragment); 964 } 965 966 // BoundaryAlign explicitly tracks it's size (unlike align) 967 if (F.getKind() == MCFragment::FT_BoundaryAlign) 968 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 969 970 #ifndef NDEBUG 971 const uint64_t FinalOffset = Layout.getFragmentOffset(&F); 972 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); 973 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 974 "can't move start of next fragment!"); 975 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 976 #endif 977 978 // If we're looking at a boundary align, make sure we don't try to pad 979 // its target instructions for some following directive. Doing so would 980 // break the alignment of the current boundary align. 981 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 982 const MCFragment *LastFragment = BF->getLastFragment(); 983 if (!LastFragment) 984 continue; 985 while (&*I != LastFragment) 986 ++I; 987 } 988 } 989 } 990 991 // The layout is done. Mark every fragment as valid. 992 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { 993 MCSection &Section = *Layout.getSectionOrder()[i]; 994 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); 995 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); 996 } 997 } 998 999 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { 1000 if (STI.hasFeature(X86::Is16Bit)) 1001 return 4; 1002 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit)) 1003 return 1; 1004 if (STI.getFeatureBits()[X86::TuningFast7ByteNOP]) 1005 return 7; 1006 if (STI.getFeatureBits()[X86::TuningFast15ByteNOP]) 1007 return 15; 1008 if (STI.getFeatureBits()[X86::TuningFast11ByteNOP]) 1009 return 11; 1010 // FIXME: handle 32-bit mode 1011 // 15-bytes is the longest single NOP instruction, but 10-bytes is 1012 // commonly the longest that can be efficiently decoded. 1013 return 10; 1014 } 1015 1016 /// Write a sequence of optimal nops to the output, covering \p Count 1017 /// bytes. 1018 /// \return - true on success, false on failure 1019 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 1020 const MCSubtargetInfo *STI) const { 1021 static const char Nops32Bit[10][11] = { 1022 // nop 1023 "\x90", 1024 // xchg %ax,%ax 1025 "\x66\x90", 1026 // nopl (%[re]ax) 1027 "\x0f\x1f\x00", 1028 // nopl 0(%[re]ax) 1029 "\x0f\x1f\x40\x00", 1030 // nopl 0(%[re]ax,%[re]ax,1) 1031 "\x0f\x1f\x44\x00\x00", 1032 // nopw 0(%[re]ax,%[re]ax,1) 1033 "\x66\x0f\x1f\x44\x00\x00", 1034 // nopl 0L(%[re]ax) 1035 "\x0f\x1f\x80\x00\x00\x00\x00", 1036 // nopl 0L(%[re]ax,%[re]ax,1) 1037 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1038 // nopw 0L(%[re]ax,%[re]ax,1) 1039 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1040 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1041 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1042 }; 1043 1044 // 16-bit mode uses different nop patterns than 32-bit. 1045 static const char Nops16Bit[4][11] = { 1046 // nop 1047 "\x90", 1048 // xchg %eax,%eax 1049 "\x66\x90", 1050 // lea 0(%si),%si 1051 "\x8d\x74\x00", 1052 // lea 0w(%si),%si 1053 "\x8d\xb4\x00\x00", 1054 }; 1055 1056 const char(*Nops)[11] = 1057 STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit; 1058 1059 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1060 1061 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1062 // length. 1063 do { 1064 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1065 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1066 for (uint8_t i = 0; i < Prefixes; i++) 1067 OS << '\x66'; 1068 const uint8_t Rest = ThisNopLength - Prefixes; 1069 if (Rest != 0) 1070 OS.write(Nops[Rest - 1], Rest); 1071 Count -= ThisNopLength; 1072 } while (Count != 0); 1073 1074 return true; 1075 } 1076 1077 /* *** */ 1078 1079 namespace { 1080 1081 class ELFX86AsmBackend : public X86AsmBackend { 1082 public: 1083 uint8_t OSABI; 1084 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1085 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1086 }; 1087 1088 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1089 public: 1090 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1091 const MCSubtargetInfo &STI) 1092 : ELFX86AsmBackend(T, OSABI, STI) {} 1093 1094 std::unique_ptr<MCObjectTargetWriter> 1095 createObjectTargetWriter() const override { 1096 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1097 } 1098 }; 1099 1100 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1101 public: 1102 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1103 const MCSubtargetInfo &STI) 1104 : ELFX86AsmBackend(T, OSABI, STI) {} 1105 1106 std::unique_ptr<MCObjectTargetWriter> 1107 createObjectTargetWriter() const override { 1108 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1109 ELF::EM_X86_64); 1110 } 1111 }; 1112 1113 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1114 public: 1115 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1116 const MCSubtargetInfo &STI) 1117 : ELFX86AsmBackend(T, OSABI, STI) {} 1118 1119 std::unique_ptr<MCObjectTargetWriter> 1120 createObjectTargetWriter() const override { 1121 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1122 ELF::EM_IAMCU); 1123 } 1124 }; 1125 1126 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1127 public: 1128 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1129 const MCSubtargetInfo &STI) 1130 : ELFX86AsmBackend(T, OSABI, STI) {} 1131 1132 std::unique_ptr<MCObjectTargetWriter> 1133 createObjectTargetWriter() const override { 1134 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1135 } 1136 }; 1137 1138 class WindowsX86AsmBackend : public X86AsmBackend { 1139 bool Is64Bit; 1140 1141 public: 1142 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1143 const MCSubtargetInfo &STI) 1144 : X86AsmBackend(T, STI) 1145 , Is64Bit(is64Bit) { 1146 } 1147 1148 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1149 return StringSwitch<std::optional<MCFixupKind>>(Name) 1150 .Case("dir32", FK_Data_4) 1151 .Case("secrel32", FK_SecRel_4) 1152 .Case("secidx", FK_SecRel_2) 1153 .Default(MCAsmBackend::getFixupKind(Name)); 1154 } 1155 1156 std::unique_ptr<MCObjectTargetWriter> 1157 createObjectTargetWriter() const override { 1158 return createX86WinCOFFObjectWriter(Is64Bit); 1159 } 1160 }; 1161 1162 namespace CU { 1163 1164 /// Compact unwind encoding values. 1165 enum CompactUnwindEncodings { 1166 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1167 /// the return address, then [RE]SP is moved to [RE]BP. 1168 UNWIND_MODE_BP_FRAME = 0x01000000, 1169 1170 /// A frameless function with a small constant stack size. 1171 UNWIND_MODE_STACK_IMMD = 0x02000000, 1172 1173 /// A frameless function with a large constant stack size. 1174 UNWIND_MODE_STACK_IND = 0x03000000, 1175 1176 /// No compact unwind encoding is available. 1177 UNWIND_MODE_DWARF = 0x04000000, 1178 1179 /// Mask for encoding the frame registers. 1180 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1181 1182 /// Mask for encoding the frameless registers. 1183 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1184 }; 1185 1186 } // namespace CU 1187 1188 class DarwinX86AsmBackend : public X86AsmBackend { 1189 const MCRegisterInfo &MRI; 1190 1191 /// Number of registers that can be saved in a compact unwind encoding. 1192 enum { CU_NUM_SAVED_REGS = 6 }; 1193 1194 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1195 Triple TT; 1196 bool Is64Bit; 1197 1198 unsigned OffsetSize; ///< Offset of a "push" instruction. 1199 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1200 unsigned StackDivide; ///< Amount to adjust stack size by. 1201 protected: 1202 /// Size of a "push" instruction for the given register. 1203 unsigned PushInstrSize(unsigned Reg) const { 1204 switch (Reg) { 1205 case X86::EBX: 1206 case X86::ECX: 1207 case X86::EDX: 1208 case X86::EDI: 1209 case X86::ESI: 1210 case X86::EBP: 1211 case X86::RBX: 1212 case X86::RBP: 1213 return 1; 1214 case X86::R12: 1215 case X86::R13: 1216 case X86::R14: 1217 case X86::R15: 1218 return 2; 1219 } 1220 return 1; 1221 } 1222 1223 private: 1224 /// Get the compact unwind number for a given register. The number 1225 /// corresponds to the enum lists in compact_unwind_encoding.h. 1226 int getCompactUnwindRegNum(unsigned Reg) const { 1227 static const MCPhysReg CU32BitRegs[7] = { 1228 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1229 }; 1230 static const MCPhysReg CU64BitRegs[] = { 1231 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1232 }; 1233 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1234 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1235 if (*CURegs == Reg) 1236 return Idx; 1237 1238 return -1; 1239 } 1240 1241 /// Return the registers encoded for a compact encoding with a frame 1242 /// pointer. 1243 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1244 // Encode the registers in the order they were saved --- 3-bits per 1245 // register. The list of saved registers is assumed to be in reverse 1246 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1247 uint32_t RegEnc = 0; 1248 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1249 unsigned Reg = SavedRegs[i]; 1250 if (Reg == 0) break; 1251 1252 int CURegNum = getCompactUnwindRegNum(Reg); 1253 if (CURegNum == -1) return ~0U; 1254 1255 // Encode the 3-bit register number in order, skipping over 3-bits for 1256 // each register. 1257 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1258 } 1259 1260 assert((RegEnc & 0x3FFFF) == RegEnc && 1261 "Invalid compact register encoding!"); 1262 return RegEnc; 1263 } 1264 1265 /// Create the permutation encoding used with frameless stacks. It is 1266 /// passed the number of registers to be saved and an array of the registers 1267 /// saved. 1268 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1269 // The saved registers are numbered from 1 to 6. In order to encode the 1270 // order in which they were saved, we re-number them according to their 1271 // place in the register order. The re-numbering is relative to the last 1272 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in 1273 // that order: 1274 // 1275 // Orig Re-Num 1276 // ---- ------ 1277 // 6 6 1278 // 2 2 1279 // 4 3 1280 // 5 3 1281 // 1282 for (unsigned i = 0; i < RegCount; ++i) { 1283 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1284 if (CUReg == -1) return ~0U; 1285 SavedRegs[i] = CUReg; 1286 } 1287 1288 // Reverse the list. 1289 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1290 1291 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1292 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1293 unsigned Countless = 0; 1294 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1295 if (SavedRegs[j] < SavedRegs[i]) 1296 ++Countless; 1297 1298 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1299 } 1300 1301 // Take the renumbered values and encode them into a 10-bit number. 1302 uint32_t permutationEncoding = 0; 1303 switch (RegCount) { 1304 case 6: 1305 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1306 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1307 + RenumRegs[4]; 1308 break; 1309 case 5: 1310 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1311 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1312 + RenumRegs[5]; 1313 break; 1314 case 4: 1315 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1316 + 3 * RenumRegs[4] + RenumRegs[5]; 1317 break; 1318 case 3: 1319 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1320 + RenumRegs[5]; 1321 break; 1322 case 2: 1323 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1324 break; 1325 case 1: 1326 permutationEncoding |= RenumRegs[5]; 1327 break; 1328 } 1329 1330 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1331 "Invalid compact register encoding!"); 1332 return permutationEncoding; 1333 } 1334 1335 public: 1336 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1337 const MCSubtargetInfo &STI) 1338 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1339 Is64Bit(TT.isArch64Bit()) { 1340 memset(SavedRegs, 0, sizeof(SavedRegs)); 1341 OffsetSize = Is64Bit ? 8 : 4; 1342 MoveInstrSize = Is64Bit ? 3 : 2; 1343 StackDivide = Is64Bit ? 8 : 4; 1344 } 1345 1346 std::unique_ptr<MCObjectTargetWriter> 1347 createObjectTargetWriter() const override { 1348 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1349 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1350 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1351 } 1352 1353 /// Implementation of algorithm to generate the compact unwind encoding 1354 /// for the CFI instructions. 1355 uint32_t 1356 generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { 1357 if (Instrs.empty()) return 0; 1358 1359 // Reset the saved registers. 1360 unsigned SavedRegIdx = 0; 1361 memset(SavedRegs, 0, sizeof(SavedRegs)); 1362 1363 bool HasFP = false; 1364 1365 // Encode that we are using EBP/RBP as the frame pointer. 1366 uint32_t CompactUnwindEncoding = 0; 1367 1368 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1369 unsigned InstrOffset = 0; 1370 unsigned StackAdjust = 0; 1371 unsigned StackSize = 0; 1372 int MinAbsOffset = std::numeric_limits<int>::max(); 1373 1374 for (const MCCFIInstruction &Inst : Instrs) { 1375 switch (Inst.getOperation()) { 1376 default: 1377 // Any other CFI directives indicate a frame that we aren't prepared 1378 // to represent via compact unwind, so just bail out. 1379 return CU::UNWIND_MODE_DWARF; 1380 case MCCFIInstruction::OpDefCfaRegister: { 1381 // Defines a frame pointer. E.g. 1382 // 1383 // movq %rsp, %rbp 1384 // L0: 1385 // .cfi_def_cfa_register %rbp 1386 // 1387 HasFP = true; 1388 1389 // If the frame pointer is other than esp/rsp, we do not have a way to 1390 // generate a compact unwinding representation, so bail out. 1391 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1392 (Is64Bit ? X86::RBP : X86::EBP)) 1393 return CU::UNWIND_MODE_DWARF; 1394 1395 // Reset the counts. 1396 memset(SavedRegs, 0, sizeof(SavedRegs)); 1397 StackAdjust = 0; 1398 SavedRegIdx = 0; 1399 MinAbsOffset = std::numeric_limits<int>::max(); 1400 InstrOffset += MoveInstrSize; 1401 break; 1402 } 1403 case MCCFIInstruction::OpDefCfaOffset: { 1404 // Defines a new offset for the CFA. E.g. 1405 // 1406 // With frame: 1407 // 1408 // pushq %rbp 1409 // L0: 1410 // .cfi_def_cfa_offset 16 1411 // 1412 // Without frame: 1413 // 1414 // subq $72, %rsp 1415 // L0: 1416 // .cfi_def_cfa_offset 80 1417 // 1418 StackSize = Inst.getOffset() / StackDivide; 1419 break; 1420 } 1421 case MCCFIInstruction::OpOffset: { 1422 // Defines a "push" of a callee-saved register. E.g. 1423 // 1424 // pushq %r15 1425 // pushq %r14 1426 // pushq %rbx 1427 // L0: 1428 // subq $120, %rsp 1429 // L1: 1430 // .cfi_offset %rbx, -40 1431 // .cfi_offset %r14, -32 1432 // .cfi_offset %r15, -24 1433 // 1434 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1435 // If there are too many saved registers, we cannot use a compact 1436 // unwind encoding. 1437 return CU::UNWIND_MODE_DWARF; 1438 1439 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1440 SavedRegs[SavedRegIdx++] = Reg; 1441 StackAdjust += OffsetSize; 1442 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset())); 1443 InstrOffset += PushInstrSize(Reg); 1444 break; 1445 } 1446 } 1447 } 1448 1449 StackAdjust /= StackDivide; 1450 1451 if (HasFP) { 1452 if ((StackAdjust & 0xFF) != StackAdjust) 1453 // Offset was too big for a compact unwind encoding. 1454 return CU::UNWIND_MODE_DWARF; 1455 1456 // We don't attempt to track a real StackAdjust, so if the saved registers 1457 // aren't adjacent to rbp we can't cope. 1458 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1459 return CU::UNWIND_MODE_DWARF; 1460 1461 // Get the encoding of the saved registers when we have a frame pointer. 1462 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1463 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1464 1465 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1466 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1467 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1468 } else { 1469 SubtractInstrIdx += InstrOffset; 1470 ++StackAdjust; 1471 1472 if ((StackSize & 0xFF) == StackSize) { 1473 // Frameless stack with a small stack size. 1474 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1475 1476 // Encode the stack size. 1477 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1478 } else { 1479 if ((StackAdjust & 0x7) != StackAdjust) 1480 // The extra stack adjustments are too big for us to handle. 1481 return CU::UNWIND_MODE_DWARF; 1482 1483 // Frameless stack with an offset too large for us to encode compactly. 1484 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1485 1486 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1487 // instruction. 1488 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1489 1490 // Encode any extra stack adjustments (done via push instructions). 1491 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1492 } 1493 1494 // Encode the number of registers saved. (Reverse the list first.) 1495 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1496 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1497 1498 // Get the encoding of the saved registers when we don't have a frame 1499 // pointer. 1500 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1501 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1502 1503 // Encode the register encoding. 1504 CompactUnwindEncoding |= 1505 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1506 } 1507 1508 return CompactUnwindEncoding; 1509 } 1510 }; 1511 1512 } // end anonymous namespace 1513 1514 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1515 const MCSubtargetInfo &STI, 1516 const MCRegisterInfo &MRI, 1517 const MCTargetOptions &Options) { 1518 const Triple &TheTriple = STI.getTargetTriple(); 1519 if (TheTriple.isOSBinFormatMachO()) 1520 return new DarwinX86AsmBackend(T, MRI, STI); 1521 1522 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1523 return new WindowsX86AsmBackend(T, false, STI); 1524 1525 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1526 1527 if (TheTriple.isOSIAMCU()) 1528 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1529 1530 return new ELFX86_32AsmBackend(T, OSABI, STI); 1531 } 1532 1533 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1534 const MCSubtargetInfo &STI, 1535 const MCRegisterInfo &MRI, 1536 const MCTargetOptions &Options) { 1537 const Triple &TheTriple = STI.getTargetTriple(); 1538 if (TheTriple.isOSBinFormatMachO()) 1539 return new DarwinX86AsmBackend(T, MRI, STI); 1540 1541 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1542 return new WindowsX86AsmBackend(T, true, STI); 1543 1544 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1545 1546 if (TheTriple.isX32()) 1547 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1548 return new ELFX86_64AsmBackend(T, OSABI, STI); 1549 } 1550