//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind.
///
/// The mask is stored in a single byte; it converts implicitly to uint8_t so
/// it can be tested with bitwise operators against individual kinds.
class X86AlignBranchKind {
private:
  /// Bitwise OR of the kinds added so far; 0 means no kind is selected.
  uint8_t AlignBranchKind = 0;

public:
  /// Parse a '+'-separated list of branch kind names and OR every recognised
  /// kind into the mask. An empty string leaves the mask untouched. Bits that
  /// are already set are never cleared. A name that is not recognised is
  /// reported on errs() and otherwise ignored; parsing continues with the
  /// next element.
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  /// Return the accumulated mask.
  operator uint8_t() const { return AlignBranchKind; }
  /// Set the bit(s) of \p Value in the mask.
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};

/// Storage for the value parsed from -x86-align-branch (see the cl::location
/// of X86AlignBranch below).
X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

/// The X86 assembler backend: fixup application, instruction relaxation and
/// the optional branch-alignment / instruction-padding machinery controlled
/// by the command line options declared above.
class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  /// Mask of the branch kinds that should be aligned.
  X86AlignBranchKind AlignBranchType;
  /// Boundary that aligned branches are kept from crossing; left
  /// default-constructed unless one of the alignment options sets it.
  Align AlignBoundary;
  /// Upper bound on prefix bytes used when padding an instruction; 0 unless
  /// -x86-pad-max-prefix-size was given.
  unsigned TargetPrefixMax = 0;

  /// The instruction most recently passed to emitInstructionEnd().
  MCInst PrevInst;
  /// BoundaryAlign fragment inserted by emitInstructionBegin() that has not
  /// yet been tied to its last fragment; null when none is pending.
  MCBoundaryAlignFragment *PendingBA = nullptr;
  /// Current fragment, and its size if it holds instructions, as recorded at
  /// the end of the previous instruction.
  std::pair<MCFragment *, size_t> PrevInstPosition;
  /// Result of canPadInst() for the instruction currently being emitted.
  bool CanPadInst = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  /// Build the backend and derive the alignment configuration from the
  /// command line: -x86-branches-within-32B-boundaries installs defaults,
  /// which the more specific options then override when they were given.
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

/// Return true if \p Opcode is one of the short branch opcodes (JCC_1 or
/// JMP_1) that getRelaxedOpcodeBranch() knows how to widen.
static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}

/// Map a short branch opcode to its relaxed form: the _2 variant when
/// \p Is16BitMode is set, the _4 variant otherwise. Passing any opcode other
/// than JCC_1 or JMP_1 is a programming error (llvm_unreachable).
static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

/// Return the opcode \p MI relaxes to: the widened branch for relaxable
/// branches, otherwise whatever X86::getOpcodeForLongImmediateForm() returns
/// for the opcode.
static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

/// Return the condition code of a JCC_1, read from the immediate in its last
/// described operand. Every other opcode yields X86::COND_INVALID.
static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

/// Classify \p MI as the second instruction of a macro-fusion pair, based on
/// the condition code extracted by getCondFromBranch().
static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
256 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 257 unsigned Opcode = MI.getOpcode(); 258 const MCInstrDesc &Desc = MCII.get(Opcode); 259 uint64_t TSFlags = Desc.TSFlags; 260 unsigned CurOp = X86II::getOperandBias(Desc); 261 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 262 if (MemoryOperand < 0) 263 return false; 264 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 265 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 266 return (BaseReg == X86::RIP); 267 } 268 269 /// Check if the instruction is a prefix. 270 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { 271 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); 272 } 273 274 /// Check if the instruction is valid as the first instruction in macro fusion. 275 static bool isFirstMacroFusibleInst(const MCInst &Inst, 276 const MCInstrInfo &MCII) { 277 // An Intel instruction with RIP relative addressing is not macro fusible. 278 if (isRIPRelative(Inst, MCII)) 279 return false; 280 X86::FirstMacroFusionInstKind FIK = 281 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 282 return FIK != X86::FirstMacroFusionInstKind::Invalid; 283 } 284 285 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 286 /// get a better peformance in some cases. Here, we determine which prefix is 287 /// the most suitable. 288 /// 289 /// If the instruction has a segment override prefix, use the existing one. 290 /// If the target is 64-bit, use the CS. 291 /// If the target is 32-bit, 292 /// - If the instruction has a ESP/EBP base register, use SS. 293 /// - Otherwise use DS. 
294 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 295 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && 296 "Prefixes can be added only in 32-bit or 64-bit mode."); 297 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 298 uint64_t TSFlags = Desc.TSFlags; 299 300 // Determine where the memory operand starts, if present. 301 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 302 if (MemoryOperand != -1) 303 MemoryOperand += X86II::getOperandBias(Desc); 304 305 unsigned SegmentReg = 0; 306 if (MemoryOperand >= 0) { 307 // Check for explicit segment override on memory operand. 308 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 309 } 310 311 switch (TSFlags & X86II::FormMask) { 312 default: 313 break; 314 case X86II::RawFrmDstSrc: { 315 // Check segment override opcode prefix as needed (not for %ds). 316 if (Inst.getOperand(2).getReg() != X86::DS) 317 SegmentReg = Inst.getOperand(2).getReg(); 318 break; 319 } 320 case X86II::RawFrmSrc: { 321 // Check segment override opcode prefix as needed (not for %ds). 322 if (Inst.getOperand(1).getReg() != X86::DS) 323 SegmentReg = Inst.getOperand(1).getReg(); 324 break; 325 } 326 case X86II::RawFrmMemOffs: { 327 // Check segment override opcode prefix as needed. 328 SegmentReg = Inst.getOperand(1).getReg(); 329 break; 330 } 331 } 332 333 if (SegmentReg != 0) 334 return X86::getSegmentOverridePrefixForReg(SegmentReg); 335 336 if (STI.hasFeature(X86::Is64Bit)) 337 return X86::CS_Encoding; 338 339 if (MemoryOperand >= 0) { 340 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 341 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 342 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 343 return X86::SS_Encoding; 344 } 345 return X86::DS_Encoding; 346 } 347 348 /// Check if the two instructions will be macro-fused on the target cpu. 
349 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 350 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 351 if (!InstDesc.isConditionalBranch()) 352 return false; 353 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 354 return false; 355 const X86::FirstMacroFusionInstKind CmpKind = 356 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 357 const X86::SecondMacroFusionInstKind BranchKind = 358 classifySecondInstInMacroFusion(Jcc, *MCII); 359 return X86::isMacroFused(CmpKind, BranchKind); 360 } 361 362 /// Check if the instruction has a variant symbol operand. 363 static bool hasVariantSymbol(const MCInst &MI) { 364 for (auto &Operand : MI) { 365 if (!Operand.isExpr()) 366 continue; 367 const MCExpr &Expr = *Operand.getExpr(); 368 if (Expr.getKind() == MCExpr::SymbolRef && 369 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 370 return true; 371 } 372 return false; 373 } 374 375 bool X86AsmBackend::allowAutoPadding() const { 376 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 377 } 378 379 bool X86AsmBackend::allowEnhancedRelaxation() const { 380 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 381 } 382 383 /// X86 has certain instructions which enable interrupts exactly one 384 /// instruction *after* the instruction which stores to SS. Return true if the 385 /// given instruction has such an interrupt delay slot. 386 static bool hasInterruptDelaySlot(const MCInst &Inst) { 387 switch (Inst.getOpcode()) { 388 case X86::POPSS16: 389 case X86::POPSS32: 390 case X86::STI: 391 return true; 392 393 case X86::MOV16sr: 394 case X86::MOV32sr: 395 case X86::MOV64sr: 396 case X86::MOV16sm: 397 if (Inst.getOperand(0).getReg() == X86::SS) 398 return true; 399 break; 400 } 401 return false; 402 } 403 404 /// Check if the instruction to be emitted is right after any data. 
405 static bool 406 isRightAfterData(MCFragment *CurrentFragment, 407 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 408 MCFragment *F = CurrentFragment; 409 // Empty data fragments may be created to prevent further data being 410 // added into the previous fragment, we need to skip them since they 411 // have no contents. 412 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) 413 if (cast<MCDataFragment>(F)->getContents().size() != 0) 414 break; 415 416 // Since data is always emitted into a DataFragment, our check strategy is 417 // simple here. 418 // - If the fragment is a DataFragment 419 // - If it's not the fragment where the previous instruction is, 420 // returns true. 421 // - If it's the fragment holding the previous instruction but its 422 // size changed since the previous instruction was emitted into 423 // it, returns true. 424 // - Otherwise returns false. 425 // - If the fragment is not a DataFragment, returns false. 426 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) 427 return DF != PrevInstPosition.first || 428 DF->getContents().size() != PrevInstPosition.second; 429 430 return false; 431 } 432 433 /// \returns the fragment size if it has instructions, otherwise returns 0. 434 static size_t getSizeForInstFragment(const MCFragment *F) { 435 if (!F || !F->hasInstructions()) 436 return 0; 437 // MCEncodedFragmentWithContents being templated makes this tricky. 438 switch (F->getKind()) { 439 default: 440 llvm_unreachable("Unknown fragment with instructions!"); 441 case MCFragment::FT_Data: 442 return cast<MCDataFragment>(*F).getContents().size(); 443 case MCFragment::FT_Relaxable: 444 return cast<MCRelaxableFragment>(*F).getContents().size(); 445 case MCFragment::FT_CompactEncodedInst: 446 return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); 447 } 448 } 449 450 /// Return true if we can insert NOP or prefixes automatically before the 451 /// the instruction to be emitted. 
452 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { 453 if (hasVariantSymbol(Inst)) 454 // Linker may rewrite the instruction with variant symbol operand(e.g. 455 // TLSCALL). 456 return false; 457 458 if (hasInterruptDelaySlot(PrevInst)) 459 // If this instruction follows an interrupt enabling instruction with a one 460 // instruction delay, inserting a nop would change behavior. 461 return false; 462 463 if (isPrefix(PrevInst, *MCII)) 464 // If this instruction follows a prefix, inserting a nop/prefix would change 465 // semantic. 466 return false; 467 468 if (isPrefix(Inst, *MCII)) 469 // If this instruction is a prefix, inserting a prefix would change 470 // semantic. 471 return false; 472 473 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) 474 // If this instruction follows any data, there is no clear 475 // instruction boundary, inserting a nop/prefix would change semantic. 476 return false; 477 478 return true; 479 } 480 481 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { 482 if (!OS.getAllowAutoPadding()) 483 return false; 484 assert(allowAutoPadding() && "incorrect initialization!"); 485 486 // We only pad in text section. 487 if (!OS.getCurrentSectionOnly()->getKind().isText()) 488 return false; 489 490 // To be Done: Currently don't deal with Bundle cases. 491 if (OS.getAssembler().isBundlingEnabled()) 492 return false; 493 494 // Branches only need to be aligned in 32-bit or 64-bit mode. 495 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit))) 496 return false; 497 498 return true; 499 } 500 501 /// Check if the instruction operand needs to be aligned. 
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  // True when the instruction's descriptor puts it in a branch category whose
  // bit is set in AlignBranchType.
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
///
/// Also records in CanPadInst whether \p Inst may be padded at all; that
/// value is consumed by emitInstructionEnd().
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Computed unconditionally: emitInstructionEnd() uses it even when branch
  // padding is disabled for this position.
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't happen indeed, clear the pending.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // We will treat the JCC as a unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet a unfused branch or the first instruction in a fusible pair,
    // insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
///
/// Also updates the per-instruction bookkeeping (PrevInst, PrevInstPosition)
/// that the next emitInstructionBegin() call relies on.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  // Remember on the relaxable fragment itself whether it may be padded.
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  // Nothing to tie unless this instruction is one that gets aligned and a
  // BoundaryAlign is waiting for it.
  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

/// Translate a relocation name into a fixup kind.
///
/// For ELF targets the name is matched against the relocation names of the
/// target architecture (x86_64 or i386 tables) plus a few BFD_RELOC_* aliases;
/// a match is returned as a literal relocation kind and an unknown name as
/// std::nullopt. Other object formats defer to MCAsmBackend::getFixupKind().
std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    // -1u is the "no match" sentinel of both switches above.
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}

/// Return the descriptor for \p Kind: the FK_NONE descriptor for literal
/// relocation kinds, the generic descriptor for target-independent kinds, and
/// an entry of the local table for X86 target kinds.
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  // Indexed by (Kind - FirstTargetFixupKind).
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

/// Force a relocation exactly for literal relocation kinds (the kinds
/// produced by getFixupKind() for named relocations).
bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

/// Return the number of bytes a fixup of \p Kind patches (0 for FK_NONE).
/// Kinds not listed here are a programming error (llvm_unreachable).
static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

/// Patch \p Value into \p Data at the fixup's offset, least significant byte
/// first, using the byte width given by getFixupKindSize().
///
/// Literal relocation kinds are left untouched. For a PC-relative fixup whose
/// target is absolute or resolved, a value that does not fit the field is
/// reported as an error through the assembler's context; the bytes are still
/// written afterwards. All other fixups are only checked by an assertion.
void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
      MCFixupKindInfo::FKF_IsPCRel) {
    // check that PC relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  // Store the low Size bytes of Value, least significant byte first.
  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

/// Return true if \p MI could still be relaxed: it is a relaxable short
/// branch, or it has a distinct long-immediate opcode and its last operand is
/// an expression.
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1).isExpr());
}

/// Decide from the resolved fixup value alone whether relaxation is needed;
/// the fixup, fragment and layout arguments are not consulted.
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
/// Replace the opcode of \p Inst with its relaxed form as computed by
/// getRelaxedOpcode(). Aborts with a fatal error if the instruction has no
/// relaxed form (the computed opcode equals the current one).
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
  // NOTE(review): getRelaxedOpcode() also selects the 2-byte branch forms in
  // 16-bit mode and the long-immediate opcode for non-branch instructions, so
  // the sentence above looks narrower than the code — confirm and reword.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

/// Try to grow the instruction in \p RF by prepending copies of the prefix
/// chosen by determinePaddingPrefix().
///
/// On success the fragment contents and fixup offsets are rewritten,
/// \p RemainingSize is reduced by the number of bytes added and true is
/// returned. Returns false, leaving everything untouched, when the fragment
/// may not be padded, may still need relaxation, or no prefix byte fits.
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  // NOTE(review): 15 is presumably the maximum x86 instruction length in
  // bytes — confirm.
  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  // Number of prefix bytes that may still be added without exceeding
  // TargetPrefixMax, given the prefixes the instruction already has.
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine number of prefixes its safe to add. Various
    // targets (older chips mostly, but also Atom family) encounter decoder
    // stalls with too many prefixes. For testing purposes, we set the value
    // externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  // New contents: the padding prefixes followed by the old encoding.
  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

/// Try to grow the instruction in \p RF by relaxing it to its longer form.
///
/// The relaxed instruction is encoded first; it replaces the fragment's
/// instruction, contents and fixups only if the size increase fits into
/// \p RemainingSize, which is then reduced by that increase. Returns true if
/// the fragment was changed.
bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  // Work on a copy so the fragment stays untouched if the result is too big.
  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

/// Grow the instruction in \p RF by up to \p RemainingSize bytes, trying
/// relaxation first and prefix padding second. Returns true if either step
/// changed the fragment.
bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimitered by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
863 DenseSet<MCFragment *> LabeledFragments; 864 for (const MCSymbol &S : Asm.symbols()) 865 LabeledFragments.insert(S.getFragment(false)); 866 867 for (MCSection &Sec : Asm) { 868 if (!Sec.getKind().isText()) 869 continue; 870 871 SmallVector<MCRelaxableFragment *, 4> Relaxable; 872 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { 873 MCFragment &F = *I; 874 875 if (LabeledFragments.count(&F)) 876 Relaxable.clear(); 877 878 if (F.getKind() == MCFragment::FT_Data || 879 F.getKind() == MCFragment::FT_CompactEncodedInst) 880 // Skip and ignore 881 continue; 882 883 if (F.getKind() == MCFragment::FT_Relaxable) { 884 auto &RF = cast<MCRelaxableFragment>(*I); 885 Relaxable.push_back(&RF); 886 continue; 887 } 888 889 auto canHandle = [](MCFragment &F) -> bool { 890 switch (F.getKind()) { 891 default: 892 return false; 893 case MCFragment::FT_Align: 894 return X86PadForAlign; 895 case MCFragment::FT_BoundaryAlign: 896 return X86PadForBranchAlign; 897 } 898 }; 899 // For any unhandled kind, assume we can't change layout. 900 if (!canHandle(F)) { 901 Relaxable.clear(); 902 continue; 903 } 904 905 #ifndef NDEBUG 906 const uint64_t OrigOffset = Layout.getFragmentOffset(&F); 907 #endif 908 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); 909 910 // To keep the effects local, prefer to relax instructions closest to 911 // the align directive. This is purely about human understandability 912 // of the resulting code. If we later find a reason to expand 913 // particular instructions over others, we can adjust. 914 MCFragment *FirstChangedFragment = nullptr; 915 unsigned RemainingSize = OrigSize; 916 while (!Relaxable.empty() && RemainingSize != 0) { 917 auto &RF = *Relaxable.pop_back_val(); 918 // Give the backend a chance to play any tricks it wishes to increase 919 // the encoding size of the given instruction. Target independent code 920 // will try further relaxation, but target's may play further tricks. 
921 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) 922 FirstChangedFragment = &RF; 923 924 // If we have an instruction which hasn't been fully relaxed, we can't 925 // skip past it and insert bytes before it. Changing its starting 926 // offset might require a larger negative offset than it can encode. 927 // We don't need to worry about larger positive offsets as none of the 928 // possible offsets between this and our align are visible, and the 929 // ones afterwards aren't changing. 930 if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo())) 931 break; 932 } 933 Relaxable.clear(); 934 935 if (FirstChangedFragment) { 936 // Make sure the offsets for any fragments in the effected range get 937 // updated. Note that this (conservatively) invalidates the offsets of 938 // those following, but this is not required. 939 Layout.invalidateFragmentsFrom(FirstChangedFragment); 940 } 941 942 // BoundaryAlign explicitly tracks it's size (unlike align) 943 if (F.getKind() == MCFragment::FT_BoundaryAlign) 944 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 945 946 #ifndef NDEBUG 947 const uint64_t FinalOffset = Layout.getFragmentOffset(&F); 948 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); 949 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 950 "can't move start of next fragment!"); 951 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 952 #endif 953 954 // If we're looking at a boundary align, make sure we don't try to pad 955 // its target instructions for some following directive. Doing so would 956 // break the alignment of the current boundary align. 957 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 958 const MCFragment *LastFragment = BF->getLastFragment(); 959 if (!LastFragment) 960 continue; 961 while (&*I != LastFragment) 962 ++I; 963 } 964 } 965 } 966 967 // The layout is done. Mark every fragment as valid. 
968 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { 969 MCSection &Section = *Layout.getSectionOrder()[i]; 970 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); 971 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); 972 } 973 } 974 975 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { 976 if (STI.hasFeature(X86::Is16Bit)) 977 return 4; 978 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit)) 979 return 1; 980 if (STI.hasFeature(X86::TuningFast7ByteNOP)) 981 return 7; 982 if (STI.hasFeature(X86::TuningFast15ByteNOP)) 983 return 15; 984 if (STI.hasFeature(X86::TuningFast11ByteNOP)) 985 return 11; 986 // FIXME: handle 32-bit mode 987 // 15-bytes is the longest single NOP instruction, but 10-bytes is 988 // commonly the longest that can be efficiently decoded. 989 return 10; 990 } 991 992 /// Write a sequence of optimal nops to the output, covering \p Count 993 /// bytes. 994 /// \return - true on success, false on failure 995 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 996 const MCSubtargetInfo *STI) const { 997 static const char Nops32Bit[10][11] = { 998 // nop 999 "\x90", 1000 // xchg %ax,%ax 1001 "\x66\x90", 1002 // nopl (%[re]ax) 1003 "\x0f\x1f\x00", 1004 // nopl 0(%[re]ax) 1005 "\x0f\x1f\x40\x00", 1006 // nopl 0(%[re]ax,%[re]ax,1) 1007 "\x0f\x1f\x44\x00\x00", 1008 // nopw 0(%[re]ax,%[re]ax,1) 1009 "\x66\x0f\x1f\x44\x00\x00", 1010 // nopl 0L(%[re]ax) 1011 "\x0f\x1f\x80\x00\x00\x00\x00", 1012 // nopl 0L(%[re]ax,%[re]ax,1) 1013 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1014 // nopw 0L(%[re]ax,%[re]ax,1) 1015 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1016 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1017 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1018 }; 1019 1020 // 16-bit mode uses different nop patterns than 32-bit. 
1021 static const char Nops16Bit[4][11] = { 1022 // nop 1023 "\x90", 1024 // xchg %eax,%eax 1025 "\x66\x90", 1026 // lea 0(%si),%si 1027 "\x8d\x74\x00", 1028 // lea 0w(%si),%si 1029 "\x8d\xb4\x00\x00", 1030 }; 1031 1032 const char(*Nops)[11] = 1033 STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit; 1034 1035 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1036 1037 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1038 // length. 1039 do { 1040 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1041 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1042 for (uint8_t i = 0; i < Prefixes; i++) 1043 OS << '\x66'; 1044 const uint8_t Rest = ThisNopLength - Prefixes; 1045 if (Rest != 0) 1046 OS.write(Nops[Rest - 1], Rest); 1047 Count -= ThisNopLength; 1048 } while (Count != 0); 1049 1050 return true; 1051 } 1052 1053 /* *** */ 1054 1055 namespace { 1056 1057 class ELFX86AsmBackend : public X86AsmBackend { 1058 public: 1059 uint8_t OSABI; 1060 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1061 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1062 }; 1063 1064 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1065 public: 1066 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1067 const MCSubtargetInfo &STI) 1068 : ELFX86AsmBackend(T, OSABI, STI) {} 1069 1070 std::unique_ptr<MCObjectTargetWriter> 1071 createObjectTargetWriter() const override { 1072 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1073 } 1074 }; 1075 1076 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1077 public: 1078 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1079 const MCSubtargetInfo &STI) 1080 : ELFX86AsmBackend(T, OSABI, STI) {} 1081 1082 std::unique_ptr<MCObjectTargetWriter> 1083 createObjectTargetWriter() const override { 1084 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1085 ELF::EM_X86_64); 1086 } 1087 }; 1088 1089 class 
ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1090 public: 1091 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1092 const MCSubtargetInfo &STI) 1093 : ELFX86AsmBackend(T, OSABI, STI) {} 1094 1095 std::unique_ptr<MCObjectTargetWriter> 1096 createObjectTargetWriter() const override { 1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1098 ELF::EM_IAMCU); 1099 } 1100 }; 1101 1102 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1103 public: 1104 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1105 const MCSubtargetInfo &STI) 1106 : ELFX86AsmBackend(T, OSABI, STI) {} 1107 1108 std::unique_ptr<MCObjectTargetWriter> 1109 createObjectTargetWriter() const override { 1110 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1111 } 1112 }; 1113 1114 class WindowsX86AsmBackend : public X86AsmBackend { 1115 bool Is64Bit; 1116 1117 public: 1118 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1119 const MCSubtargetInfo &STI) 1120 : X86AsmBackend(T, STI) 1121 , Is64Bit(is64Bit) { 1122 } 1123 1124 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1125 return StringSwitch<std::optional<MCFixupKind>>(Name) 1126 .Case("dir32", FK_Data_4) 1127 .Case("secrel32", FK_SecRel_4) 1128 .Case("secidx", FK_SecRel_2) 1129 .Default(MCAsmBackend::getFixupKind(Name)); 1130 } 1131 1132 std::unique_ptr<MCObjectTargetWriter> 1133 createObjectTargetWriter() const override { 1134 return createX86WinCOFFObjectWriter(Is64Bit); 1135 } 1136 }; 1137 1138 namespace CU { 1139 1140 /// Compact unwind encoding values. 1141 enum CompactUnwindEncodings { 1142 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1143 /// the return address, then [RE]SP is moved to [RE]BP. 1144 UNWIND_MODE_BP_FRAME = 0x01000000, 1145 1146 /// A frameless function with a small constant stack size. 1147 UNWIND_MODE_STACK_IMMD = 0x02000000, 1148 1149 /// A frameless function with a large constant stack size. 
1150 UNWIND_MODE_STACK_IND = 0x03000000, 1151 1152 /// No compact unwind encoding is available. 1153 UNWIND_MODE_DWARF = 0x04000000, 1154 1155 /// Mask for encoding the frame registers. 1156 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1157 1158 /// Mask for encoding the frameless registers. 1159 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1160 }; 1161 1162 } // namespace CU 1163 1164 class DarwinX86AsmBackend : public X86AsmBackend { 1165 const MCRegisterInfo &MRI; 1166 1167 /// Number of registers that can be saved in a compact unwind encoding. 1168 enum { CU_NUM_SAVED_REGS = 6 }; 1169 1170 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1171 Triple TT; 1172 bool Is64Bit; 1173 1174 unsigned OffsetSize; ///< Offset of a "push" instruction. 1175 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1176 unsigned StackDivide; ///< Amount to adjust stack size by. 1177 protected: 1178 /// Size of a "push" instruction for the given register. 1179 unsigned PushInstrSize(unsigned Reg) const { 1180 switch (Reg) { 1181 case X86::EBX: 1182 case X86::ECX: 1183 case X86::EDX: 1184 case X86::EDI: 1185 case X86::ESI: 1186 case X86::EBP: 1187 case X86::RBX: 1188 case X86::RBP: 1189 return 1; 1190 case X86::R12: 1191 case X86::R13: 1192 case X86::R14: 1193 case X86::R15: 1194 return 2; 1195 } 1196 return 1; 1197 } 1198 1199 private: 1200 /// Get the compact unwind number for a given register. The number 1201 /// corresponds to the enum lists in compact_unwind_encoding.h. 1202 int getCompactUnwindRegNum(unsigned Reg) const { 1203 static const MCPhysReg CU32BitRegs[7] = { 1204 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1205 }; 1206 static const MCPhysReg CU64BitRegs[] = { 1207 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1208 }; 1209 const MCPhysReg *CURegs = Is64Bit ? 
CU64BitRegs : CU32BitRegs; 1210 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1211 if (*CURegs == Reg) 1212 return Idx; 1213 1214 return -1; 1215 } 1216 1217 /// Return the registers encoded for a compact encoding with a frame 1218 /// pointer. 1219 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1220 // Encode the registers in the order they were saved --- 3-bits per 1221 // register. The list of saved registers is assumed to be in reverse 1222 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1223 uint32_t RegEnc = 0; 1224 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1225 unsigned Reg = SavedRegs[i]; 1226 if (Reg == 0) break; 1227 1228 int CURegNum = getCompactUnwindRegNum(Reg); 1229 if (CURegNum == -1) return ~0U; 1230 1231 // Encode the 3-bit register number in order, skipping over 3-bits for 1232 // each register. 1233 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1234 } 1235 1236 assert((RegEnc & 0x3FFFF) == RegEnc && 1237 "Invalid compact register encoding!"); 1238 return RegEnc; 1239 } 1240 1241 /// Create the permutation encoding used with frameless stacks. It is 1242 /// passed the number of registers to be saved and an array of the registers 1243 /// saved. 1244 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1245 // The saved registers are numbered from 1 to 6. In order to encode the 1246 // order in which they were saved, we re-number them according to their 1247 // place in the register order. The re-numbering is relative to the last 1248 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in 1249 // that order: 1250 // 1251 // Orig Re-Num 1252 // ---- ------ 1253 // 6 6 1254 // 2 2 1255 // 4 3 1256 // 5 3 1257 // 1258 for (unsigned i = 0; i < RegCount; ++i) { 1259 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1260 if (CUReg == -1) return ~0U; 1261 SavedRegs[i] = CUReg; 1262 } 1263 1264 // Reverse the list. 
1265 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1266 1267 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1268 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1269 unsigned Countless = 0; 1270 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1271 if (SavedRegs[j] < SavedRegs[i]) 1272 ++Countless; 1273 1274 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1275 } 1276 1277 // Take the renumbered values and encode them into a 10-bit number. 1278 uint32_t permutationEncoding = 0; 1279 switch (RegCount) { 1280 case 6: 1281 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1282 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1283 + RenumRegs[4]; 1284 break; 1285 case 5: 1286 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1287 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1288 + RenumRegs[5]; 1289 break; 1290 case 4: 1291 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1292 + 3 * RenumRegs[4] + RenumRegs[5]; 1293 break; 1294 case 3: 1295 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1296 + RenumRegs[5]; 1297 break; 1298 case 2: 1299 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1300 break; 1301 case 1: 1302 permutationEncoding |= RenumRegs[5]; 1303 break; 1304 } 1305 1306 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1307 "Invalid compact register encoding!"); 1308 return permutationEncoding; 1309 } 1310 1311 public: 1312 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1313 const MCSubtargetInfo &STI) 1314 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1315 Is64Bit(TT.isArch64Bit()) { 1316 memset(SavedRegs, 0, sizeof(SavedRegs)); 1317 OffsetSize = Is64Bit ? 8 : 4; 1318 MoveInstrSize = Is64Bit ? 3 : 2; 1319 StackDivide = Is64Bit ? 
8 : 4; 1320 } 1321 1322 std::unique_ptr<MCObjectTargetWriter> 1323 createObjectTargetWriter() const override { 1324 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1325 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1326 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1327 } 1328 1329 /// Implementation of algorithm to generate the compact unwind encoding 1330 /// for the CFI instructions. 1331 uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI, 1332 const MCContext *Ctxt) const override { 1333 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions; 1334 if (Instrs.empty()) return 0; 1335 if (!isDarwinCanonicalPersonality(FI->Personality) && 1336 !Ctxt->emitCompactUnwindNonCanonical()) 1337 return CU::UNWIND_MODE_DWARF; 1338 1339 // Reset the saved registers. 1340 unsigned SavedRegIdx = 0; 1341 memset(SavedRegs, 0, sizeof(SavedRegs)); 1342 1343 bool HasFP = false; 1344 1345 // Encode that we are using EBP/RBP as the frame pointer. 1346 uint32_t CompactUnwindEncoding = 0; 1347 1348 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1349 unsigned InstrOffset = 0; 1350 unsigned StackAdjust = 0; 1351 unsigned StackSize = 0; 1352 int MinAbsOffset = std::numeric_limits<int>::max(); 1353 1354 for (const MCCFIInstruction &Inst : Instrs) { 1355 switch (Inst.getOperation()) { 1356 default: 1357 // Any other CFI directives indicate a frame that we aren't prepared 1358 // to represent via compact unwind, so just bail out. 1359 return CU::UNWIND_MODE_DWARF; 1360 case MCCFIInstruction::OpDefCfaRegister: { 1361 // Defines a frame pointer. E.g. 1362 // 1363 // movq %rsp, %rbp 1364 // L0: 1365 // .cfi_def_cfa_register %rbp 1366 // 1367 HasFP = true; 1368 1369 // If the frame pointer is other than esp/rsp, we do not have a way to 1370 // generate a compact unwinding representation, so bail out. 1371 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1372 (Is64Bit ? 
X86::RBP : X86::EBP)) 1373 return CU::UNWIND_MODE_DWARF; 1374 1375 // Reset the counts. 1376 memset(SavedRegs, 0, sizeof(SavedRegs)); 1377 StackAdjust = 0; 1378 SavedRegIdx = 0; 1379 MinAbsOffset = std::numeric_limits<int>::max(); 1380 InstrOffset += MoveInstrSize; 1381 break; 1382 } 1383 case MCCFIInstruction::OpDefCfaOffset: { 1384 // Defines a new offset for the CFA. E.g. 1385 // 1386 // With frame: 1387 // 1388 // pushq %rbp 1389 // L0: 1390 // .cfi_def_cfa_offset 16 1391 // 1392 // Without frame: 1393 // 1394 // subq $72, %rsp 1395 // L0: 1396 // .cfi_def_cfa_offset 80 1397 // 1398 StackSize = Inst.getOffset() / StackDivide; 1399 break; 1400 } 1401 case MCCFIInstruction::OpOffset: { 1402 // Defines a "push" of a callee-saved register. E.g. 1403 // 1404 // pushq %r15 1405 // pushq %r14 1406 // pushq %rbx 1407 // L0: 1408 // subq $120, %rsp 1409 // L1: 1410 // .cfi_offset %rbx, -40 1411 // .cfi_offset %r14, -32 1412 // .cfi_offset %r15, -24 1413 // 1414 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1415 // If there are too many saved registers, we cannot use a compact 1416 // unwind encoding. 1417 return CU::UNWIND_MODE_DWARF; 1418 1419 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1420 SavedRegs[SavedRegIdx++] = Reg; 1421 StackAdjust += OffsetSize; 1422 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset())); 1423 InstrOffset += PushInstrSize(Reg); 1424 break; 1425 } 1426 } 1427 } 1428 1429 StackAdjust /= StackDivide; 1430 1431 if (HasFP) { 1432 if ((StackAdjust & 0xFF) != StackAdjust) 1433 // Offset was too big for a compact unwind encoding. 1434 return CU::UNWIND_MODE_DWARF; 1435 1436 // We don't attempt to track a real StackAdjust, so if the saved registers 1437 // aren't adjacent to rbp we can't cope. 1438 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1439 return CU::UNWIND_MODE_DWARF; 1440 1441 // Get the encoding of the saved registers when we have a frame pointer. 
1442 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1443 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1444 1445 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1446 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1447 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1448 } else { 1449 SubtractInstrIdx += InstrOffset; 1450 ++StackAdjust; 1451 1452 if ((StackSize & 0xFF) == StackSize) { 1453 // Frameless stack with a small stack size. 1454 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1455 1456 // Encode the stack size. 1457 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1458 } else { 1459 if ((StackAdjust & 0x7) != StackAdjust) 1460 // The extra stack adjustments are too big for us to handle. 1461 return CU::UNWIND_MODE_DWARF; 1462 1463 // Frameless stack with an offset too large for us to encode compactly. 1464 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1465 1466 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1467 // instruction. 1468 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1469 1470 // Encode any extra stack adjustments (done via push instructions). 1471 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1472 } 1473 1474 // Encode the number of registers saved. (Reverse the list first.) 1475 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1476 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1477 1478 // Get the encoding of the saved registers when we don't have a frame 1479 // pointer. 1480 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1481 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1482 1483 // Encode the register encoding. 
1484 CompactUnwindEncoding |= 1485 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1486 } 1487 1488 return CompactUnwindEncoding; 1489 } 1490 }; 1491 1492 } // end anonymous namespace 1493 1494 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1495 const MCSubtargetInfo &STI, 1496 const MCRegisterInfo &MRI, 1497 const MCTargetOptions &Options) { 1498 const Triple &TheTriple = STI.getTargetTriple(); 1499 if (TheTriple.isOSBinFormatMachO()) 1500 return new DarwinX86AsmBackend(T, MRI, STI); 1501 1502 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1503 return new WindowsX86AsmBackend(T, false, STI); 1504 1505 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1506 1507 if (TheTriple.isOSIAMCU()) 1508 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1509 1510 return new ELFX86_32AsmBackend(T, OSABI, STI); 1511 } 1512 1513 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1514 const MCSubtargetInfo &STI, 1515 const MCRegisterInfo &MRI, 1516 const MCTargetOptions &Options) { 1517 const Triple &TheTriple = STI.getTargetTriple(); 1518 if (TheTriple.isOSBinFormatMachO()) 1519 return new DarwinX86AsmBackend(T, MRI, STI); 1520 1521 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1522 return new WindowsX86AsmBackend(T, true, STI); 1523 1524 if (TheTriple.isUEFI()) { 1525 assert(TheTriple.isOSBinFormatCOFF() && 1526 "Only COFF format is supported in UEFI environment."); 1527 return new WindowsX86AsmBackend(T, true, STI); 1528 } 1529 1530 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1531 1532 if (TheTriple.isX32()) 1533 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1534 return new ELFX86_64AsmBackend(T, OSABI, STI); 1535 } 1536