1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86FixupKinds.h" 11 #include "MCTargetDesc/X86EncodingOptimization.h" 12 #include "llvm/ADT/StringSwitch.h" 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/BinaryFormat/MachO.h" 15 #include "llvm/MC/MCAsmBackend.h" 16 #include "llvm/MC/MCAsmLayout.h" 17 #include "llvm/MC/MCAssembler.h" 18 #include "llvm/MC/MCCodeEmitter.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDwarf.h" 21 #include "llvm/MC/MCELFObjectWriter.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCFixupKindInfo.h" 24 #include "llvm/MC/MCInst.h" 25 #include "llvm/MC/MCInstrInfo.h" 26 #include "llvm/MC/MCMachObjectWriter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionMachO.h" 31 #include "llvm/MC/MCSubtargetInfo.h" 32 #include "llvm/MC/MCValue.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 using namespace llvm; 39 40 namespace { 41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 42 class X86AlignBranchKind { 43 private: 44 uint8_t AlignBranchKind = 0; 45 46 public: 47 void operator=(const std::string &Val) { 48 if (Val.empty()) 49 return; 50 SmallVector<StringRef, 6> BranchTypes; 51 StringRef(Val).split(BranchTypes, '+', -1, false); 52 for (auto BranchType : BranchTypes) { 53 if (BranchType == "fused") 54 addKind(X86::AlignBranchFused); 55 else if (BranchType == "jcc") 56 addKind(X86::AlignBranchJcc); 57 else if (BranchType == "jmp") 58 addKind(X86::AlignBranchJmp); 59 else if (BranchType == "call") 60 addKind(X86::AlignBranchCall); 61 else if (BranchType == "ret") 62 addKind(X86::AlignBranchRet); 63 else if (BranchType == "indirect") 64 addKind(X86::AlignBranchIndirect); 65 else { 66 errs() << "invalid argument " << BranchType.str() 67 << " to -x86-align-branch=; each element must be one of: fused, " 68 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 69 } 70 } 71 } 72 73 operator uint8_t() const { return AlignBranchKind; } 74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 75 }; 76 77 X86AlignBranchKind X86AlignBranchKindLoc; 78 79 cl::opt<unsigned> X86AlignBranchBoundary( 80 "x86-align-branch-boundary", cl::init(0), 81 cl::desc( 82 "Control how the assembler should align branches with NOP. If the " 83 "boundary's size is not 0, it should be a power of 2 and no less " 84 "than 32. Branches will be aligned to prevent from being across or " 85 "against the boundary of specified size. 
The default value 0 does not " 86 "align branches.")); 87 88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( 89 "x86-align-branch", 90 cl::desc( 91 "Specify types of branches to align (plus separated list of types):" 92 "\njcc indicates conditional jumps" 93 "\nfused indicates fused conditional jumps" 94 "\njmp indicates direct unconditional jumps" 95 "\ncall indicates direct and indirect calls" 96 "\nret indicates rets" 97 "\nindirect indicates indirect unconditional jumps"), 98 cl::location(X86AlignBranchKindLoc)); 99 100 cl::opt<bool> X86AlignBranchWithin32BBoundaries( 101 "x86-branches-within-32B-boundaries", cl::init(false), 102 cl::desc( 103 "Align selected instructions to mitigate negative performance impact " 104 "of Intel's micro code update for errata skx102. May break " 105 "assumptions about labels corresponding to particular instructions, " 106 "and should be used with caution.")); 107 108 cl::opt<unsigned> X86PadMaxPrefixSize( 109 "x86-pad-max-prefix-size", cl::init(0), 110 cl::desc("Maximum number of prefixes to use for padding")); 111 112 cl::opt<bool> X86PadForAlign( 113 "x86-pad-for-align", cl::init(false), cl::Hidden, 114 cl::desc("Pad previous instructions to implement align directives")); 115 116 cl::opt<bool> X86PadForBranchAlign( 117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden, 118 cl::desc("Pad previous instructions to implement branch alignment")); 119 120 class X86AsmBackend : public MCAsmBackend { 121 const MCSubtargetInfo &STI; 122 std::unique_ptr<const MCInstrInfo> MCII; 123 X86AlignBranchKind AlignBranchType; 124 Align AlignBoundary; 125 unsigned TargetPrefixMax = 0; 126 127 MCInst PrevInst; 128 MCBoundaryAlignFragment *PendingBA = nullptr; 129 std::pair<MCFragment *, size_t> PrevInstPosition; 130 bool CanPadInst = false; 131 132 uint8_t determinePaddingPrefix(const MCInst &Inst) const; 133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; 134 bool needAlign(const MCInst &Inst) const; 135 bool canPadBranches(MCObjectStreamer &OS) const; 136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; 137 138 public: 139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) 140 : MCAsmBackend(support::little), STI(STI), 141 MCII(T.createMCInstrInfo()) { 142 if (X86AlignBranchWithin32BBoundaries) { 143 // At the moment, this defaults to aligning fused branches, unconditional 144 // jumps, and (unfused) conditional jumps with nops. Both the 145 // instructions aligned and the alignment method (nop vs prefix) may 146 // change in the future. 
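      // In effect this currently matches passing
      // -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc+jmp; the
      // assignments below set up exactly that.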
147 AlignBoundary = assumeAligned(32); 148 AlignBranchType.addKind(X86::AlignBranchFused); 149 AlignBranchType.addKind(X86::AlignBranchJcc); 150 AlignBranchType.addKind(X86::AlignBranchJmp); 151 } 152 // Allow overriding defaults set by main flag 153 if (X86AlignBranchBoundary.getNumOccurrences()) 154 AlignBoundary = assumeAligned(X86AlignBranchBoundary); 155 if (X86AlignBranch.getNumOccurrences()) 156 AlignBranchType = X86AlignBranchKindLoc; 157 if (X86PadMaxPrefixSize.getNumOccurrences()) 158 TargetPrefixMax = X86PadMaxPrefixSize; 159 } 160 161 bool allowAutoPadding() const override; 162 bool allowEnhancedRelaxation() const override; 163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, 164 const MCSubtargetInfo &STI) override; 165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; 166 167 unsigned getNumFixupKinds() const override { 168 return X86::NumTargetFixupKinds; 169 } 170 171 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override; 172 173 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; 174 175 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, 176 const MCValue &Target) override; 177 178 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 179 const MCValue &Target, MutableArrayRef<char> Data, 180 uint64_t Value, bool IsResolved, 181 const MCSubtargetInfo *STI) const override; 182 183 bool mayNeedRelaxation(const MCInst &Inst, 184 const MCSubtargetInfo &STI) const override; 185 186 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, 187 const MCRelaxableFragment *DF, 188 const MCAsmLayout &Layout) const override; 189 190 void relaxInstruction(MCInst &Inst, 191 const MCSubtargetInfo &STI) const override; 192 193 bool padInstructionViaRelaxation(MCRelaxableFragment &RF, 194 MCCodeEmitter &Emitter, 195 unsigned &RemainingSize) const; 196 197 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 198 unsigned &RemainingSize) const; 199 200 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 201 unsigned &RemainingSize) const; 202 203 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; 204 205 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; 206 207 bool writeNopData(raw_ostream &OS, uint64_t Count, 208 const MCSubtargetInfo *STI) const override; 209 }; 210 } // end anonymous namespace 211 212 static bool isRelaxableBranch(unsigned Opcode) { 213 return Opcode == X86::JCC_1 || Opcode == X86::JMP_1; 214 } 215 216 static unsigned getRelaxedOpcodeBranch(unsigned Opcode, 217 bool Is16BitMode = false) { 218 switch (Opcode) { 219 default: 220 llvm_unreachable("invalid opcode for branch"); 221 case X86::JCC_1: 222 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; 223 case X86::JMP_1: 224 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; 225 } 226 } 227 228 static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) { 229 unsigned Opcode = MI.getOpcode(); 230 return isRelaxableBranch(Opcode) ? 
getRelaxedOpcodeBranch(Opcode, Is16BitMode) 231 : X86::getOpcodeForLongImmediateForm(Opcode); 232 } 233 234 static X86::CondCode getCondFromBranch(const MCInst &MI, 235 const MCInstrInfo &MCII) { 236 unsigned Opcode = MI.getOpcode(); 237 switch (Opcode) { 238 default: 239 return X86::COND_INVALID; 240 case X86::JCC_1: { 241 const MCInstrDesc &Desc = MCII.get(Opcode); 242 return static_cast<X86::CondCode>( 243 MI.getOperand(Desc.getNumOperands() - 1).getImm()); 244 } 245 } 246 } 247 248 static X86::SecondMacroFusionInstKind 249 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { 250 X86::CondCode CC = getCondFromBranch(MI, MCII); 251 return classifySecondCondCodeInMacroFusion(CC); 252 } 253 254 /// Check if the instruction uses RIP relative addressing. 255 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 256 unsigned Opcode = MI.getOpcode(); 257 const MCInstrDesc &Desc = MCII.get(Opcode); 258 uint64_t TSFlags = Desc.TSFlags; 259 unsigned CurOp = X86II::getOperandBias(Desc); 260 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 261 if (MemoryOperand < 0) 262 return false; 263 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 264 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 265 return (BaseReg == X86::RIP); 266 } 267 268 /// Check if the instruction is a prefix. 269 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { 270 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); 271 } 272 273 /// Check if the instruction is valid as the first instruction in macro fusion. 274 static bool isFirstMacroFusibleInst(const MCInst &Inst, 275 const MCInstrInfo &MCII) { 276 // An Intel instruction with RIP relative addressing is not macro fusible. 277 if (isRIPRelative(Inst, MCII)) 278 return false; 279 X86::FirstMacroFusionInstKind FIK = 280 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 281 return FIK != X86::FirstMacroFusionInstKind::Invalid; 282 } 283 284 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 285 /// get a better peformance in some cases. Here, we determine which prefix is 286 /// the most suitable. 287 /// 288 /// If the instruction has a segment override prefix, use the existing one. 289 /// If the target is 64-bit, use the CS. 290 /// If the target is 32-bit, 291 /// - If the instruction has a ESP/EBP base register, use SS. 292 /// - Otherwise use DS. 293 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 294 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && 295 "Prefixes can be added only in 32-bit or 64-bit mode."); 296 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 297 uint64_t TSFlags = Desc.TSFlags; 298 299 // Determine where the memory operand starts, if present. 300 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 301 if (MemoryOperand != -1) 302 MemoryOperand += X86II::getOperandBias(Desc); 303 304 unsigned SegmentReg = 0; 305 if (MemoryOperand >= 0) { 306 // Check for explicit segment override on memory operand. 307 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 308 } 309 310 switch (TSFlags & X86II::FormMask) { 311 default: 312 break; 313 case X86II::RawFrmDstSrc: { 314 // Check segment override opcode prefix as needed (not for %ds). 315 if (Inst.getOperand(2).getReg() != X86::DS) 316 SegmentReg = Inst.getOperand(2).getReg(); 317 break; 318 } 319 case X86II::RawFrmSrc: { 320 // Check segment override opcode prefix as needed (not for %ds). 
321 if (Inst.getOperand(1).getReg() != X86::DS) 322 SegmentReg = Inst.getOperand(1).getReg(); 323 break; 324 } 325 case X86II::RawFrmMemOffs: { 326 // Check segment override opcode prefix as needed. 327 SegmentReg = Inst.getOperand(1).getReg(); 328 break; 329 } 330 } 331 332 if (SegmentReg != 0) 333 return X86::getSegmentOverridePrefixForReg(SegmentReg); 334 335 if (STI.hasFeature(X86::Is64Bit)) 336 return X86::CS_Encoding; 337 338 if (MemoryOperand >= 0) { 339 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 340 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 341 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 342 return X86::SS_Encoding; 343 } 344 return X86::DS_Encoding; 345 } 346 347 /// Check if the two instructions will be macro-fused on the target cpu. 348 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 349 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 350 if (!InstDesc.isConditionalBranch()) 351 return false; 352 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 353 return false; 354 const X86::FirstMacroFusionInstKind CmpKind = 355 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 356 const X86::SecondMacroFusionInstKind BranchKind = 357 classifySecondInstInMacroFusion(Jcc, *MCII); 358 return X86::isMacroFused(CmpKind, BranchKind); 359 } 360 361 /// Check if the instruction has a variant symbol operand. 362 static bool hasVariantSymbol(const MCInst &MI) { 363 for (auto &Operand : MI) { 364 if (!Operand.isExpr()) 365 continue; 366 const MCExpr &Expr = *Operand.getExpr(); 367 if (Expr.getKind() == MCExpr::SymbolRef && 368 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 369 return true; 370 } 371 return false; 372 } 373 374 bool X86AsmBackend::allowAutoPadding() const { 375 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 376 } 377 378 bool X86AsmBackend::allowEnhancedRelaxation() const { 379 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 380 } 381 382 /// X86 has certain instructions which enable interrupts exactly one 383 /// instruction *after* the instruction which stores to SS. Return true if the 384 /// given instruction has such an interrupt delay slot. 385 static bool hasInterruptDelaySlot(const MCInst &Inst) { 386 switch (Inst.getOpcode()) { 387 case X86::POPSS16: 388 case X86::POPSS32: 389 case X86::STI: 390 return true; 391 392 case X86::MOV16sr: 393 case X86::MOV32sr: 394 case X86::MOV64sr: 395 case X86::MOV16sm: 396 if (Inst.getOperand(0).getReg() == X86::SS) 397 return true; 398 break; 399 } 400 return false; 401 } 402 403 /// Check if the instruction to be emitted is right after any data. 404 static bool 405 isRightAfterData(MCFragment *CurrentFragment, 406 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 407 MCFragment *F = CurrentFragment; 408 // Empty data fragments may be created to prevent further data being 409 // added into the previous fragment, we need to skip them since they 410 // have no contents. 411 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) 412 if (cast<MCDataFragment>(F)->getContents().size() != 0) 413 break; 414 415 // Since data is always emitted into a DataFragment, our check strategy is 416 // simple here. 417 // - If the fragment is a DataFragment 418 // - If it's not the fragment where the previous instruction is, 419 // returns true. 
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOPs or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // The linker may rewrite an instruction with a variant symbol operand
    // (e.g. TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt-enabling instruction with a
    // one-instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear instruction
    // boundary, so inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // TODO: Currently we don't deal with bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert a BoundaryAlignFragment before instructions to align branches.
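///
/// An illustrative sketch (operands and label are hypothetical): with a
/// 32-byte boundary and fused-branch alignment enabled, emitting
///
/// \code
///   cmpq %rcx, %rax
///   je   .Ltarget
/// \endcode
///
/// causes a MCBoundaryAlignFragment to be inserted before the cmp; the nops
/// (or padding prefixes) later materialized in that fragment keep the fused
/// cmp+je pair from crossing or ending at a 32-byte boundary.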
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't happen, so clear the pending BoundaryAlign.
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
584 MCSection *Sec = OS.getCurrentSectionOnly(); 585 Sec->ensureMinAlignment(AlignBoundary); 586 } 587 588 std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 589 if (STI.getTargetTriple().isOSBinFormatELF()) { 590 unsigned Type; 591 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 592 Type = llvm::StringSwitch<unsigned>(Name) 593 #define ELF_RELOC(X, Y) .Case(#X, Y) 594 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 595 #undef ELF_RELOC 596 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE) 597 .Case("BFD_RELOC_8", ELF::R_X86_64_8) 598 .Case("BFD_RELOC_16", ELF::R_X86_64_16) 599 .Case("BFD_RELOC_32", ELF::R_X86_64_32) 600 .Case("BFD_RELOC_64", ELF::R_X86_64_64) 601 .Default(-1u); 602 } else { 603 Type = llvm::StringSwitch<unsigned>(Name) 604 #define ELF_RELOC(X, Y) .Case(#X, Y) 605 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 606 #undef ELF_RELOC 607 .Case("BFD_RELOC_NONE", ELF::R_386_NONE) 608 .Case("BFD_RELOC_8", ELF::R_386_8) 609 .Case("BFD_RELOC_16", ELF::R_386_16) 610 .Case("BFD_RELOC_32", ELF::R_386_32) 611 .Default(-1u); 612 } 613 if (Type == -1u) 614 return std::nullopt; 615 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 616 } 617 return MCAsmBackend::getFixupKind(Name); 618 } 619 620 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 621 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 622 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 623 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 624 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 625 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 626 {"reloc_signed_4byte", 0, 32, 0}, 627 {"reloc_signed_4byte_relax", 0, 32, 0}, 628 {"reloc_global_offset_table", 0, 32, 0}, 629 {"reloc_global_offset_table8", 0, 64, 0}, 630 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 631 }; 632 633 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 634 // do not require any extra processing. 
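  // For example (a hypothetical use), `.reloc ., BFD_RELOC_NONE` on an x86-64
  // ELF target is mapped by getFixupKind above to
  // FirstLiteralRelocationKind + R_X86_64_NONE and is simply passed through
  // here.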
635 if (Kind >= FirstLiteralRelocationKind) 636 return MCAsmBackend::getFixupKindInfo(FK_NONE); 637 638 if (Kind < FirstTargetFixupKind) 639 return MCAsmBackend::getFixupKindInfo(Kind); 640 641 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 642 "Invalid kind!"); 643 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 644 return Infos[Kind - FirstTargetFixupKind]; 645 } 646 647 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 648 const MCFixup &Fixup, 649 const MCValue &) { 650 return Fixup.getKind() >= FirstLiteralRelocationKind; 651 } 652 653 static unsigned getFixupKindSize(unsigned Kind) { 654 switch (Kind) { 655 default: 656 llvm_unreachable("invalid fixup kind!"); 657 case FK_NONE: 658 return 0; 659 case FK_PCRel_1: 660 case FK_SecRel_1: 661 case FK_Data_1: 662 return 1; 663 case FK_PCRel_2: 664 case FK_SecRel_2: 665 case FK_Data_2: 666 return 2; 667 case FK_PCRel_4: 668 case X86::reloc_riprel_4byte: 669 case X86::reloc_riprel_4byte_relax: 670 case X86::reloc_riprel_4byte_relax_rex: 671 case X86::reloc_riprel_4byte_movq_load: 672 case X86::reloc_signed_4byte: 673 case X86::reloc_signed_4byte_relax: 674 case X86::reloc_global_offset_table: 675 case X86::reloc_branch_4byte_pcrel: 676 case FK_SecRel_4: 677 case FK_Data_4: 678 return 4; 679 case FK_PCRel_8: 680 case FK_SecRel_8: 681 case FK_Data_8: 682 case X86::reloc_global_offset_table8: 683 return 8; 684 } 685 } 686 687 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 688 const MCValue &Target, 689 MutableArrayRef<char> Data, 690 uint64_t Value, bool IsResolved, 691 const MCSubtargetInfo *STI) const { 692 unsigned Kind = Fixup.getKind(); 693 if (Kind >= FirstLiteralRelocationKind) 694 return; 695 unsigned Size = getFixupKindSize(Kind); 696 697 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); 698 699 int64_t SignedValue = static_cast<int64_t>(Value); 700 if ((Target.isAbsolute() || IsResolved) && 701 getFixupKindInfo(Fixup.getKind()).Flags & 702 MCFixupKindInfo::FKF_IsPCRel) { 703 // check that PC relative fixup fits into the fixup size. 704 if (Size > 0 && !isIntN(Size * 8, SignedValue)) 705 Asm.getContext().reportError( 706 Fixup.getLoc(), "value of " + Twine(SignedValue) + 707 " is too large for field of " + Twine(Size) + 708 ((Size == 1) ? " byte." : " bytes.")); 709 } else { 710 // Check that uppper bits are either all zeros or all ones. 711 // Specifically ignore overflow/underflow as long as the leakage is 712 // limited to the lower bits. This is to remain compatible with 713 // other assemblers. 714 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && 715 "Value does not fit in the Fixup field"); 716 } 717 718 for (unsigned i = 0; i != Size; ++i) 719 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); 720 } 721 722 bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI, 723 const MCSubtargetInfo &STI) const { 724 unsigned Opcode = MI.getOpcode(); 725 return isRelaxableBranch(Opcode) || 726 (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode && 727 MI.getOperand(MI.getNumOperands() - 1).isExpr()); 728 } 729 730 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, 731 uint64_t Value, 732 const MCRelaxableFragment *DF, 733 const MCAsmLayout &Layout) const { 734 // Relax if the value is too big for a (signed) i8. 735 return !isInt<8>(Value); 736 } 737 738 // FIXME: Can tblgen help at all here to verify there aren't other instructions 739 // we can relax? 
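//
// A sketch of the effect (the label is hypothetical): a short conditional
// branch such as `jne .Ltarget` (JCC_1, 2 bytes: opcode + rel8) is rewritten
// to its long form (JCC_4, 6 bytes: 0F 8x + rel32) when the displacement does
// not fit in a signed 8-bit immediate; in 16-bit mode the rel16 form (JCC_2)
// is used instead.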
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    Emitter.emitPrefix(RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it's safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore.
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align).
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}

unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15 bytes is the longest single NOP instruction, but 10 bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}

/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
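/// Counts larger than the maximum NOP size for the target are covered by
/// emitting repeated maximum-length NOPs followed by one shorter NOP; on
/// targets where NOPs longer than 10 bytes are used, the extra length comes
/// from prepending 0x66 prefixes to the 10-byte NOP.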
993 /// \return - true on success, false on failure 994 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 995 const MCSubtargetInfo *STI) const { 996 static const char Nops32Bit[10][11] = { 997 // nop 998 "\x90", 999 // xchg %ax,%ax 1000 "\x66\x90", 1001 // nopl (%[re]ax) 1002 "\x0f\x1f\x00", 1003 // nopl 0(%[re]ax) 1004 "\x0f\x1f\x40\x00", 1005 // nopl 0(%[re]ax,%[re]ax,1) 1006 "\x0f\x1f\x44\x00\x00", 1007 // nopw 0(%[re]ax,%[re]ax,1) 1008 "\x66\x0f\x1f\x44\x00\x00", 1009 // nopl 0L(%[re]ax) 1010 "\x0f\x1f\x80\x00\x00\x00\x00", 1011 // nopl 0L(%[re]ax,%[re]ax,1) 1012 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1013 // nopw 0L(%[re]ax,%[re]ax,1) 1014 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1015 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1016 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1017 }; 1018 1019 // 16-bit mode uses different nop patterns than 32-bit. 1020 static const char Nops16Bit[4][11] = { 1021 // nop 1022 "\x90", 1023 // xchg %eax,%eax 1024 "\x66\x90", 1025 // lea 0(%si),%si 1026 "\x8d\x74\x00", 1027 // lea 0w(%si),%si 1028 "\x8d\xb4\x00\x00", 1029 }; 1030 1031 const char(*Nops)[11] = 1032 STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit; 1033 1034 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1035 1036 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1037 // length. 1038 do { 1039 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1040 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1041 for (uint8_t i = 0; i < Prefixes; i++) 1042 OS << '\x66'; 1043 const uint8_t Rest = ThisNopLength - Prefixes; 1044 if (Rest != 0) 1045 OS.write(Nops[Rest - 1], Rest); 1046 Count -= ThisNopLength; 1047 } while (Count != 0); 1048 1049 return true; 1050 } 1051 1052 /* *** */ 1053 1054 namespace { 1055 1056 class ELFX86AsmBackend : public X86AsmBackend { 1057 public: 1058 uint8_t OSABI; 1059 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1060 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1061 }; 1062 1063 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1064 public: 1065 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1066 const MCSubtargetInfo &STI) 1067 : ELFX86AsmBackend(T, OSABI, STI) {} 1068 1069 std::unique_ptr<MCObjectTargetWriter> 1070 createObjectTargetWriter() const override { 1071 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1072 } 1073 }; 1074 1075 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1076 public: 1077 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1078 const MCSubtargetInfo &STI) 1079 : ELFX86AsmBackend(T, OSABI, STI) {} 1080 1081 std::unique_ptr<MCObjectTargetWriter> 1082 createObjectTargetWriter() const override { 1083 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1084 ELF::EM_X86_64); 1085 } 1086 }; 1087 1088 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1089 public: 1090 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1091 const MCSubtargetInfo &STI) 1092 : ELFX86AsmBackend(T, OSABI, STI) {} 1093 1094 std::unique_ptr<MCObjectTargetWriter> 1095 createObjectTargetWriter() const override { 1096 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1097 ELF::EM_IAMCU); 1098 } 1099 }; 1100 1101 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1102 public: 1103 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1104 const MCSubtargetInfo &STI) 1105 : ELFX86AsmBackend(T, OSABI, STI) {} 1106 1107 std::unique_ptr<MCObjectTargetWriter> 1108 createObjectTargetWriter() const 
override { 1109 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1110 } 1111 }; 1112 1113 class WindowsX86AsmBackend : public X86AsmBackend { 1114 bool Is64Bit; 1115 1116 public: 1117 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1118 const MCSubtargetInfo &STI) 1119 : X86AsmBackend(T, STI) 1120 , Is64Bit(is64Bit) { 1121 } 1122 1123 std::optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1124 return StringSwitch<std::optional<MCFixupKind>>(Name) 1125 .Case("dir32", FK_Data_4) 1126 .Case("secrel32", FK_SecRel_4) 1127 .Case("secidx", FK_SecRel_2) 1128 .Default(MCAsmBackend::getFixupKind(Name)); 1129 } 1130 1131 std::unique_ptr<MCObjectTargetWriter> 1132 createObjectTargetWriter() const override { 1133 return createX86WinCOFFObjectWriter(Is64Bit); 1134 } 1135 }; 1136 1137 namespace CU { 1138 1139 /// Compact unwind encoding values. 1140 enum CompactUnwindEncodings { 1141 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1142 /// the return address, then [RE]SP is moved to [RE]BP. 1143 UNWIND_MODE_BP_FRAME = 0x01000000, 1144 1145 /// A frameless function with a small constant stack size. 1146 UNWIND_MODE_STACK_IMMD = 0x02000000, 1147 1148 /// A frameless function with a large constant stack size. 1149 UNWIND_MODE_STACK_IND = 0x03000000, 1150 1151 /// No compact unwind encoding is available. 1152 UNWIND_MODE_DWARF = 0x04000000, 1153 1154 /// Mask for encoding the frame registers. 1155 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1156 1157 /// Mask for encoding the frameless registers. 1158 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1159 }; 1160 1161 } // namespace CU 1162 1163 class DarwinX86AsmBackend : public X86AsmBackend { 1164 const MCRegisterInfo &MRI; 1165 1166 /// Number of registers that can be saved in a compact unwind encoding. 1167 enum { CU_NUM_SAVED_REGS = 6 }; 1168 1169 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1170 Triple TT; 1171 bool Is64Bit; 1172 1173 unsigned OffsetSize; ///< Offset of a "push" instruction. 1174 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1175 unsigned StackDivide; ///< Amount to adjust stack size by. 1176 protected: 1177 /// Size of a "push" instruction for the given register. 1178 unsigned PushInstrSize(unsigned Reg) const { 1179 switch (Reg) { 1180 case X86::EBX: 1181 case X86::ECX: 1182 case X86::EDX: 1183 case X86::EDI: 1184 case X86::ESI: 1185 case X86::EBP: 1186 case X86::RBX: 1187 case X86::RBP: 1188 return 1; 1189 case X86::R12: 1190 case X86::R13: 1191 case X86::R14: 1192 case X86::R15: 1193 return 2; 1194 } 1195 return 1; 1196 } 1197 1198 private: 1199 /// Get the compact unwind number for a given register. The number 1200 /// corresponds to the enum lists in compact_unwind_encoding.h. 1201 int getCompactUnwindRegNum(unsigned Reg) const { 1202 static const MCPhysReg CU32BitRegs[7] = { 1203 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1204 }; 1205 static const MCPhysReg CU64BitRegs[] = { 1206 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1207 }; 1208 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1209 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1210 if (*CURegs == Reg) 1211 return Idx; 1212 1213 return -1; 1214 } 1215 1216 /// Return the registers encoded for a compact encoding with a frame 1217 /// pointer. 1218 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1219 // Encode the registers in the order they were saved --- 3-bits per 1220 // register. 
The list of saved registers is assumed to be in reverse 1221 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1222 uint32_t RegEnc = 0; 1223 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1224 unsigned Reg = SavedRegs[i]; 1225 if (Reg == 0) break; 1226 1227 int CURegNum = getCompactUnwindRegNum(Reg); 1228 if (CURegNum == -1) return ~0U; 1229 1230 // Encode the 3-bit register number in order, skipping over 3-bits for 1231 // each register. 1232 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1233 } 1234 1235 assert((RegEnc & 0x3FFFF) == RegEnc && 1236 "Invalid compact register encoding!"); 1237 return RegEnc; 1238 } 1239 1240 /// Create the permutation encoding used with frameless stacks. It is 1241 /// passed the number of registers to be saved and an array of the registers 1242 /// saved. 1243 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1244 // The saved registers are numbered from 1 to 6. In order to encode the 1245 // order in which they were saved, we re-number them according to their 1246 // place in the register order. The re-numbering is relative to the last 1247 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in 1248 // that order: 1249 // 1250 // Orig Re-Num 1251 // ---- ------ 1252 // 6 6 1253 // 2 2 1254 // 4 3 1255 // 5 3 1256 // 1257 for (unsigned i = 0; i < RegCount; ++i) { 1258 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1259 if (CUReg == -1) return ~0U; 1260 SavedRegs[i] = CUReg; 1261 } 1262 1263 // Reverse the list. 1264 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1265 1266 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1267 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1268 unsigned Countless = 0; 1269 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1270 if (SavedRegs[j] < SavedRegs[i]) 1271 ++Countless; 1272 1273 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1274 } 1275 1276 // Take the renumbered values and encode them into a 10-bit number. 1277 uint32_t permutationEncoding = 0; 1278 switch (RegCount) { 1279 case 6: 1280 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1281 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1282 + RenumRegs[4]; 1283 break; 1284 case 5: 1285 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1286 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1287 + RenumRegs[5]; 1288 break; 1289 case 4: 1290 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1291 + 3 * RenumRegs[4] + RenumRegs[5]; 1292 break; 1293 case 3: 1294 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1295 + RenumRegs[5]; 1296 break; 1297 case 2: 1298 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1299 break; 1300 case 1: 1301 permutationEncoding |= RenumRegs[5]; 1302 break; 1303 } 1304 1305 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1306 "Invalid compact register encoding!"); 1307 return permutationEncoding; 1308 } 1309 1310 public: 1311 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1312 const MCSubtargetInfo &STI) 1313 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1314 Is64Bit(TT.isArch64Bit()) { 1315 memset(SavedRegs, 0, sizeof(SavedRegs)); 1316 OffsetSize = Is64Bit ? 8 : 4; 1317 MoveInstrSize = Is64Bit ? 3 : 2; 1318 StackDivide = Is64Bit ? 
8 : 4; 1319 } 1320 1321 std::unique_ptr<MCObjectTargetWriter> 1322 createObjectTargetWriter() const override { 1323 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1324 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1325 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1326 } 1327 1328 /// Implementation of algorithm to generate the compact unwind encoding 1329 /// for the CFI instructions. 1330 uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI, 1331 const MCContext *Ctxt) const override { 1332 ArrayRef<MCCFIInstruction> Instrs = FI->Instructions; 1333 if (Instrs.empty()) return 0; 1334 if (!isDarwinCanonicalPersonality(FI->Personality) && 1335 !Ctxt->emitCompactUnwindNonCanonical()) 1336 return CU::UNWIND_MODE_DWARF; 1337 1338 // Reset the saved registers. 1339 unsigned SavedRegIdx = 0; 1340 memset(SavedRegs, 0, sizeof(SavedRegs)); 1341 1342 bool HasFP = false; 1343 1344 // Encode that we are using EBP/RBP as the frame pointer. 1345 uint32_t CompactUnwindEncoding = 0; 1346 1347 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1348 unsigned InstrOffset = 0; 1349 unsigned StackAdjust = 0; 1350 unsigned StackSize = 0; 1351 int MinAbsOffset = std::numeric_limits<int>::max(); 1352 1353 for (const MCCFIInstruction &Inst : Instrs) { 1354 switch (Inst.getOperation()) { 1355 default: 1356 // Any other CFI directives indicate a frame that we aren't prepared 1357 // to represent via compact unwind, so just bail out. 1358 return CU::UNWIND_MODE_DWARF; 1359 case MCCFIInstruction::OpDefCfaRegister: { 1360 // Defines a frame pointer. E.g. 1361 // 1362 // movq %rsp, %rbp 1363 // L0: 1364 // .cfi_def_cfa_register %rbp 1365 // 1366 HasFP = true; 1367 1368 // If the frame pointer is other than esp/rsp, we do not have a way to 1369 // generate a compact unwinding representation, so bail out. 1370 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1371 (Is64Bit ? X86::RBP : X86::EBP)) 1372 return CU::UNWIND_MODE_DWARF; 1373 1374 // Reset the counts. 1375 memset(SavedRegs, 0, sizeof(SavedRegs)); 1376 StackAdjust = 0; 1377 SavedRegIdx = 0; 1378 MinAbsOffset = std::numeric_limits<int>::max(); 1379 InstrOffset += MoveInstrSize; 1380 break; 1381 } 1382 case MCCFIInstruction::OpDefCfaOffset: { 1383 // Defines a new offset for the CFA. E.g. 1384 // 1385 // With frame: 1386 // 1387 // pushq %rbp 1388 // L0: 1389 // .cfi_def_cfa_offset 16 1390 // 1391 // Without frame: 1392 // 1393 // subq $72, %rsp 1394 // L0: 1395 // .cfi_def_cfa_offset 80 1396 // 1397 StackSize = Inst.getOffset() / StackDivide; 1398 break; 1399 } 1400 case MCCFIInstruction::OpOffset: { 1401 // Defines a "push" of a callee-saved register. E.g. 1402 // 1403 // pushq %r15 1404 // pushq %r14 1405 // pushq %rbx 1406 // L0: 1407 // subq $120, %rsp 1408 // L1: 1409 // .cfi_offset %rbx, -40 1410 // .cfi_offset %r14, -32 1411 // .cfi_offset %r15, -24 1412 // 1413 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1414 // If there are too many saved registers, we cannot use a compact 1415 // unwind encoding. 1416 return CU::UNWIND_MODE_DWARF; 1417 1418 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1419 SavedRegs[SavedRegIdx++] = Reg; 1420 StackAdjust += OffsetSize; 1421 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset())); 1422 InstrOffset += PushInstrSize(Reg); 1423 break; 1424 } 1425 } 1426 } 1427 1428 StackAdjust /= StackDivide; 1429 1430 if (HasFP) { 1431 if ((StackAdjust & 0xFF) != StackAdjust) 1432 // Offset was too big for a compact unwind encoding. 
1433 return CU::UNWIND_MODE_DWARF; 1434 1435 // We don't attempt to track a real StackAdjust, so if the saved registers 1436 // aren't adjacent to rbp we can't cope. 1437 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1438 return CU::UNWIND_MODE_DWARF; 1439 1440 // Get the encoding of the saved registers when we have a frame pointer. 1441 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1442 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1443 1444 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1445 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1446 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1447 } else { 1448 SubtractInstrIdx += InstrOffset; 1449 ++StackAdjust; 1450 1451 if ((StackSize & 0xFF) == StackSize) { 1452 // Frameless stack with a small stack size. 1453 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1454 1455 // Encode the stack size. 1456 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1457 } else { 1458 if ((StackAdjust & 0x7) != StackAdjust) 1459 // The extra stack adjustments are too big for us to handle. 1460 return CU::UNWIND_MODE_DWARF; 1461 1462 // Frameless stack with an offset too large for us to encode compactly. 1463 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1464 1465 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1466 // instruction. 1467 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1468 1469 // Encode any extra stack adjustments (done via push instructions). 1470 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1471 } 1472 1473 // Encode the number of registers saved. (Reverse the list first.) 1474 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1475 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1476 1477 // Get the encoding of the saved registers when we don't have a frame 1478 // pointer. 1479 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1480 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1481 1482 // Encode the register encoding. 
1483 CompactUnwindEncoding |= 1484 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1485 } 1486 1487 return CompactUnwindEncoding; 1488 } 1489 }; 1490 1491 } // end anonymous namespace 1492 1493 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1494 const MCSubtargetInfo &STI, 1495 const MCRegisterInfo &MRI, 1496 const MCTargetOptions &Options) { 1497 const Triple &TheTriple = STI.getTargetTriple(); 1498 if (TheTriple.isOSBinFormatMachO()) 1499 return new DarwinX86AsmBackend(T, MRI, STI); 1500 1501 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1502 return new WindowsX86AsmBackend(T, false, STI); 1503 1504 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1505 1506 if (TheTriple.isOSIAMCU()) 1507 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1508 1509 return new ELFX86_32AsmBackend(T, OSABI, STI); 1510 } 1511 1512 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1513 const MCSubtargetInfo &STI, 1514 const MCRegisterInfo &MRI, 1515 const MCTargetOptions &Options) { 1516 const Triple &TheTriple = STI.getTargetTriple(); 1517 if (TheTriple.isOSBinFormatMachO()) 1518 return new DarwinX86AsmBackend(T, MRI, STI); 1519 1520 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1521 return new WindowsX86AsmBackend(T, true, STI); 1522 1523 if (TheTriple.isUEFI()) { 1524 assert(TheTriple.isOSBinFormatCOFF() && 1525 "Only COFF format is supported in UEFI environment."); 1526 return new WindowsX86AsmBackend(T, true, STI); 1527 } 1528 1529 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1530 1531 if (TheTriple.isX32()) 1532 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1533 return new ELFX86_64AsmBackend(T, OSABI, STI); 1534 } 1535