1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86FixupKinds.h" 11 #include "MCTargetDesc/X86InstrRelaxTables.h" 12 #include "llvm/ADT/StringSwitch.h" 13 #include "llvm/BinaryFormat/ELF.h" 14 #include "llvm/BinaryFormat/MachO.h" 15 #include "llvm/MC/MCAsmBackend.h" 16 #include "llvm/MC/MCAsmLayout.h" 17 #include "llvm/MC/MCAssembler.h" 18 #include "llvm/MC/MCCodeEmitter.h" 19 #include "llvm/MC/MCContext.h" 20 #include "llvm/MC/MCDwarf.h" 21 #include "llvm/MC/MCELFObjectWriter.h" 22 #include "llvm/MC/MCExpr.h" 23 #include "llvm/MC/MCFixupKindInfo.h" 24 #include "llvm/MC/MCInst.h" 25 #include "llvm/MC/MCInstrInfo.h" 26 #include "llvm/MC/MCMachObjectWriter.h" 27 #include "llvm/MC/MCObjectStreamer.h" 28 #include "llvm/MC/MCObjectWriter.h" 29 #include "llvm/MC/MCRegisterInfo.h" 30 #include "llvm/MC/MCSectionMachO.h" 31 #include "llvm/MC/MCSubtargetInfo.h" 32 #include "llvm/MC/MCValue.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 38 using namespace llvm; 39 40 namespace { 41 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 42 class X86AlignBranchKind { 43 private: 44 uint8_t AlignBranchKind = 0; 45 46 public: 47 void operator=(const std::string &Val) { 48 if (Val.empty()) 49 return; 50 SmallVector<StringRef, 6> BranchTypes; 51 StringRef(Val).split(BranchTypes, '+', -1, false); 52 for (auto BranchType : BranchTypes) { 53 if (BranchType == "fused") 54 addKind(X86::AlignBranchFused); 55 else if (BranchType == "jcc") 56 addKind(X86::AlignBranchJcc); 57 else if (BranchType == "jmp") 58 addKind(X86::AlignBranchJmp); 59 else if (BranchType == "call") 60 addKind(X86::AlignBranchCall); 61 else if (BranchType == "ret") 62 addKind(X86::AlignBranchRet); 63 else if (BranchType == "indirect") 64 addKind(X86::AlignBranchIndirect); 65 else { 66 errs() << "invalid argument " << BranchType.str() 67 << " to -x86-align-branch=; each element must be one of: fused, " 68 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 69 } 70 } 71 } 72 73 operator uint8_t() const { return AlignBranchKind; } 74 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 75 }; 76 77 X86AlignBranchKind X86AlignBranchKindLoc; 78 79 cl::opt<unsigned> X86AlignBranchBoundary( 80 "x86-align-branch-boundary", cl::init(0), 81 cl::desc( 82 "Control how the assembler should align branches with NOP. If the " 83 "boundary's size is not 0, it should be a power of 2 and no less " 84 "than 32. Branches will be aligned to prevent from being across or " 85 "against the boundary of specified size. The default value 0 does not " 86 "align branches.")); 87 88 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( 89 "x86-align-branch", 90 cl::desc( 91 "Specify types of branches to align (plus separated list of types):" 92 "\njcc indicates conditional jumps" 93 "\nfused indicates fused conditional jumps" 94 "\njmp indicates direct unconditional jumps" 95 "\ncall indicates direct and indirect calls" 96 "\nret indicates rets" 97 "\nindirect indicates indirect unconditional jumps"), 98 cl::location(X86AlignBranchKindLoc)); 99 100 cl::opt<bool> X86AlignBranchWithin32BBoundaries( 101 "x86-branches-within-32B-boundaries", cl::init(false), 102 cl::desc( 103 "Align selected instructions to mitigate negative performance impact " 104 "of Intel's micro code update for errata skx102. May break " 105 "assumptions about labels corresponding to particular instructions, " 106 "and should be used with caution.")); 107 108 cl::opt<unsigned> X86PadMaxPrefixSize( 109 "x86-pad-max-prefix-size", cl::init(0), 110 cl::desc("Maximum number of prefixes to use for padding")); 111 112 cl::opt<bool> X86PadForAlign( 113 "x86-pad-for-align", cl::init(false), cl::Hidden, 114 cl::desc("Pad previous instructions to implement align directives")); 115 116 cl::opt<bool> X86PadForBranchAlign( 117 "x86-pad-for-branch-align", cl::init(true), cl::Hidden, 118 cl::desc("Pad previous instructions to implement branch alignment")); 119 120 class X86AsmBackend : public MCAsmBackend { 121 const MCSubtargetInfo &STI; 122 std::unique_ptr<const MCInstrInfo> MCII; 123 X86AlignBranchKind AlignBranchType; 124 Align AlignBoundary; 125 unsigned TargetPrefixMax = 0; 126 127 MCInst PrevInst; 128 MCBoundaryAlignFragment *PendingBA = nullptr; 129 std::pair<MCFragment *, size_t> PrevInstPosition; 130 bool CanPadInst; 131 132 uint8_t determinePaddingPrefix(const MCInst &Inst) const; 133 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; 134 bool needAlign(const MCInst &Inst) const; 135 bool canPadBranches(MCObjectStreamer &OS) const; 136 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; 137 138 public: 139 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) 140 : MCAsmBackend(support::little), STI(STI), 141 MCII(T.createMCInstrInfo()) { 142 if (X86AlignBranchWithin32BBoundaries) { 143 // At the moment, this defaults to aligning fused branches, unconditional 144 // jumps, and (unfused) conditional jumps with nops. Both the 145 // instructions aligned and the alignment method (nop vs prefix) may 146 // change in the future. 147 AlignBoundary = assumeAligned(32);; 148 AlignBranchType.addKind(X86::AlignBranchFused); 149 AlignBranchType.addKind(X86::AlignBranchJcc); 150 AlignBranchType.addKind(X86::AlignBranchJmp); 151 } 152 // Allow overriding defaults set by main flag 153 if (X86AlignBranchBoundary.getNumOccurrences()) 154 AlignBoundary = assumeAligned(X86AlignBranchBoundary); 155 if (X86AlignBranch.getNumOccurrences()) 156 AlignBranchType = X86AlignBranchKindLoc; 157 if (X86PadMaxPrefixSize.getNumOccurrences()) 158 TargetPrefixMax = X86PadMaxPrefixSize; 159 } 160 161 bool allowAutoPadding() const override; 162 bool allowEnhancedRelaxation() const override; 163 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, 164 const MCSubtargetInfo &STI) override; 165 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; 166 167 unsigned getNumFixupKinds() const override { 168 return X86::NumTargetFixupKinds; 169 } 170 171 Optional<MCFixupKind> getFixupKind(StringRef Name) const override; 172 173 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; 174 175 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, 176 const MCValue &Target) override; 177 178 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 179 const MCValue &Target, MutableArrayRef<char> Data, 180 uint64_t Value, bool IsResolved, 181 const MCSubtargetInfo *STI) const override; 182 183 bool mayNeedRelaxation(const MCInst &Inst, 184 const MCSubtargetInfo &STI) const override; 185 186 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, 187 const MCRelaxableFragment *DF, 188 const MCAsmLayout &Layout) const override; 189 190 void relaxInstruction(MCInst &Inst, 191 const MCSubtargetInfo &STI) const override; 192 193 bool padInstructionViaRelaxation(MCRelaxableFragment &RF, 194 MCCodeEmitter &Emitter, 195 unsigned &RemainingSize) const; 196 197 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 198 unsigned &RemainingSize) const; 199 200 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 201 unsigned &RemainingSize) const; 202 203 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; 204 205 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; 206 207 bool writeNopData(raw_ostream &OS, uint64_t Count, 208 const MCSubtargetInfo *STI) const override; 209 }; 210 } // end anonymous namespace 211 212 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { 213 unsigned Op = Inst.getOpcode(); 214 switch (Op) { 215 default: 216 return Op; 217 case X86::JCC_1: 218 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; 219 case X86::JMP_1: 220 return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4; 221 } 222 } 223 224 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { 225 unsigned Op = Inst.getOpcode(); 226 return X86::getRelaxedOpcodeArith(Op); 227 } 228 229 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { 230 unsigned R = getRelaxedOpcodeArith(Inst); 231 if (R != Inst.getOpcode()) 232 return R; 233 return getRelaxedOpcodeBranch(Inst, Is16BitMode); 234 } 235 236 static X86::CondCode getCondFromBranch(const MCInst &MI, 237 const MCInstrInfo &MCII) { 238 unsigned Opcode = MI.getOpcode(); 239 switch (Opcode) { 240 default: 241 return X86::COND_INVALID; 242 case X86::JCC_1: { 243 const MCInstrDesc &Desc = MCII.get(Opcode); 244 return static_cast<X86::CondCode>( 245 MI.getOperand(Desc.getNumOperands() - 1).getImm()); 246 } 247 } 248 } 249 250 static X86::SecondMacroFusionInstKind 251 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { 252 X86::CondCode CC = getCondFromBranch(MI, MCII); 253 return classifySecondCondCodeInMacroFusion(CC); 254 } 255 256 /// Check if the instruction uses RIP relative addressing. 257 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 258 unsigned Opcode = MI.getOpcode(); 259 const MCInstrDesc &Desc = MCII.get(Opcode); 260 uint64_t TSFlags = Desc.TSFlags; 261 unsigned CurOp = X86II::getOperandBias(Desc); 262 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 263 if (MemoryOperand < 0) 264 return false; 265 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 266 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 267 return (BaseReg == X86::RIP); 268 } 269 270 /// Check if the instruction is a prefix. 271 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { 272 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); 273 } 274 275 /// Check if the instruction is valid as the first instruction in macro fusion. 276 static bool isFirstMacroFusibleInst(const MCInst &Inst, 277 const MCInstrInfo &MCII) { 278 // An Intel instruction with RIP relative addressing is not macro fusible. 279 if (isRIPRelative(Inst, MCII)) 280 return false; 281 X86::FirstMacroFusionInstKind FIK = 282 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 283 return FIK != X86::FirstMacroFusionInstKind::Invalid; 284 } 285 286 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 287 /// get a better peformance in some cases. Here, we determine which prefix is 288 /// the most suitable. 289 /// 290 /// If the instruction has a segment override prefix, use the existing one. 291 /// If the target is 64-bit, use the CS. 292 /// If the target is 32-bit, 293 /// - If the instruction has a ESP/EBP base register, use SS. 294 /// - Otherwise use DS. 295 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 296 assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) && 297 "Prefixes can be added only in 32-bit or 64-bit mode."); 298 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 299 uint64_t TSFlags = Desc.TSFlags; 300 301 // Determine where the memory operand starts, if present. 302 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 303 if (MemoryOperand != -1) 304 MemoryOperand += X86II::getOperandBias(Desc); 305 306 unsigned SegmentReg = 0; 307 if (MemoryOperand >= 0) { 308 // Check for explicit segment override on memory operand. 309 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 310 } 311 312 switch (TSFlags & X86II::FormMask) { 313 default: 314 break; 315 case X86II::RawFrmDstSrc: { 316 // Check segment override opcode prefix as needed (not for %ds). 317 if (Inst.getOperand(2).getReg() != X86::DS) 318 SegmentReg = Inst.getOperand(2).getReg(); 319 break; 320 } 321 case X86II::RawFrmSrc: { 322 // Check segment override opcode prefix as needed (not for %ds). 323 if (Inst.getOperand(1).getReg() != X86::DS) 324 SegmentReg = Inst.getOperand(1).getReg(); 325 break; 326 } 327 case X86II::RawFrmMemOffs: { 328 // Check segment override opcode prefix as needed. 329 SegmentReg = Inst.getOperand(1).getReg(); 330 break; 331 } 332 } 333 334 if (SegmentReg != 0) 335 return X86::getSegmentOverridePrefixForReg(SegmentReg); 336 337 if (STI.hasFeature(X86::Is64Bit)) 338 return X86::CS_Encoding; 339 340 if (MemoryOperand >= 0) { 341 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 342 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 343 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 344 return X86::SS_Encoding; 345 } 346 return X86::DS_Encoding; 347 } 348 349 /// Check if the two instructions will be macro-fused on the target cpu. 350 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 351 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 352 if (!InstDesc.isConditionalBranch()) 353 return false; 354 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 355 return false; 356 const X86::FirstMacroFusionInstKind CmpKind = 357 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 358 const X86::SecondMacroFusionInstKind BranchKind = 359 classifySecondInstInMacroFusion(Jcc, *MCII); 360 return X86::isMacroFused(CmpKind, BranchKind); 361 } 362 363 /// Check if the instruction has a variant symbol operand. 364 static bool hasVariantSymbol(const MCInst &MI) { 365 for (auto &Operand : MI) { 366 if (!Operand.isExpr()) 367 continue; 368 const MCExpr &Expr = *Operand.getExpr(); 369 if (Expr.getKind() == MCExpr::SymbolRef && 370 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 371 return true; 372 } 373 return false; 374 } 375 376 bool X86AsmBackend::allowAutoPadding() const { 377 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 378 } 379 380 bool X86AsmBackend::allowEnhancedRelaxation() const { 381 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 382 } 383 384 /// X86 has certain instructions which enable interrupts exactly one 385 /// instruction *after* the instruction which stores to SS. Return true if the 386 /// given instruction has such an interrupt delay slot. 387 static bool hasInterruptDelaySlot(const MCInst &Inst) { 388 switch (Inst.getOpcode()) { 389 case X86::POPSS16: 390 case X86::POPSS32: 391 case X86::STI: 392 return true; 393 394 case X86::MOV16sr: 395 case X86::MOV32sr: 396 case X86::MOV64sr: 397 case X86::MOV16sm: 398 if (Inst.getOperand(0).getReg() == X86::SS) 399 return true; 400 break; 401 } 402 return false; 403 } 404 405 /// Check if the instruction to be emitted is right after any data. 406 static bool 407 isRightAfterData(MCFragment *CurrentFragment, 408 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 409 MCFragment *F = CurrentFragment; 410 // Empty data fragments may be created to prevent further data being 411 // added into the previous fragment, we need to skip them since they 412 // have no contents. 413 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) 414 if (cast<MCDataFragment>(F)->getContents().size() != 0) 415 break; 416 417 // Since data is always emitted into a DataFragment, our check strategy is 418 // simple here. 419 // - If the fragment is a DataFragment 420 // - If it's not the fragment where the previous instruction is, 421 // returns true. 422 // - If it's the fragment holding the previous instruction but its 423 // size changed since the the previous instruction was emitted into 424 // it, returns true. 425 // - Otherwise returns false. 426 // - If the fragment is not a DataFragment, returns false. 427 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) 428 return DF != PrevInstPosition.first || 429 DF->getContents().size() != PrevInstPosition.second; 430 431 return false; 432 } 433 434 /// \returns the fragment size if it has instructions, otherwise returns 0. 435 static size_t getSizeForInstFragment(const MCFragment *F) { 436 if (!F || !F->hasInstructions()) 437 return 0; 438 // MCEncodedFragmentWithContents being templated makes this tricky. 439 switch (F->getKind()) { 440 default: 441 llvm_unreachable("Unknown fragment with instructions!"); 442 case MCFragment::FT_Data: 443 return cast<MCDataFragment>(*F).getContents().size(); 444 case MCFragment::FT_Relaxable: 445 return cast<MCRelaxableFragment>(*F).getContents().size(); 446 case MCFragment::FT_CompactEncodedInst: 447 return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); 448 } 449 } 450 451 /// Return true if we can insert NOP or prefixes automatically before the 452 /// the instruction to be emitted. 453 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { 454 if (hasVariantSymbol(Inst)) 455 // Linker may rewrite the instruction with variant symbol operand(e.g. 456 // TLSCALL). 457 return false; 458 459 if (hasInterruptDelaySlot(PrevInst)) 460 // If this instruction follows an interrupt enabling instruction with a one 461 // instruction delay, inserting a nop would change behavior. 462 return false; 463 464 if (isPrefix(PrevInst, *MCII)) 465 // If this instruction follows a prefix, inserting a nop/prefix would change 466 // semantic. 467 return false; 468 469 if (isPrefix(Inst, *MCII)) 470 // If this instruction is a prefix, inserting a prefix would change 471 // semantic. 472 return false; 473 474 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) 475 // If this instruction follows any data, there is no clear 476 // instruction boundary, inserting a nop/prefix would change semantic. 477 return false; 478 479 return true; 480 } 481 482 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { 483 if (!OS.getAllowAutoPadding()) 484 return false; 485 assert(allowAutoPadding() && "incorrect initialization!"); 486 487 // We only pad in text section. 488 if (!OS.getCurrentSectionOnly()->getKind().isText()) 489 return false; 490 491 // To be Done: Currently don't deal with Bundle cases. 492 if (OS.getAssembler().isBundlingEnabled()) 493 return false; 494 495 // Branches only need to be aligned in 32-bit or 64-bit mode. 496 if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit))) 497 return false; 498 499 return true; 500 } 501 502 /// Check if the instruction operand needs to be aligned. 503 bool X86AsmBackend::needAlign(const MCInst &Inst) const { 504 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 505 return (Desc.isConditionalBranch() && 506 (AlignBranchType & X86::AlignBranchJcc)) || 507 (Desc.isUnconditionalBranch() && 508 (AlignBranchType & X86::AlignBranchJmp)) || 509 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || 510 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || 511 (Desc.isIndirectBranch() && 512 (AlignBranchType & X86::AlignBranchIndirect)); 513 } 514 515 /// Insert BoundaryAlignFragment before instructions to align branches. 516 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, 517 const MCInst &Inst, const MCSubtargetInfo &STI) { 518 CanPadInst = canPadInst(Inst, OS); 519 520 if (!canPadBranches(OS)) 521 return; 522 523 if (!isMacroFused(PrevInst, Inst)) 524 // Macro fusion doesn't happen indeed, clear the pending. 525 PendingBA = nullptr; 526 527 if (!CanPadInst) 528 return; 529 530 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { 531 // Macro fusion actually happens and there is no other fragment inserted 532 // after the previous instruction. 533 // 534 // Do nothing here since we already inserted a BoudaryAlign fragment when 535 // we met the first instruction in the fused pair and we'll tie them 536 // together in emitInstructionEnd. 537 // 538 // Note: When there is at least one fragment, such as MCAlignFragment, 539 // inserted after the previous instruction, e.g. 540 // 541 // \code 542 // cmp %rax %rcx 543 // .align 16 544 // je .Label0 545 // \ endcode 546 // 547 // We will treat the JCC as a unfused branch although it may be fused 548 // with the CMP. 549 return; 550 } 551 552 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && 553 isFirstMacroFusibleInst(Inst, *MCII))) { 554 // If we meet a unfused branch or the first instuction in a fusiable pair, 555 // insert a BoundaryAlign fragment. 556 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI)); 557 } 558 } 559 560 /// Set the last fragment to be aligned for the BoundaryAlignFragment. 561 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { 562 PrevInst = Inst; 563 MCFragment *CF = OS.getCurrentFragment(); 564 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); 565 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) 566 F->setAllowAutoPadding(CanPadInst); 567 568 if (!canPadBranches(OS)) 569 return; 570 571 if (!needAlign(Inst) || !PendingBA) 572 return; 573 574 // Tie the aligned instructions into a a pending BoundaryAlign. 575 PendingBA->setLastFragment(CF); 576 PendingBA = nullptr; 577 578 // We need to ensure that further data isn't added to the current 579 // DataFragment, so that we can get the size of instructions later in 580 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty 581 // DataFragment. 582 if (isa_and_nonnull<MCDataFragment>(CF)) 583 OS.insert(new MCDataFragment()); 584 585 // Update the maximum alignment on the current section if necessary. 586 MCSection *Sec = OS.getCurrentSectionOnly(); 587 if (AlignBoundary.value() > Sec->getAlignment()) 588 Sec->setAlignment(AlignBoundary); 589 } 590 591 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 592 if (STI.getTargetTriple().isOSBinFormatELF()) { 593 unsigned Type; 594 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 595 Type = llvm::StringSwitch<unsigned>(Name) 596 #define ELF_RELOC(X, Y) .Case(#X, Y) 597 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 598 #undef ELF_RELOC 599 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE) 600 .Case("BFD_RELOC_8", ELF::R_X86_64_8) 601 .Case("BFD_RELOC_16", ELF::R_X86_64_16) 602 .Case("BFD_RELOC_32", ELF::R_X86_64_32) 603 .Case("BFD_RELOC_64", ELF::R_X86_64_64) 604 .Default(-1u); 605 } else { 606 Type = llvm::StringSwitch<unsigned>(Name) 607 #define ELF_RELOC(X, Y) .Case(#X, Y) 608 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 609 #undef ELF_RELOC 610 .Case("BFD_RELOC_NONE", ELF::R_386_NONE) 611 .Case("BFD_RELOC_8", ELF::R_386_8) 612 .Case("BFD_RELOC_16", ELF::R_386_16) 613 .Case("BFD_RELOC_32", ELF::R_386_32) 614 .Default(-1u); 615 } 616 if (Type == -1u) 617 return None; 618 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 619 } 620 return MCAsmBackend::getFixupKind(Name); 621 } 622 623 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 624 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 625 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 626 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 627 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 628 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 629 {"reloc_signed_4byte", 0, 32, 0}, 630 {"reloc_signed_4byte_relax", 0, 32, 0}, 631 {"reloc_global_offset_table", 0, 32, 0}, 632 {"reloc_global_offset_table8", 0, 64, 0}, 633 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 634 }; 635 636 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 637 // do not require any extra processing. 638 if (Kind >= FirstLiteralRelocationKind) 639 return MCAsmBackend::getFixupKindInfo(FK_NONE); 640 641 if (Kind < FirstTargetFixupKind) 642 return MCAsmBackend::getFixupKindInfo(Kind); 643 644 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 645 "Invalid kind!"); 646 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 647 return Infos[Kind - FirstTargetFixupKind]; 648 } 649 650 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 651 const MCFixup &Fixup, 652 const MCValue &) { 653 return Fixup.getKind() >= FirstLiteralRelocationKind; 654 } 655 656 static unsigned getFixupKindSize(unsigned Kind) { 657 switch (Kind) { 658 default: 659 llvm_unreachable("invalid fixup kind!"); 660 case FK_NONE: 661 return 0; 662 case FK_PCRel_1: 663 case FK_SecRel_1: 664 case FK_Data_1: 665 return 1; 666 case FK_PCRel_2: 667 case FK_SecRel_2: 668 case FK_Data_2: 669 return 2; 670 case FK_PCRel_4: 671 case X86::reloc_riprel_4byte: 672 case X86::reloc_riprel_4byte_relax: 673 case X86::reloc_riprel_4byte_relax_rex: 674 case X86::reloc_riprel_4byte_movq_load: 675 case X86::reloc_signed_4byte: 676 case X86::reloc_signed_4byte_relax: 677 case X86::reloc_global_offset_table: 678 case X86::reloc_branch_4byte_pcrel: 679 case FK_SecRel_4: 680 case FK_Data_4: 681 return 4; 682 case FK_PCRel_8: 683 case FK_SecRel_8: 684 case FK_Data_8: 685 case X86::reloc_global_offset_table8: 686 return 8; 687 } 688 } 689 690 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 691 const MCValue &Target, 692 MutableArrayRef<char> Data, 693 uint64_t Value, bool IsResolved, 694 const MCSubtargetInfo *STI) const { 695 unsigned Kind = Fixup.getKind(); 696 if (Kind >= FirstLiteralRelocationKind) 697 return; 698 unsigned Size = getFixupKindSize(Kind); 699 700 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); 701 702 int64_t SignedValue = static_cast<int64_t>(Value); 703 if ((Target.isAbsolute() || IsResolved) && 704 getFixupKindInfo(Fixup.getKind()).Flags & 705 MCFixupKindInfo::FKF_IsPCRel) { 706 // check that PC relative fixup fits into the fixup size. 707 if (Size > 0 && !isIntN(Size * 8, SignedValue)) 708 Asm.getContext().reportError( 709 Fixup.getLoc(), "value of " + Twine(SignedValue) + 710 " is too large for field of " + Twine(Size) + 711 ((Size == 1) ? " byte." : " bytes.")); 712 } else { 713 // Check that uppper bits are either all zeros or all ones. 714 // Specifically ignore overflow/underflow as long as the leakage is 715 // limited to the lower bits. This is to remain compatible with 716 // other assemblers. 717 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && 718 "Value does not fit in the Fixup field"); 719 } 720 721 for (unsigned i = 0; i != Size; ++i) 722 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); 723 } 724 725 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst, 726 const MCSubtargetInfo &STI) const { 727 // Branches can always be relaxed in either mode. 728 if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode()) 729 return true; 730 731 // Check if this instruction is ever relaxable. 732 if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode()) 733 return false; 734 735 736 // Check if the relaxable operand has an expression. For the current set of 737 // relaxable instructions, the relaxable operand is always the last operand. 738 unsigned RelaxableOp = Inst.getNumOperands() - 1; 739 if (Inst.getOperand(RelaxableOp).isExpr()) 740 return true; 741 742 return false; 743 } 744 745 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, 746 uint64_t Value, 747 const MCRelaxableFragment *DF, 748 const MCAsmLayout &Layout) const { 749 // Relax if the value is too big for a (signed) i8. 750 return !isInt<8>(Value); 751 } 752 753 // FIXME: Can tblgen help at all here to verify there aren't other instructions 754 // we can relax? 755 void X86AsmBackend::relaxInstruction(MCInst &Inst, 756 const MCSubtargetInfo &STI) const { 757 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 758 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; 759 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); 760 761 if (RelaxedOp == Inst.getOpcode()) { 762 SmallString<256> Tmp; 763 raw_svector_ostream OS(Tmp); 764 Inst.dump_pretty(OS); 765 OS << "\n"; 766 report_fatal_error("unexpected instruction to relax: " + OS.str()); 767 } 768 769 Inst.setOpcode(RelaxedOp); 770 } 771 772 /// Return true if this instruction has been fully relaxed into it's most 773 /// general available form. 774 static bool isFullyRelaxed(const MCRelaxableFragment &RF) { 775 auto &Inst = RF.getInst(); 776 auto &STI = *RF.getSubtargetInfo(); 777 bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit]; 778 return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); 779 } 780 781 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, 782 MCCodeEmitter &Emitter, 783 unsigned &RemainingSize) const { 784 if (!RF.getAllowAutoPadding()) 785 return false; 786 // If the instruction isn't fully relaxed, shifting it around might require a 787 // larger value for one of the fixups then can be encoded. The outer loop 788 // will also catch this before moving to the next instruction, but we need to 789 // prevent padding this single instruction as well. 790 if (!isFullyRelaxed(RF)) 791 return false; 792 793 const unsigned OldSize = RF.getContents().size(); 794 if (OldSize == 15) 795 return false; 796 797 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); 798 const unsigned RemainingPrefixSize = [&]() -> unsigned { 799 SmallString<15> Code; 800 raw_svector_ostream VecOS(Code); 801 Emitter.emitPrefix(RF.getInst(), VecOS, STI); 802 assert(Code.size() < 15 && "The number of prefixes must be less than 15."); 803 804 // TODO: It turns out we need a decent amount of plumbing for the target 805 // specific bits to determine number of prefixes its safe to add. Various 806 // targets (older chips mostly, but also Atom family) encounter decoder 807 // stalls with too many prefixes. For testing purposes, we set the value 808 // externally for the moment. 809 unsigned ExistingPrefixSize = Code.size(); 810 if (TargetPrefixMax <= ExistingPrefixSize) 811 return 0; 812 return TargetPrefixMax - ExistingPrefixSize; 813 }(); 814 const unsigned PrefixBytesToAdd = 815 std::min(MaxPossiblePad, RemainingPrefixSize); 816 if (PrefixBytesToAdd == 0) 817 return false; 818 819 const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); 820 821 SmallString<256> Code; 822 Code.append(PrefixBytesToAdd, Prefix); 823 Code.append(RF.getContents().begin(), RF.getContents().end()); 824 RF.getContents() = Code; 825 826 // Adjust the fixups for the change in offsets 827 for (auto &F : RF.getFixups()) { 828 F.setOffset(F.getOffset() + PrefixBytesToAdd); 829 } 830 831 RemainingSize -= PrefixBytesToAdd; 832 return true; 833 } 834 835 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, 836 MCCodeEmitter &Emitter, 837 unsigned &RemainingSize) const { 838 if (isFullyRelaxed(RF)) 839 // TODO: There are lots of other tricks we could apply for increasing 840 // encoding size without impacting performance. 841 return false; 842 843 MCInst Relaxed = RF.getInst(); 844 relaxInstruction(Relaxed, *RF.getSubtargetInfo()); 845 846 SmallVector<MCFixup, 4> Fixups; 847 SmallString<15> Code; 848 raw_svector_ostream VecOS(Code); 849 Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo()); 850 const unsigned OldSize = RF.getContents().size(); 851 const unsigned NewSize = Code.size(); 852 assert(NewSize >= OldSize && "size decrease during relaxation?"); 853 unsigned Delta = NewSize - OldSize; 854 if (Delta > RemainingSize) 855 return false; 856 RF.setInst(Relaxed); 857 RF.getContents() = Code; 858 RF.getFixups() = Fixups; 859 RemainingSize -= Delta; 860 return true; 861 } 862 863 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, 864 MCCodeEmitter &Emitter, 865 unsigned &RemainingSize) const { 866 bool Changed = false; 867 if (RemainingSize != 0) 868 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); 869 if (RemainingSize != 0) 870 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); 871 return Changed; 872 } 873 874 void X86AsmBackend::finishLayout(MCAssembler const &Asm, 875 MCAsmLayout &Layout) const { 876 // See if we can further relax some instructions to cut down on the number of 877 // nop bytes required for code alignment. The actual win is in reducing 878 // instruction count, not number of bytes. Modern X86-64 can easily end up 879 // decode limited. It is often better to reduce the number of instructions 880 // (i.e. eliminate nops) even at the cost of increasing the size and 881 // complexity of others. 882 if (!X86PadForAlign && !X86PadForBranchAlign) 883 return; 884 885 // The processed regions are delimitered by LabeledFragments. -g may have more 886 // MCSymbols and therefore different relaxation results. X86PadForAlign is 887 // disabled by default to eliminate the -g vs non -g difference. 888 DenseSet<MCFragment *> LabeledFragments; 889 for (const MCSymbol &S : Asm.symbols()) 890 LabeledFragments.insert(S.getFragment(false)); 891 892 for (MCSection &Sec : Asm) { 893 if (!Sec.getKind().isText()) 894 continue; 895 896 SmallVector<MCRelaxableFragment *, 4> Relaxable; 897 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { 898 MCFragment &F = *I; 899 900 if (LabeledFragments.count(&F)) 901 Relaxable.clear(); 902 903 if (F.getKind() == MCFragment::FT_Data || 904 F.getKind() == MCFragment::FT_CompactEncodedInst) 905 // Skip and ignore 906 continue; 907 908 if (F.getKind() == MCFragment::FT_Relaxable) { 909 auto &RF = cast<MCRelaxableFragment>(*I); 910 Relaxable.push_back(&RF); 911 continue; 912 } 913 914 auto canHandle = [](MCFragment &F) -> bool { 915 switch (F.getKind()) { 916 default: 917 return false; 918 case MCFragment::FT_Align: 919 return X86PadForAlign; 920 case MCFragment::FT_BoundaryAlign: 921 return X86PadForBranchAlign; 922 } 923 }; 924 // For any unhandled kind, assume we can't change layout. 925 if (!canHandle(F)) { 926 Relaxable.clear(); 927 continue; 928 } 929 930 #ifndef NDEBUG 931 const uint64_t OrigOffset = Layout.getFragmentOffset(&F); 932 #endif 933 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); 934 935 // To keep the effects local, prefer to relax instructions closest to 936 // the align directive. This is purely about human understandability 937 // of the resulting code. If we later find a reason to expand 938 // particular instructions over others, we can adjust. 939 MCFragment *FirstChangedFragment = nullptr; 940 unsigned RemainingSize = OrigSize; 941 while (!Relaxable.empty() && RemainingSize != 0) { 942 auto &RF = *Relaxable.pop_back_val(); 943 // Give the backend a chance to play any tricks it wishes to increase 944 // the encoding size of the given instruction. Target independent code 945 // will try further relaxation, but target's may play further tricks. 946 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) 947 FirstChangedFragment = &RF; 948 949 // If we have an instruction which hasn't been fully relaxed, we can't 950 // skip past it and insert bytes before it. Changing its starting 951 // offset might require a larger negative offset than it can encode. 952 // We don't need to worry about larger positive offsets as none of the 953 // possible offsets between this and our align are visible, and the 954 // ones afterwards aren't changing. 955 if (!isFullyRelaxed(RF)) 956 break; 957 } 958 Relaxable.clear(); 959 960 if (FirstChangedFragment) { 961 // Make sure the offsets for any fragments in the effected range get 962 // updated. Note that this (conservatively) invalidates the offsets of 963 // those following, but this is not required. 964 Layout.invalidateFragmentsFrom(FirstChangedFragment); 965 } 966 967 // BoundaryAlign explicitly tracks it's size (unlike align) 968 if (F.getKind() == MCFragment::FT_BoundaryAlign) 969 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 970 971 #ifndef NDEBUG 972 const uint64_t FinalOffset = Layout.getFragmentOffset(&F); 973 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); 974 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 975 "can't move start of next fragment!"); 976 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 977 #endif 978 979 // If we're looking at a boundary align, make sure we don't try to pad 980 // its target instructions for some following directive. Doing so would 981 // break the alignment of the current boundary align. 982 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 983 const MCFragment *LastFragment = BF->getLastFragment(); 984 if (!LastFragment) 985 continue; 986 while (&*I != LastFragment) 987 ++I; 988 } 989 } 990 } 991 992 // The layout is done. Mark every fragment as valid. 993 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { 994 MCSection &Section = *Layout.getSectionOrder()[i]; 995 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); 996 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); 997 } 998 } 999 1000 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { 1001 if (STI.hasFeature(X86::Is16Bit)) 1002 return 4; 1003 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit)) 1004 return 1; 1005 if (STI.getFeatureBits()[X86::TuningFast7ByteNOP]) 1006 return 7; 1007 if (STI.getFeatureBits()[X86::TuningFast15ByteNOP]) 1008 return 15; 1009 if (STI.getFeatureBits()[X86::TuningFast11ByteNOP]) 1010 return 11; 1011 // FIXME: handle 32-bit mode 1012 // 15-bytes is the longest single NOP instruction, but 10-bytes is 1013 // commonly the longest that can be efficiently decoded. 1014 return 10; 1015 } 1016 1017 /// Write a sequence of optimal nops to the output, covering \p Count 1018 /// bytes. 1019 /// \return - true on success, false on failure 1020 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 1021 const MCSubtargetInfo *STI) const { 1022 static const char Nops32Bit[10][11] = { 1023 // nop 1024 "\x90", 1025 // xchg %ax,%ax 1026 "\x66\x90", 1027 // nopl (%[re]ax) 1028 "\x0f\x1f\x00", 1029 // nopl 0(%[re]ax) 1030 "\x0f\x1f\x40\x00", 1031 // nopl 0(%[re]ax,%[re]ax,1) 1032 "\x0f\x1f\x44\x00\x00", 1033 // nopw 0(%[re]ax,%[re]ax,1) 1034 "\x66\x0f\x1f\x44\x00\x00", 1035 // nopl 0L(%[re]ax) 1036 "\x0f\x1f\x80\x00\x00\x00\x00", 1037 // nopl 0L(%[re]ax,%[re]ax,1) 1038 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1039 // nopw 0L(%[re]ax,%[re]ax,1) 1040 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1041 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1042 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1043 }; 1044 1045 // 16-bit mode uses different nop patterns than 32-bit. 1046 static const char Nops16Bit[4][11] = { 1047 // nop 1048 "\x90", 1049 // xchg %eax,%eax 1050 "\x66\x90", 1051 // lea 0(%si),%si 1052 "\x8d\x74\x00", 1053 // lea 0w(%si),%si 1054 "\x8d\xb4\x00\x00", 1055 }; 1056 1057 const char(*Nops)[11] = 1058 STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit; 1059 1060 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1061 1062 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1063 // length. 1064 do { 1065 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1066 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1067 for (uint8_t i = 0; i < Prefixes; i++) 1068 OS << '\x66'; 1069 const uint8_t Rest = ThisNopLength - Prefixes; 1070 if (Rest != 0) 1071 OS.write(Nops[Rest - 1], Rest); 1072 Count -= ThisNopLength; 1073 } while (Count != 0); 1074 1075 return true; 1076 } 1077 1078 /* *** */ 1079 1080 namespace { 1081 1082 class ELFX86AsmBackend : public X86AsmBackend { 1083 public: 1084 uint8_t OSABI; 1085 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1086 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1087 }; 1088 1089 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1090 public: 1091 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1092 const MCSubtargetInfo &STI) 1093 : ELFX86AsmBackend(T, OSABI, STI) {} 1094 1095 std::unique_ptr<MCObjectTargetWriter> 1096 createObjectTargetWriter() const override { 1097 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1098 } 1099 }; 1100 1101 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1102 public: 1103 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1104 const MCSubtargetInfo &STI) 1105 : ELFX86AsmBackend(T, OSABI, STI) {} 1106 1107 std::unique_ptr<MCObjectTargetWriter> 1108 createObjectTargetWriter() const override { 1109 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1110 ELF::EM_X86_64); 1111 } 1112 }; 1113 1114 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1115 public: 1116 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1117 const MCSubtargetInfo &STI) 1118 : ELFX86AsmBackend(T, OSABI, STI) {} 1119 1120 std::unique_ptr<MCObjectTargetWriter> 1121 createObjectTargetWriter() const override { 1122 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1123 ELF::EM_IAMCU); 1124 } 1125 }; 1126 1127 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1128 public: 1129 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1130 const MCSubtargetInfo &STI) 1131 : ELFX86AsmBackend(T, OSABI, STI) {} 1132 1133 std::unique_ptr<MCObjectTargetWriter> 1134 createObjectTargetWriter() const override { 1135 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1136 } 1137 }; 1138 1139 class WindowsX86AsmBackend : public X86AsmBackend { 1140 bool Is64Bit; 1141 1142 public: 1143 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1144 const MCSubtargetInfo &STI) 1145 : X86AsmBackend(T, STI) 1146 , Is64Bit(is64Bit) { 1147 } 1148 1149 Optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1150 return StringSwitch<Optional<MCFixupKind>>(Name) 1151 .Case("dir32", FK_Data_4) 1152 .Case("secrel32", FK_SecRel_4) 1153 .Case("secidx", FK_SecRel_2) 1154 .Default(MCAsmBackend::getFixupKind(Name)); 1155 } 1156 1157 std::unique_ptr<MCObjectTargetWriter> 1158 createObjectTargetWriter() const override { 1159 return createX86WinCOFFObjectWriter(Is64Bit); 1160 } 1161 }; 1162 1163 namespace CU { 1164 1165 /// Compact unwind encoding values. 1166 enum CompactUnwindEncodings { 1167 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1168 /// the return address, then [RE]SP is moved to [RE]BP. 1169 UNWIND_MODE_BP_FRAME = 0x01000000, 1170 1171 /// A frameless function with a small constant stack size. 1172 UNWIND_MODE_STACK_IMMD = 0x02000000, 1173 1174 /// A frameless function with a large constant stack size. 1175 UNWIND_MODE_STACK_IND = 0x03000000, 1176 1177 /// No compact unwind encoding is available. 1178 UNWIND_MODE_DWARF = 0x04000000, 1179 1180 /// Mask for encoding the frame registers. 1181 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1182 1183 /// Mask for encoding the frameless registers. 1184 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1185 }; 1186 1187 } // namespace CU 1188 1189 class DarwinX86AsmBackend : public X86AsmBackend { 1190 const MCRegisterInfo &MRI; 1191 1192 /// Number of registers that can be saved in a compact unwind encoding. 1193 enum { CU_NUM_SAVED_REGS = 6 }; 1194 1195 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1196 Triple TT; 1197 bool Is64Bit; 1198 1199 unsigned OffsetSize; ///< Offset of a "push" instruction. 1200 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1201 unsigned StackDivide; ///< Amount to adjust stack size by. 1202 protected: 1203 /// Size of a "push" instruction for the given register. 1204 unsigned PushInstrSize(unsigned Reg) const { 1205 switch (Reg) { 1206 case X86::EBX: 1207 case X86::ECX: 1208 case X86::EDX: 1209 case X86::EDI: 1210 case X86::ESI: 1211 case X86::EBP: 1212 case X86::RBX: 1213 case X86::RBP: 1214 return 1; 1215 case X86::R12: 1216 case X86::R13: 1217 case X86::R14: 1218 case X86::R15: 1219 return 2; 1220 } 1221 return 1; 1222 } 1223 1224 private: 1225 /// Get the compact unwind number for a given register. The number 1226 /// corresponds to the enum lists in compact_unwind_encoding.h. 1227 int getCompactUnwindRegNum(unsigned Reg) const { 1228 static const MCPhysReg CU32BitRegs[7] = { 1229 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1230 }; 1231 static const MCPhysReg CU64BitRegs[] = { 1232 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1233 }; 1234 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1235 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1236 if (*CURegs == Reg) 1237 return Idx; 1238 1239 return -1; 1240 } 1241 1242 /// Return the registers encoded for a compact encoding with a frame 1243 /// pointer. 1244 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1245 // Encode the registers in the order they were saved --- 3-bits per 1246 // register. The list of saved registers is assumed to be in reverse 1247 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1248 uint32_t RegEnc = 0; 1249 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1250 unsigned Reg = SavedRegs[i]; 1251 if (Reg == 0) break; 1252 1253 int CURegNum = getCompactUnwindRegNum(Reg); 1254 if (CURegNum == -1) return ~0U; 1255 1256 // Encode the 3-bit register number in order, skipping over 3-bits for 1257 // each register. 1258 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1259 } 1260 1261 assert((RegEnc & 0x3FFFF) == RegEnc && 1262 "Invalid compact register encoding!"); 1263 return RegEnc; 1264 } 1265 1266 /// Create the permutation encoding used with frameless stacks. It is 1267 /// passed the number of registers to be saved and an array of the registers 1268 /// saved. 1269 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1270 // The saved registers are numbered from 1 to 6. In order to encode the 1271 // order in which they were saved, we re-number them according to their 1272 // place in the register order. The re-numbering is relative to the last 1273 // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in 1274 // that order: 1275 // 1276 // Orig Re-Num 1277 // ---- ------ 1278 // 6 6 1279 // 2 2 1280 // 4 3 1281 // 5 3 1282 // 1283 for (unsigned i = 0; i < RegCount; ++i) { 1284 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1285 if (CUReg == -1) return ~0U; 1286 SavedRegs[i] = CUReg; 1287 } 1288 1289 // Reverse the list. 1290 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1291 1292 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1293 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1294 unsigned Countless = 0; 1295 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1296 if (SavedRegs[j] < SavedRegs[i]) 1297 ++Countless; 1298 1299 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1300 } 1301 1302 // Take the renumbered values and encode them into a 10-bit number. 1303 uint32_t permutationEncoding = 0; 1304 switch (RegCount) { 1305 case 6: 1306 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1307 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1308 + RenumRegs[4]; 1309 break; 1310 case 5: 1311 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1312 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1313 + RenumRegs[5]; 1314 break; 1315 case 4: 1316 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1317 + 3 * RenumRegs[4] + RenumRegs[5]; 1318 break; 1319 case 3: 1320 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1321 + RenumRegs[5]; 1322 break; 1323 case 2: 1324 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1325 break; 1326 case 1: 1327 permutationEncoding |= RenumRegs[5]; 1328 break; 1329 } 1330 1331 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1332 "Invalid compact register encoding!"); 1333 return permutationEncoding; 1334 } 1335 1336 public: 1337 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1338 const MCSubtargetInfo &STI) 1339 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1340 Is64Bit(TT.isArch64Bit()) { 1341 memset(SavedRegs, 0, sizeof(SavedRegs)); 1342 OffsetSize = Is64Bit ? 8 : 4; 1343 MoveInstrSize = Is64Bit ? 3 : 2; 1344 StackDivide = Is64Bit ? 8 : 4; 1345 } 1346 1347 std::unique_ptr<MCObjectTargetWriter> 1348 createObjectTargetWriter() const override { 1349 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1350 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1351 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1352 } 1353 1354 /// Implementation of algorithm to generate the compact unwind encoding 1355 /// for the CFI instructions. 1356 uint32_t 1357 generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { 1358 if (Instrs.empty()) return 0; 1359 1360 // Reset the saved registers. 1361 unsigned SavedRegIdx = 0; 1362 memset(SavedRegs, 0, sizeof(SavedRegs)); 1363 1364 bool HasFP = false; 1365 1366 // Encode that we are using EBP/RBP as the frame pointer. 1367 uint32_t CompactUnwindEncoding = 0; 1368 1369 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1370 unsigned InstrOffset = 0; 1371 unsigned StackAdjust = 0; 1372 unsigned StackSize = 0; 1373 int MinAbsOffset = std::numeric_limits<int>::max(); 1374 1375 for (const MCCFIInstruction &Inst : Instrs) { 1376 switch (Inst.getOperation()) { 1377 default: 1378 // Any other CFI directives indicate a frame that we aren't prepared 1379 // to represent via compact unwind, so just bail out. 1380 return CU::UNWIND_MODE_DWARF; 1381 case MCCFIInstruction::OpDefCfaRegister: { 1382 // Defines a frame pointer. E.g. 1383 // 1384 // movq %rsp, %rbp 1385 // L0: 1386 // .cfi_def_cfa_register %rbp 1387 // 1388 HasFP = true; 1389 1390 // If the frame pointer is other than esp/rsp, we do not have a way to 1391 // generate a compact unwinding representation, so bail out. 1392 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1393 (Is64Bit ? X86::RBP : X86::EBP)) 1394 return CU::UNWIND_MODE_DWARF; 1395 1396 // Reset the counts. 1397 memset(SavedRegs, 0, sizeof(SavedRegs)); 1398 StackAdjust = 0; 1399 SavedRegIdx = 0; 1400 MinAbsOffset = std::numeric_limits<int>::max(); 1401 InstrOffset += MoveInstrSize; 1402 break; 1403 } 1404 case MCCFIInstruction::OpDefCfaOffset: { 1405 // Defines a new offset for the CFA. E.g. 1406 // 1407 // With frame: 1408 // 1409 // pushq %rbp 1410 // L0: 1411 // .cfi_def_cfa_offset 16 1412 // 1413 // Without frame: 1414 // 1415 // subq $72, %rsp 1416 // L0: 1417 // .cfi_def_cfa_offset 80 1418 // 1419 StackSize = Inst.getOffset() / StackDivide; 1420 break; 1421 } 1422 case MCCFIInstruction::OpOffset: { 1423 // Defines a "push" of a callee-saved register. E.g. 1424 // 1425 // pushq %r15 1426 // pushq %r14 1427 // pushq %rbx 1428 // L0: 1429 // subq $120, %rsp 1430 // L1: 1431 // .cfi_offset %rbx, -40 1432 // .cfi_offset %r14, -32 1433 // .cfi_offset %r15, -24 1434 // 1435 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1436 // If there are too many saved registers, we cannot use a compact 1437 // unwind encoding. 1438 return CU::UNWIND_MODE_DWARF; 1439 1440 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1441 SavedRegs[SavedRegIdx++] = Reg; 1442 StackAdjust += OffsetSize; 1443 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset())); 1444 InstrOffset += PushInstrSize(Reg); 1445 break; 1446 } 1447 } 1448 } 1449 1450 StackAdjust /= StackDivide; 1451 1452 if (HasFP) { 1453 if ((StackAdjust & 0xFF) != StackAdjust) 1454 // Offset was too big for a compact unwind encoding. 1455 return CU::UNWIND_MODE_DWARF; 1456 1457 // We don't attempt to track a real StackAdjust, so if the saved registers 1458 // aren't adjacent to rbp we can't cope. 1459 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1460 return CU::UNWIND_MODE_DWARF; 1461 1462 // Get the encoding of the saved registers when we have a frame pointer. 1463 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1464 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1465 1466 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1467 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1468 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1469 } else { 1470 SubtractInstrIdx += InstrOffset; 1471 ++StackAdjust; 1472 1473 if ((StackSize & 0xFF) == StackSize) { 1474 // Frameless stack with a small stack size. 1475 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1476 1477 // Encode the stack size. 1478 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1479 } else { 1480 if ((StackAdjust & 0x7) != StackAdjust) 1481 // The extra stack adjustments are too big for us to handle. 1482 return CU::UNWIND_MODE_DWARF; 1483 1484 // Frameless stack with an offset too large for us to encode compactly. 1485 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1486 1487 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1488 // instruction. 1489 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1490 1491 // Encode any extra stack adjustments (done via push instructions). 1492 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1493 } 1494 1495 // Encode the number of registers saved. (Reverse the list first.) 1496 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1497 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1498 1499 // Get the encoding of the saved registers when we don't have a frame 1500 // pointer. 1501 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1502 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1503 1504 // Encode the register encoding. 1505 CompactUnwindEncoding |= 1506 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1507 } 1508 1509 return CompactUnwindEncoding; 1510 } 1511 }; 1512 1513 } // end anonymous namespace 1514 1515 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1516 const MCSubtargetInfo &STI, 1517 const MCRegisterInfo &MRI, 1518 const MCTargetOptions &Options) { 1519 const Triple &TheTriple = STI.getTargetTriple(); 1520 if (TheTriple.isOSBinFormatMachO()) 1521 return new DarwinX86AsmBackend(T, MRI, STI); 1522 1523 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1524 return new WindowsX86AsmBackend(T, false, STI); 1525 1526 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1527 1528 if (TheTriple.isOSIAMCU()) 1529 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1530 1531 return new ELFX86_32AsmBackend(T, OSABI, STI); 1532 } 1533 1534 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1535 const MCSubtargetInfo &STI, 1536 const MCRegisterInfo &MRI, 1537 const MCTargetOptions &Options) { 1538 const Triple &TheTriple = STI.getTargetTriple(); 1539 if (TheTriple.isOSBinFormatMachO()) 1540 return new DarwinX86AsmBackend(T, MRI, STI); 1541 1542 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1543 return new WindowsX86AsmBackend(T, true, STI); 1544 1545 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1546 1547 if (TheTriple.isX32()) 1548 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1549 return new ELFX86_64AsmBackend(T, OSABI, STI); 1550 } 1551