1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86FixupKinds.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/BinaryFormat/MachO.h" 14 #include "llvm/MC/MCAsmBackend.h" 15 #include "llvm/MC/MCAsmLayout.h" 16 #include "llvm/MC/MCAssembler.h" 17 #include "llvm/MC/MCCodeEmitter.h" 18 #include "llvm/MC/MCContext.h" 19 #include "llvm/MC/MCDwarf.h" 20 #include "llvm/MC/MCELFObjectWriter.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCFixupKindInfo.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCMachObjectWriter.h" 26 #include "llvm/MC/MCObjectStreamer.h" 27 #include "llvm/MC/MCObjectWriter.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/MC/MCSectionMachO.h" 30 #include "llvm/MC/MCSubtargetInfo.h" 31 #include "llvm/MC/MCValue.h" 32 #include "llvm/MC/TargetRegistry.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 using namespace llvm; 38 39 namespace { 40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 41 class X86AlignBranchKind { 42 private: 43 uint8_t AlignBranchKind = 0; 44 45 public: 46 void operator=(const std::string &Val) { 47 if (Val.empty()) 48 return; 49 SmallVector<StringRef, 6> BranchTypes; 50 StringRef(Val).split(BranchTypes, '+', -1, false); 51 for (auto BranchType : BranchTypes) { 52 if (BranchType == "fused") 53 addKind(X86::AlignBranchFused); 54 else if (BranchType == "jcc") 55 addKind(X86::AlignBranchJcc); 56 else if (BranchType == "jmp") 57 addKind(X86::AlignBranchJmp); 58 else if (BranchType == "call") 59 addKind(X86::AlignBranchCall); 60 else if (BranchType == "ret") 61 addKind(X86::AlignBranchRet); 62 else if (BranchType == "indirect") 63 addKind(X86::AlignBranchIndirect); 64 else { 65 errs() << "invalid argument " << BranchType.str() 66 << " to -x86-align-branch=; each element must be one of: fused, " 67 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 68 } 69 } 70 } 71 72 operator uint8_t() const { return AlignBranchKind; } 73 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 74 }; 75 76 X86AlignBranchKind X86AlignBranchKindLoc; 77 78 cl::opt<unsigned> X86AlignBranchBoundary( 79 "x86-align-branch-boundary", cl::init(0), 80 cl::desc( 81 "Control how the assembler should align branches with NOP. If the " 82 "boundary's size is not 0, it should be a power of 2 and no less " 83 "than 32. Branches will be aligned to prevent from being across or " 84 "against the boundary of specified size. 
The default value 0 does not " 85 "align branches.")); 86 87 cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch( 88 "x86-align-branch", 89 cl::desc( 90 "Specify types of branches to align (plus separated list of types):" 91 "\njcc indicates conditional jumps" 92 "\nfused indicates fused conditional jumps" 93 "\njmp indicates direct unconditional jumps" 94 "\ncall indicates direct and indirect calls" 95 "\nret indicates rets" 96 "\nindirect indicates indirect unconditional jumps"), 97 cl::location(X86AlignBranchKindLoc)); 98 99 cl::opt<bool> X86AlignBranchWithin32BBoundaries( 100 "x86-branches-within-32B-boundaries", cl::init(false), 101 cl::desc( 102 "Align selected instructions to mitigate negative performance impact " 103 "of Intel's micro code update for errata skx102. May break " 104 "assumptions about labels corresponding to particular instructions, " 105 "and should be used with caution.")); 106 107 cl::opt<unsigned> X86PadMaxPrefixSize( 108 "x86-pad-max-prefix-size", cl::init(0), 109 cl::desc("Maximum number of prefixes to use for padding")); 110 111 cl::opt<bool> X86PadForAlign( 112 "x86-pad-for-align", cl::init(false), cl::Hidden, 113 cl::desc("Pad previous instructions to implement align directives")); 114 115 cl::opt<bool> X86PadForBranchAlign( 116 "x86-pad-for-branch-align", cl::init(true), cl::Hidden, 117 cl::desc("Pad previous instructions to implement branch alignment")); 118 119 class X86AsmBackend : public MCAsmBackend { 120 const MCSubtargetInfo &STI; 121 std::unique_ptr<const MCInstrInfo> MCII; 122 X86AlignBranchKind AlignBranchType; 123 Align AlignBoundary; 124 unsigned TargetPrefixMax = 0; 125 126 MCInst PrevInst; 127 MCBoundaryAlignFragment *PendingBA = nullptr; 128 std::pair<MCFragment *, size_t> PrevInstPosition; 129 bool CanPadInst; 130 131 uint8_t determinePaddingPrefix(const MCInst &Inst) const; 132 bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const; 133 bool needAlign(const MCInst &Inst) const; 134 bool canPadBranches(MCObjectStreamer &OS) const; 135 bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const; 136 137 public: 138 X86AsmBackend(const Target &T, const MCSubtargetInfo &STI) 139 : MCAsmBackend(support::little), STI(STI), 140 MCII(T.createMCInstrInfo()) { 141 if (X86AlignBranchWithin32BBoundaries) { 142 // At the moment, this defaults to aligning fused branches, unconditional 143 // jumps, and (unfused) conditional jumps with nops. Both the 144 // instructions aligned and the alignment method (nop vs prefix) may 145 // change in the future. 
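// As implemented just below, -x86-branches-within-32B-boundaries is
// currently equivalent to -x86-align-branch-boundary=32
// -x86-align-branch=fused+jcc+jmp, and either of those flags can still
// override the defaults it sets.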
146 AlignBoundary = assumeAligned(32); 147 AlignBranchType.addKind(X86::AlignBranchFused); 148 AlignBranchType.addKind(X86::AlignBranchJcc); 149 AlignBranchType.addKind(X86::AlignBranchJmp); 150 } 151 // Allow overriding defaults set by the main flag 152 if (X86AlignBranchBoundary.getNumOccurrences()) 153 AlignBoundary = assumeAligned(X86AlignBranchBoundary); 154 if (X86AlignBranch.getNumOccurrences()) 155 AlignBranchType = X86AlignBranchKindLoc; 156 if (X86PadMaxPrefixSize.getNumOccurrences()) 157 TargetPrefixMax = X86PadMaxPrefixSize; 158 } 159 160 bool allowAutoPadding() const override; 161 bool allowEnhancedRelaxation() const override; 162 void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, 163 const MCSubtargetInfo &STI) override; 164 void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override; 165 166 unsigned getNumFixupKinds() const override { 167 return X86::NumTargetFixupKinds; 168 } 169 170 Optional<MCFixupKind> getFixupKind(StringRef Name) const override; 171 172 const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; 173 174 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, 175 const MCValue &Target) override; 176 177 void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 178 const MCValue &Target, MutableArrayRef<char> Data, 179 uint64_t Value, bool IsResolved, 180 const MCSubtargetInfo *STI) const override; 181 182 bool mayNeedRelaxation(const MCInst &Inst, 183 const MCSubtargetInfo &STI) const override; 184 185 bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, 186 const MCRelaxableFragment *DF, 187 const MCAsmLayout &Layout) const override; 188 189 void relaxInstruction(MCInst &Inst, 190 const MCSubtargetInfo &STI) const override; 191 192 bool padInstructionViaRelaxation(MCRelaxableFragment &RF, 193 MCCodeEmitter &Emitter, 194 unsigned &RemainingSize) const; 195 196 bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 197 unsigned &RemainingSize) const; 198 199 bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter, 200 unsigned &RemainingSize) const; 201 202 void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override; 203 204 unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override; 205 206 bool writeNopData(raw_ostream &OS, uint64_t Count, 207 const MCSubtargetInfo *STI) const override; 208 }; 209 } // end anonymous namespace 210 211 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) { 212 unsigned Op = Inst.getOpcode(); 213 switch (Op) { 214 default: 215 return Op; 216 case X86::JCC_1: 217 return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4; 218 case X86::JMP_1: 219 return (Is16BitMode) ?
X86::JMP_2 : X86::JMP_4; 220 } 221 } 222 223 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { 224 unsigned Op = Inst.getOpcode(); 225 switch (Op) { 226 default: 227 return Op; 228 229 // IMUL 230 case X86::IMUL16rri8: return X86::IMUL16rri; 231 case X86::IMUL16rmi8: return X86::IMUL16rmi; 232 case X86::IMUL32rri8: return X86::IMUL32rri; 233 case X86::IMUL32rmi8: return X86::IMUL32rmi; 234 case X86::IMUL64rri8: return X86::IMUL64rri32; 235 case X86::IMUL64rmi8: return X86::IMUL64rmi32; 236 237 // AND 238 case X86::AND16ri8: return X86::AND16ri; 239 case X86::AND16mi8: return X86::AND16mi; 240 case X86::AND32ri8: return X86::AND32ri; 241 case X86::AND32mi8: return X86::AND32mi; 242 case X86::AND64ri8: return X86::AND64ri32; 243 case X86::AND64mi8: return X86::AND64mi32; 244 245 // OR 246 case X86::OR16ri8: return X86::OR16ri; 247 case X86::OR16mi8: return X86::OR16mi; 248 case X86::OR32ri8: return X86::OR32ri; 249 case X86::OR32mi8: return X86::OR32mi; 250 case X86::OR64ri8: return X86::OR64ri32; 251 case X86::OR64mi8: return X86::OR64mi32; 252 253 // XOR 254 case X86::XOR16ri8: return X86::XOR16ri; 255 case X86::XOR16mi8: return X86::XOR16mi; 256 case X86::XOR32ri8: return X86::XOR32ri; 257 case X86::XOR32mi8: return X86::XOR32mi; 258 case X86::XOR64ri8: return X86::XOR64ri32; 259 case X86::XOR64mi8: return X86::XOR64mi32; 260 261 // ADD 262 case X86::ADD16ri8: return X86::ADD16ri; 263 case X86::ADD16mi8: return X86::ADD16mi; 264 case X86::ADD32ri8: return X86::ADD32ri; 265 case X86::ADD32mi8: return X86::ADD32mi; 266 case X86::ADD64ri8: return X86::ADD64ri32; 267 case X86::ADD64mi8: return X86::ADD64mi32; 268 269 // ADC 270 case X86::ADC16ri8: return X86::ADC16ri; 271 case X86::ADC16mi8: return X86::ADC16mi; 272 case X86::ADC32ri8: return X86::ADC32ri; 273 case X86::ADC32mi8: return X86::ADC32mi; 274 case X86::ADC64ri8: return X86::ADC64ri32; 275 case X86::ADC64mi8: return X86::ADC64mi32; 276 277 // SUB 278 case X86::SUB16ri8: return X86::SUB16ri; 279 case X86::SUB16mi8: return X86::SUB16mi; 280 case X86::SUB32ri8: return X86::SUB32ri; 281 case X86::SUB32mi8: return X86::SUB32mi; 282 case X86::SUB64ri8: return X86::SUB64ri32; 283 case X86::SUB64mi8: return X86::SUB64mi32; 284 285 // SBB 286 case X86::SBB16ri8: return X86::SBB16ri; 287 case X86::SBB16mi8: return X86::SBB16mi; 288 case X86::SBB32ri8: return X86::SBB32ri; 289 case X86::SBB32mi8: return X86::SBB32mi; 290 case X86::SBB64ri8: return X86::SBB64ri32; 291 case X86::SBB64mi8: return X86::SBB64mi32; 292 293 // CMP 294 case X86::CMP16ri8: return X86::CMP16ri; 295 case X86::CMP16mi8: return X86::CMP16mi; 296 case X86::CMP32ri8: return X86::CMP32ri; 297 case X86::CMP32mi8: return X86::CMP32mi; 298 case X86::CMP64ri8: return X86::CMP64ri32; 299 case X86::CMP64mi8: return X86::CMP64mi32; 300 301 // PUSH 302 case X86::PUSH32i8: return X86::PUSHi32; 303 case X86::PUSH16i8: return X86::PUSHi16; 304 case X86::PUSH64i8: return X86::PUSH64i32; 305 } 306 } 307 308 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { 309 unsigned R = getRelaxedOpcodeArith(Inst); 310 if (R != Inst.getOpcode()) 311 return R; 312 return getRelaxedOpcodeBranch(Inst, Is16BitMode); 313 } 314 315 static X86::CondCode getCondFromBranch(const MCInst &MI, 316 const MCInstrInfo &MCII) { 317 unsigned Opcode = MI.getOpcode(); 318 switch (Opcode) { 319 default: 320 return X86::COND_INVALID; 321 case X86::JCC_1: { 322 const MCInstrDesc &Desc = MCII.get(Opcode); 323 return static_cast<X86::CondCode>( 324 MI.getOperand(Desc.getNumOperands() - 
1).getImm()); 325 } 326 } 327 } 328 329 static X86::SecondMacroFusionInstKind 330 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) { 331 X86::CondCode CC = getCondFromBranch(MI, MCII); 332 return classifySecondCondCodeInMacroFusion(CC); 333 } 334 335 /// Check if the instruction uses RIP relative addressing. 336 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) { 337 unsigned Opcode = MI.getOpcode(); 338 const MCInstrDesc &Desc = MCII.get(Opcode); 339 uint64_t TSFlags = Desc.TSFlags; 340 unsigned CurOp = X86II::getOperandBias(Desc); 341 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 342 if (MemoryOperand < 0) 343 return false; 344 unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg; 345 unsigned BaseReg = MI.getOperand(BaseRegNum).getReg(); 346 return (BaseReg == X86::RIP); 347 } 348 349 /// Check if the instruction is a prefix. 350 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) { 351 return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags); 352 } 353 354 /// Check if the instruction is valid as the first instruction in macro fusion. 355 static bool isFirstMacroFusibleInst(const MCInst &Inst, 356 const MCInstrInfo &MCII) { 357 // An Intel instruction with RIP relative addressing is not macro fusible. 358 if (isRIPRelative(Inst, MCII)) 359 return false; 360 X86::FirstMacroFusionInstKind FIK = 361 X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode()); 362 return FIK != X86::FirstMacroFusionInstKind::Invalid; 363 } 364 365 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to 366 /// get a better peformance in some cases. Here, we determine which prefix is 367 /// the most suitable. 368 /// 369 /// If the instruction has a segment override prefix, use the existing one. 370 /// If the target is 64-bit, use the CS. 371 /// If the target is 32-bit, 372 /// - If the instruction has a ESP/EBP base register, use SS. 373 /// - Otherwise use DS. 374 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const { 375 assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) && 376 "Prefixes can be added only in 32-bit or 64-bit mode."); 377 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 378 uint64_t TSFlags = Desc.TSFlags; 379 380 // Determine where the memory operand starts, if present. 381 int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); 382 if (MemoryOperand != -1) 383 MemoryOperand += X86II::getOperandBias(Desc); 384 385 unsigned SegmentReg = 0; 386 if (MemoryOperand >= 0) { 387 // Check for explicit segment override on memory operand. 388 SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg(); 389 } 390 391 switch (TSFlags & X86II::FormMask) { 392 default: 393 break; 394 case X86II::RawFrmDstSrc: { 395 // Check segment override opcode prefix as needed (not for %ds). 396 if (Inst.getOperand(2).getReg() != X86::DS) 397 SegmentReg = Inst.getOperand(2).getReg(); 398 break; 399 } 400 case X86II::RawFrmSrc: { 401 // Check segment override opcode prefix as needed (not for %ds). 402 if (Inst.getOperand(1).getReg() != X86::DS) 403 SegmentReg = Inst.getOperand(1).getReg(); 404 break; 405 } 406 case X86II::RawFrmMemOffs: { 407 // Check segment override opcode prefix as needed. 
408 SegmentReg = Inst.getOperand(1).getReg(); 409 break; 410 } 411 } 412 413 if (SegmentReg != 0) 414 return X86::getSegmentOverridePrefixForReg(SegmentReg); 415 416 if (STI.hasFeature(X86::Mode64Bit)) 417 return X86::CS_Encoding; 418 419 if (MemoryOperand >= 0) { 420 unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg; 421 unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg(); 422 if (BaseReg == X86::ESP || BaseReg == X86::EBP) 423 return X86::SS_Encoding; 424 } 425 return X86::DS_Encoding; 426 } 427 428 /// Check if the two instructions will be macro-fused on the target cpu. 429 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const { 430 const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode()); 431 if (!InstDesc.isConditionalBranch()) 432 return false; 433 if (!isFirstMacroFusibleInst(Cmp, *MCII)) 434 return false; 435 const X86::FirstMacroFusionInstKind CmpKind = 436 X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode()); 437 const X86::SecondMacroFusionInstKind BranchKind = 438 classifySecondInstInMacroFusion(Jcc, *MCII); 439 return X86::isMacroFused(CmpKind, BranchKind); 440 } 441 442 /// Check if the instruction has a variant symbol operand. 443 static bool hasVariantSymbol(const MCInst &MI) { 444 for (auto &Operand : MI) { 445 if (!Operand.isExpr()) 446 continue; 447 const MCExpr &Expr = *Operand.getExpr(); 448 if (Expr.getKind() == MCExpr::SymbolRef && 449 cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None) 450 return true; 451 } 452 return false; 453 } 454 455 bool X86AsmBackend::allowAutoPadding() const { 456 return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone); 457 } 458 459 bool X86AsmBackend::allowEnhancedRelaxation() const { 460 return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign; 461 } 462 463 /// X86 has certain instructions which enable interrupts exactly one 464 /// instruction *after* the instruction which stores to SS. Return true if the 465 /// given instruction has such an interrupt delay slot. 466 static bool hasInterruptDelaySlot(const MCInst &Inst) { 467 switch (Inst.getOpcode()) { 468 case X86::POPSS16: 469 case X86::POPSS32: 470 case X86::STI: 471 return true; 472 473 case X86::MOV16sr: 474 case X86::MOV32sr: 475 case X86::MOV64sr: 476 case X86::MOV16sm: 477 if (Inst.getOperand(0).getReg() == X86::SS) 478 return true; 479 break; 480 } 481 return false; 482 } 483 484 /// Check if the instruction to be emitted is right after any data. 485 static bool 486 isRightAfterData(MCFragment *CurrentFragment, 487 const std::pair<MCFragment *, size_t> &PrevInstPosition) { 488 MCFragment *F = CurrentFragment; 489 // Empty data fragments may be created to prevent further data being 490 // added into the previous fragment, we need to skip them since they 491 // have no contents. 492 for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode()) 493 if (cast<MCDataFragment>(F)->getContents().size() != 0) 494 break; 495 496 // Since data is always emitted into a DataFragment, our check strategy is 497 // simple here. 498 // - If the fragment is a DataFragment 499 // - If it's not the fragment where the previous instruction is, 500 // returns true. 501 // - If it's the fragment holding the previous instruction but its 502 // size changed since the the previous instruction was emitted into 503 // it, returns true. 504 // - Otherwise returns false. 505 // - If the fragment is not a DataFragment, returns false. 
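// For example, if a ".byte 0x90" directive was emitted after the previous
// instruction, the current DataFragment has grown past PrevInstPosition, so
// the next instruction is considered to be right after data.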
506 if (auto *DF = dyn_cast_or_null<MCDataFragment>(F)) 507 return DF != PrevInstPosition.first || 508 DF->getContents().size() != PrevInstPosition.second; 509 510 return false; 511 } 512 513 /// \returns the fragment size if it has instructions, otherwise returns 0. 514 static size_t getSizeForInstFragment(const MCFragment *F) { 515 if (!F || !F->hasInstructions()) 516 return 0; 517 // MCEncodedFragmentWithContents being templated makes this tricky. 518 switch (F->getKind()) { 519 default: 520 llvm_unreachable("Unknown fragment with instructions!"); 521 case MCFragment::FT_Data: 522 return cast<MCDataFragment>(*F).getContents().size(); 523 case MCFragment::FT_Relaxable: 524 return cast<MCRelaxableFragment>(*F).getContents().size(); 525 case MCFragment::FT_CompactEncodedInst: 526 return cast<MCCompactEncodedInstFragment>(*F).getContents().size(); 527 } 528 } 529 530 /// Return true if we can insert NOP or prefixes automatically before 531 /// the instruction to be emitted. 532 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const { 533 if (hasVariantSymbol(Inst)) 534 // Linker may rewrite the instruction with a variant symbol operand (e.g. 535 // TLSCALL). 536 return false; 537 538 if (hasInterruptDelaySlot(PrevInst)) 539 // If this instruction follows an interrupt-enabling instruction with a one 540 // instruction delay, inserting a nop would change behavior. 541 return false; 542 543 if (isPrefix(PrevInst, *MCII)) 544 // If this instruction follows a prefix, inserting a nop/prefix would change 545 // semantics. 546 return false; 547 548 if (isPrefix(Inst, *MCII)) 549 // If this instruction is a prefix, inserting a prefix would change 550 // semantics. 551 return false; 552 553 if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition)) 554 // If this instruction follows any data, there is no clear 555 // instruction boundary, inserting a nop/prefix would change semantics. 556 return false; 557 558 return true; 559 } 560 561 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const { 562 if (!OS.getAllowAutoPadding()) 563 return false; 564 assert(allowAutoPadding() && "incorrect initialization!"); 565 566 // We only pad in the text section. 567 if (!OS.getCurrentSectionOnly()->getKind().isText()) 568 return false; 569 570 // TODO: Currently we don't deal with bundle cases. 571 if (OS.getAssembler().isBundlingEnabled()) 572 return false; 573 574 // Branches only need to be aligned in 32-bit or 64-bit mode. 575 if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit))) 576 return false; 577 578 return true; 579 } 580 581 /// Check if the instruction needs to be aligned. 582 bool X86AsmBackend::needAlign(const MCInst &Inst) const { 583 const MCInstrDesc &Desc = MCII->get(Inst.getOpcode()); 584 return (Desc.isConditionalBranch() && 585 (AlignBranchType & X86::AlignBranchJcc)) || 586 (Desc.isUnconditionalBranch() && 587 (AlignBranchType & X86::AlignBranchJmp)) || 588 (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) || 589 (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) || 590 (Desc.isIndirectBranch() && 591 (AlignBranchType & X86::AlignBranchIndirect)); 592 } 593 594 /// Insert BoundaryAlignFragment before instructions to align branches.
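/// For example, with -x86-align-branch-boundary=32 -x86-align-branch=jcc, a
/// conditional jump that would otherwise cross or end against a 32-byte
/// boundary is pushed past it by the padding later attached to the
/// MCBoundaryAlignFragment created here.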
595 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, 596 const MCInst &Inst, const MCSubtargetInfo &STI) { 597 CanPadInst = canPadInst(Inst, OS); 598 599 if (!canPadBranches(OS)) 600 return; 601 602 if (!isMacroFused(PrevInst, Inst)) 603 // Macro fusion doesn't actually happen, so clear the pending fragment. 604 PendingBA = nullptr; 605 606 if (!CanPadInst) 607 return; 608 609 if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) { 610 // Macro fusion actually happens and there is no other fragment inserted 611 // after the previous instruction. 612 // 613 // Do nothing here since we already inserted a BoundaryAlign fragment when 614 // we met the first instruction in the fused pair and we'll tie them 615 // together in emitInstructionEnd. 616 // 617 // Note: When there is at least one fragment, such as MCAlignFragment, 618 // inserted after the previous instruction, e.g. 619 // 620 // \code 621 // cmp %rax, %rcx 622 // .align 16 623 // je .Label0 624 // \endcode 625 // 626 // We will treat the JCC as an unfused branch although it may be fused 627 // with the CMP. 628 return; 629 } 630 631 if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) && 632 isFirstMacroFusibleInst(Inst, *MCII))) { 633 // If we meet an unfused branch or the first instruction in a fusible pair, 634 // insert a BoundaryAlign fragment. 635 OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI)); 636 } 637 } 638 639 /// Set the last fragment to be aligned for the BoundaryAlignFragment. 640 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) { 641 PrevInst = Inst; 642 MCFragment *CF = OS.getCurrentFragment(); 643 PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF)); 644 if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF)) 645 F->setAllowAutoPadding(CanPadInst); 646 647 if (!canPadBranches(OS)) 648 return; 649 650 if (!needAlign(Inst) || !PendingBA) 651 return; 652 653 // Tie the aligned instructions into a pending BoundaryAlign. 654 PendingBA->setLastFragment(CF); 655 PendingBA = nullptr; 656 657 // We need to ensure that further data isn't added to the current 658 // DataFragment, so that we can get the size of instructions later in 659 // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty 660 // DataFragment. 661 if (isa_and_nonnull<MCDataFragment>(CF)) 662 OS.insert(new MCDataFragment()); 663 664 // Update the maximum alignment on the current section if necessary.
665 MCSection *Sec = OS.getCurrentSectionOnly(); 666 if (AlignBoundary.value() > Sec->getAlignment()) 667 Sec->setAlignment(AlignBoundary); 668 } 669 670 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 671 if (STI.getTargetTriple().isOSBinFormatELF()) { 672 unsigned Type; 673 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 674 Type = llvm::StringSwitch<unsigned>(Name) 675 #define ELF_RELOC(X, Y) .Case(#X, Y) 676 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 677 #undef ELF_RELOC 678 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE) 679 .Case("BFD_RELOC_8", ELF::R_X86_64_8) 680 .Case("BFD_RELOC_16", ELF::R_X86_64_16) 681 .Case("BFD_RELOC_32", ELF::R_X86_64_32) 682 .Case("BFD_RELOC_64", ELF::R_X86_64_64) 683 .Default(-1u); 684 } else { 685 Type = llvm::StringSwitch<unsigned>(Name) 686 #define ELF_RELOC(X, Y) .Case(#X, Y) 687 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 688 #undef ELF_RELOC 689 .Case("BFD_RELOC_NONE", ELF::R_386_NONE) 690 .Case("BFD_RELOC_8", ELF::R_386_8) 691 .Case("BFD_RELOC_16", ELF::R_386_16) 692 .Case("BFD_RELOC_32", ELF::R_386_32) 693 .Default(-1u); 694 } 695 if (Type == -1u) 696 return None; 697 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 698 } 699 return MCAsmBackend::getFixupKind(Name); 700 } 701 702 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 703 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 704 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 705 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 706 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 707 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 708 {"reloc_signed_4byte", 0, 32, 0}, 709 {"reloc_signed_4byte_relax", 0, 32, 0}, 710 {"reloc_global_offset_table", 0, 32, 0}, 711 {"reloc_global_offset_table8", 0, 64, 0}, 712 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 713 }; 714 715 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 716 // do not require any extra processing. 
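// For example, ".reloc ., R_X86_64_NONE" is mapped by getFixupKind above to
// FirstLiteralRelocationKind + R_X86_64_NONE and is passed straight through
// to the object writer.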
717 if (Kind >= FirstLiteralRelocationKind) 718 return MCAsmBackend::getFixupKindInfo(FK_NONE); 719 720 if (Kind < FirstTargetFixupKind) 721 return MCAsmBackend::getFixupKindInfo(Kind); 722 723 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 724 "Invalid kind!"); 725 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 726 return Infos[Kind - FirstTargetFixupKind]; 727 } 728 729 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 730 const MCFixup &Fixup, 731 const MCValue &) { 732 return Fixup.getKind() >= FirstLiteralRelocationKind; 733 } 734 735 static unsigned getFixupKindSize(unsigned Kind) { 736 switch (Kind) { 737 default: 738 llvm_unreachable("invalid fixup kind!"); 739 case FK_NONE: 740 return 0; 741 case FK_PCRel_1: 742 case FK_SecRel_1: 743 case FK_Data_1: 744 return 1; 745 case FK_PCRel_2: 746 case FK_SecRel_2: 747 case FK_Data_2: 748 return 2; 749 case FK_PCRel_4: 750 case X86::reloc_riprel_4byte: 751 case X86::reloc_riprel_4byte_relax: 752 case X86::reloc_riprel_4byte_relax_rex: 753 case X86::reloc_riprel_4byte_movq_load: 754 case X86::reloc_signed_4byte: 755 case X86::reloc_signed_4byte_relax: 756 case X86::reloc_global_offset_table: 757 case X86::reloc_branch_4byte_pcrel: 758 case FK_SecRel_4: 759 case FK_Data_4: 760 return 4; 761 case FK_PCRel_8: 762 case FK_SecRel_8: 763 case FK_Data_8: 764 case X86::reloc_global_offset_table8: 765 return 8; 766 } 767 } 768 769 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 770 const MCValue &Target, 771 MutableArrayRef<char> Data, 772 uint64_t Value, bool IsResolved, 773 const MCSubtargetInfo *STI) const { 774 unsigned Kind = Fixup.getKind(); 775 if (Kind >= FirstLiteralRelocationKind) 776 return; 777 unsigned Size = getFixupKindSize(Kind); 778 779 assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); 780 781 int64_t SignedValue = static_cast<int64_t>(Value); 782 if ((Target.isAbsolute() || IsResolved) && 783 getFixupKindInfo(Fixup.getKind()).Flags & 784 MCFixupKindInfo::FKF_IsPCRel) { 785 // check that PC relative fixup fits into the fixup size. 786 if (Size > 0 && !isIntN(Size * 8, SignedValue)) 787 Asm.getContext().reportError( 788 Fixup.getLoc(), "value of " + Twine(SignedValue) + 789 " is too large for field of " + Twine(Size) + 790 ((Size == 1) ? " byte." : " bytes.")); 791 } else { 792 // Check that uppper bits are either all zeros or all ones. 793 // Specifically ignore overflow/underflow as long as the leakage is 794 // limited to the lower bits. This is to remain compatible with 795 // other assemblers. 796 assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) && 797 "Value does not fit in the Fixup field"); 798 } 799 800 for (unsigned i = 0; i != Size; ++i) 801 Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); 802 } 803 804 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst, 805 const MCSubtargetInfo &STI) const { 806 // Branches can always be relaxed in either mode. 807 if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode()) 808 return true; 809 810 // Check if this instruction is ever relaxable. 811 if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode()) 812 return false; 813 814 815 // Check if the relaxable operand has an expression. For the current set of 816 // relaxable instructions, the relaxable operand is always the last operand. 
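// For example, ADD64ri8's MCInst operands are the destination, the source
// and then the immediate, so the immediate checked below is the last operand.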
817 unsigned RelaxableOp = Inst.getNumOperands() - 1; 818 if (Inst.getOperand(RelaxableOp).isExpr()) 819 return true; 820 821 return false; 822 } 823 824 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, 825 uint64_t Value, 826 const MCRelaxableFragment *DF, 827 const MCAsmLayout &Layout) const { 828 // Relax if the value is too big for a (signed) i8. 829 return !isInt<8>(Value); 830 } 831 832 // FIXME: Can tblgen help at all here to verify there aren't other instructions 833 // we can relax? 834 void X86AsmBackend::relaxInstruction(MCInst &Inst, 835 const MCSubtargetInfo &STI) const { 836 // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel. 837 bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; 838 unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode); 839 840 if (RelaxedOp == Inst.getOpcode()) { 841 SmallString<256> Tmp; 842 raw_svector_ostream OS(Tmp); 843 Inst.dump_pretty(OS); 844 OS << "\n"; 845 report_fatal_error("unexpected instruction to relax: " + OS.str()); 846 } 847 848 Inst.setOpcode(RelaxedOp); 849 } 850 851 /// Return true if this instruction has been fully relaxed into it's most 852 /// general available form. 853 static bool isFullyRelaxed(const MCRelaxableFragment &RF) { 854 auto &Inst = RF.getInst(); 855 auto &STI = *RF.getSubtargetInfo(); 856 bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit]; 857 return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode(); 858 } 859 860 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF, 861 MCCodeEmitter &Emitter, 862 unsigned &RemainingSize) const { 863 if (!RF.getAllowAutoPadding()) 864 return false; 865 // If the instruction isn't fully relaxed, shifting it around might require a 866 // larger value for one of the fixups then can be encoded. The outer loop 867 // will also catch this before moving to the next instruction, but we need to 868 // prevent padding this single instruction as well. 869 if (!isFullyRelaxed(RF)) 870 return false; 871 872 const unsigned OldSize = RF.getContents().size(); 873 if (OldSize == 15) 874 return false; 875 876 const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize); 877 const unsigned RemainingPrefixSize = [&]() -> unsigned { 878 SmallString<15> Code; 879 raw_svector_ostream VecOS(Code); 880 Emitter.emitPrefix(RF.getInst(), VecOS, STI); 881 assert(Code.size() < 15 && "The number of prefixes must be less than 15."); 882 883 // TODO: It turns out we need a decent amount of plumbing for the target 884 // specific bits to determine number of prefixes its safe to add. Various 885 // targets (older chips mostly, but also Atom family) encounter decoder 886 // stalls with too many prefixes. For testing purposes, we set the value 887 // externally for the moment. 
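// For example, with -x86-pad-max-prefix-size=5 and an instruction that
// already carries one prefix byte, at most four more padding prefixes can
// be added here.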
888 unsigned ExistingPrefixSize = Code.size(); 889 if (TargetPrefixMax <= ExistingPrefixSize) 890 return 0; 891 return TargetPrefixMax - ExistingPrefixSize; 892 }(); 893 const unsigned PrefixBytesToAdd = 894 std::min(MaxPossiblePad, RemainingPrefixSize); 895 if (PrefixBytesToAdd == 0) 896 return false; 897 898 const uint8_t Prefix = determinePaddingPrefix(RF.getInst()); 899 900 SmallString<256> Code; 901 Code.append(PrefixBytesToAdd, Prefix); 902 Code.append(RF.getContents().begin(), RF.getContents().end()); 903 RF.getContents() = Code; 904 905 // Adjust the fixups for the change in offsets 906 for (auto &F : RF.getFixups()) { 907 F.setOffset(F.getOffset() + PrefixBytesToAdd); 908 } 909 910 RemainingSize -= PrefixBytesToAdd; 911 return true; 912 } 913 914 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF, 915 MCCodeEmitter &Emitter, 916 unsigned &RemainingSize) const { 917 if (isFullyRelaxed(RF)) 918 // TODO: There are lots of other tricks we could apply for increasing 919 // encoding size without impacting performance. 920 return false; 921 922 MCInst Relaxed = RF.getInst(); 923 relaxInstruction(Relaxed, *RF.getSubtargetInfo()); 924 925 SmallVector<MCFixup, 4> Fixups; 926 SmallString<15> Code; 927 raw_svector_ostream VecOS(Code); 928 Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo()); 929 const unsigned OldSize = RF.getContents().size(); 930 const unsigned NewSize = Code.size(); 931 assert(NewSize >= OldSize && "size decrease during relaxation?"); 932 unsigned Delta = NewSize - OldSize; 933 if (Delta > RemainingSize) 934 return false; 935 RF.setInst(Relaxed); 936 RF.getContents() = Code; 937 RF.getFixups() = Fixups; 938 RemainingSize -= Delta; 939 return true; 940 } 941 942 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF, 943 MCCodeEmitter &Emitter, 944 unsigned &RemainingSize) const { 945 bool Changed = false; 946 if (RemainingSize != 0) 947 Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize); 948 if (RemainingSize != 0) 949 Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize); 950 return Changed; 951 } 952 953 void X86AsmBackend::finishLayout(MCAssembler const &Asm, 954 MCAsmLayout &Layout) const { 955 // See if we can further relax some instructions to cut down on the number of 956 // nop bytes required for code alignment. The actual win is in reducing 957 // instruction count, not number of bytes. Modern X86-64 can easily end up 958 // decode limited. It is often better to reduce the number of instructions 959 // (i.e. eliminate nops) even at the cost of increasing the size and 960 // complexity of others. 961 if (!X86PadForAlign && !X86PadForBranchAlign) 962 return; 963 964 // The processed regions are delimitered by LabeledFragments. -g may have more 965 // MCSymbols and therefore different relaxation results. X86PadForAlign is 966 // disabled by default to eliminate the -g vs non -g difference. 
967 DenseSet<MCFragment *> LabeledFragments; 968 for (const MCSymbol &S : Asm.symbols()) 969 LabeledFragments.insert(S.getFragment(false)); 970 971 for (MCSection &Sec : Asm) { 972 if (!Sec.getKind().isText()) 973 continue; 974 975 SmallVector<MCRelaxableFragment *, 4> Relaxable; 976 for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) { 977 MCFragment &F = *I; 978 979 if (LabeledFragments.count(&F)) 980 Relaxable.clear(); 981 982 if (F.getKind() == MCFragment::FT_Data || 983 F.getKind() == MCFragment::FT_CompactEncodedInst) 984 // Skip and ignore 985 continue; 986 987 if (F.getKind() == MCFragment::FT_Relaxable) { 988 auto &RF = cast<MCRelaxableFragment>(*I); 989 Relaxable.push_back(&RF); 990 continue; 991 } 992 993 auto canHandle = [](MCFragment &F) -> bool { 994 switch (F.getKind()) { 995 default: 996 return false; 997 case MCFragment::FT_Align: 998 return X86PadForAlign; 999 case MCFragment::FT_BoundaryAlign: 1000 return X86PadForBranchAlign; 1001 } 1002 }; 1003 // For any unhandled kind, assume we can't change layout. 1004 if (!canHandle(F)) { 1005 Relaxable.clear(); 1006 continue; 1007 } 1008 1009 #ifndef NDEBUG 1010 const uint64_t OrigOffset = Layout.getFragmentOffset(&F); 1011 #endif 1012 const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F); 1013 1014 // To keep the effects local, prefer to relax instructions closest to 1015 // the align directive. This is purely about human understandability 1016 // of the resulting code. If we later find a reason to expand 1017 // particular instructions over others, we can adjust. 1018 MCFragment *FirstChangedFragment = nullptr; 1019 unsigned RemainingSize = OrigSize; 1020 while (!Relaxable.empty() && RemainingSize != 0) { 1021 auto &RF = *Relaxable.pop_back_val(); 1022 // Give the backend a chance to play any tricks it wishes to increase 1023 // the encoding size of the given instruction. Target independent code 1024 // will try further relaxation, but target's may play further tricks. 1025 if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize)) 1026 FirstChangedFragment = &RF; 1027 1028 // If we have an instruction which hasn't been fully relaxed, we can't 1029 // skip past it and insert bytes before it. Changing its starting 1030 // offset might require a larger negative offset than it can encode. 1031 // We don't need to worry about larger positive offsets as none of the 1032 // possible offsets between this and our align are visible, and the 1033 // ones afterwards aren't changing. 1034 if (!isFullyRelaxed(RF)) 1035 break; 1036 } 1037 Relaxable.clear(); 1038 1039 if (FirstChangedFragment) { 1040 // Make sure the offsets for any fragments in the effected range get 1041 // updated. Note that this (conservatively) invalidates the offsets of 1042 // those following, but this is not required. 
1043 Layout.invalidateFragmentsFrom(FirstChangedFragment); 1044 } 1045 1046 // BoundaryAlign explicitly tracks it's size (unlike align) 1047 if (F.getKind() == MCFragment::FT_BoundaryAlign) 1048 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 1049 1050 #ifndef NDEBUG 1051 const uint64_t FinalOffset = Layout.getFragmentOffset(&F); 1052 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); 1053 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 1054 "can't move start of next fragment!"); 1055 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 1056 #endif 1057 1058 // If we're looking at a boundary align, make sure we don't try to pad 1059 // its target instructions for some following directive. Doing so would 1060 // break the alignment of the current boundary align. 1061 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 1062 const MCFragment *LastFragment = BF->getLastFragment(); 1063 if (!LastFragment) 1064 continue; 1065 while (&*I != LastFragment) 1066 ++I; 1067 } 1068 } 1069 } 1070 1071 // The layout is done. Mark every fragment as valid. 1072 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { 1073 MCSection &Section = *Layout.getSectionOrder()[i]; 1074 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); 1075 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); 1076 } 1077 } 1078 1079 unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const { 1080 if (STI.hasFeature(X86::Mode16Bit)) 1081 return 4; 1082 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) 1083 return 1; 1084 if (STI.getFeatureBits()[X86::TuningFast7ByteNOP]) 1085 return 7; 1086 if (STI.getFeatureBits()[X86::TuningFast15ByteNOP]) 1087 return 15; 1088 if (STI.getFeatureBits()[X86::TuningFast11ByteNOP]) 1089 return 11; 1090 // FIXME: handle 32-bit mode 1091 // 15-bytes is the longest single NOP instruction, but 10-bytes is 1092 // commonly the longest that can be efficiently decoded. 1093 return 10; 1094 } 1095 1096 /// Write a sequence of optimal nops to the output, covering \p Count 1097 /// bytes. 1098 /// \return - true on success, false on failure 1099 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, 1100 const MCSubtargetInfo *STI) const { 1101 static const char Nops32Bit[10][11] = { 1102 // nop 1103 "\x90", 1104 // xchg %ax,%ax 1105 "\x66\x90", 1106 // nopl (%[re]ax) 1107 "\x0f\x1f\x00", 1108 // nopl 0(%[re]ax) 1109 "\x0f\x1f\x40\x00", 1110 // nopl 0(%[re]ax,%[re]ax,1) 1111 "\x0f\x1f\x44\x00\x00", 1112 // nopw 0(%[re]ax,%[re]ax,1) 1113 "\x66\x0f\x1f\x44\x00\x00", 1114 // nopl 0L(%[re]ax) 1115 "\x0f\x1f\x80\x00\x00\x00\x00", 1116 // nopl 0L(%[re]ax,%[re]ax,1) 1117 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1118 // nopw 0L(%[re]ax,%[re]ax,1) 1119 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1120 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1121 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1122 }; 1123 1124 // 16-bit mode uses different nop patterns than 32-bit. 1125 static const char Nops16Bit[4][11] = { 1126 // nop 1127 "\x90", 1128 // xchg %eax,%eax 1129 "\x66\x90", 1130 // lea 0(%si),%si 1131 "\x8d\x74\x00", 1132 // lea 0w(%si),%si 1133 "\x8d\xb4\x00\x00", 1134 }; 1135 1136 const char(*Nops)[11] = 1137 STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit; 1138 1139 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI); 1140 1141 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1142 // length. 
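// For example, a 13-byte request with MaxNopLength == 10 is emitted as a
// 10-byte NOP followed by a 3-byte NOP, while a 15-byte request on a
// TuningFast15ByteNOP target becomes five 0x66 prefixes followed by the
// 10-byte form.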
1143 do { 1144 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1145 const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; 1146 for (uint8_t i = 0; i < Prefixes; i++) 1147 OS << '\x66'; 1148 const uint8_t Rest = ThisNopLength - Prefixes; 1149 if (Rest != 0) 1150 OS.write(Nops[Rest - 1], Rest); 1151 Count -= ThisNopLength; 1152 } while (Count != 0); 1153 1154 return true; 1155 } 1156 1157 /* *** */ 1158 1159 namespace { 1160 1161 class ELFX86AsmBackend : public X86AsmBackend { 1162 public: 1163 uint8_t OSABI; 1164 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1165 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1166 }; 1167 1168 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1169 public: 1170 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1171 const MCSubtargetInfo &STI) 1172 : ELFX86AsmBackend(T, OSABI, STI) {} 1173 1174 std::unique_ptr<MCObjectTargetWriter> 1175 createObjectTargetWriter() const override { 1176 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1177 } 1178 }; 1179 1180 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1181 public: 1182 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1183 const MCSubtargetInfo &STI) 1184 : ELFX86AsmBackend(T, OSABI, STI) {} 1185 1186 std::unique_ptr<MCObjectTargetWriter> 1187 createObjectTargetWriter() const override { 1188 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1189 ELF::EM_X86_64); 1190 } 1191 }; 1192 1193 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1194 public: 1195 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1196 const MCSubtargetInfo &STI) 1197 : ELFX86AsmBackend(T, OSABI, STI) {} 1198 1199 std::unique_ptr<MCObjectTargetWriter> 1200 createObjectTargetWriter() const override { 1201 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1202 ELF::EM_IAMCU); 1203 } 1204 }; 1205 1206 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1207 public: 1208 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1209 const MCSubtargetInfo &STI) 1210 : ELFX86AsmBackend(T, OSABI, STI) {} 1211 1212 std::unique_ptr<MCObjectTargetWriter> 1213 createObjectTargetWriter() const override { 1214 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1215 } 1216 }; 1217 1218 class WindowsX86AsmBackend : public X86AsmBackend { 1219 bool Is64Bit; 1220 1221 public: 1222 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1223 const MCSubtargetInfo &STI) 1224 : X86AsmBackend(T, STI) 1225 , Is64Bit(is64Bit) { 1226 } 1227 1228 Optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1229 return StringSwitch<Optional<MCFixupKind>>(Name) 1230 .Case("dir32", FK_Data_4) 1231 .Case("secrel32", FK_SecRel_4) 1232 .Case("secidx", FK_SecRel_2) 1233 .Default(MCAsmBackend::getFixupKind(Name)); 1234 } 1235 1236 std::unique_ptr<MCObjectTargetWriter> 1237 createObjectTargetWriter() const override { 1238 return createX86WinCOFFObjectWriter(Is64Bit); 1239 } 1240 }; 1241 1242 namespace CU { 1243 1244 /// Compact unwind encoding values. 1245 enum CompactUnwindEncodings { 1246 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1247 /// the return address, then [RE]SP is moved to [RE]BP. 1248 UNWIND_MODE_BP_FRAME = 0x01000000, 1249 1250 /// A frameless function with a small constant stack size. 1251 UNWIND_MODE_STACK_IMMD = 0x02000000, 1252 1253 /// A frameless function with a large constant stack size. 
1254 UNWIND_MODE_STACK_IND = 0x03000000, 1255 1256 /// No compact unwind encoding is available. 1257 UNWIND_MODE_DWARF = 0x04000000, 1258 1259 /// Mask for encoding the frame registers. 1260 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1261 1262 /// Mask for encoding the frameless registers. 1263 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1264 }; 1265 1266 } // namespace CU 1267 1268 class DarwinX86AsmBackend : public X86AsmBackend { 1269 const MCRegisterInfo &MRI; 1270 1271 /// Number of registers that can be saved in a compact unwind encoding. 1272 enum { CU_NUM_SAVED_REGS = 6 }; 1273 1274 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1275 Triple TT; 1276 bool Is64Bit; 1277 1278 unsigned OffsetSize; ///< Offset of a "push" instruction. 1279 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1280 unsigned StackDivide; ///< Amount to adjust stack size by. 1281 protected: 1282 /// Size of a "push" instruction for the given register. 1283 unsigned PushInstrSize(unsigned Reg) const { 1284 switch (Reg) { 1285 case X86::EBX: 1286 case X86::ECX: 1287 case X86::EDX: 1288 case X86::EDI: 1289 case X86::ESI: 1290 case X86::EBP: 1291 case X86::RBX: 1292 case X86::RBP: 1293 return 1; 1294 case X86::R12: 1295 case X86::R13: 1296 case X86::R14: 1297 case X86::R15: 1298 return 2; 1299 } 1300 return 1; 1301 } 1302 1303 private: 1304 /// Get the compact unwind number for a given register. The number 1305 /// corresponds to the enum lists in compact_unwind_encoding.h. 1306 int getCompactUnwindRegNum(unsigned Reg) const { 1307 static const MCPhysReg CU32BitRegs[7] = { 1308 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1309 }; 1310 static const MCPhysReg CU64BitRegs[] = { 1311 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1312 }; 1313 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1314 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1315 if (*CURegs == Reg) 1316 return Idx; 1317 1318 return -1; 1319 } 1320 1321 /// Return the registers encoded for a compact encoding with a frame 1322 /// pointer. 1323 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1324 // Encode the registers in the order they were saved --- 3-bits per 1325 // register. The list of saved registers is assumed to be in reverse 1326 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1327 uint32_t RegEnc = 0; 1328 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1329 unsigned Reg = SavedRegs[i]; 1330 if (Reg == 0) break; 1331 1332 int CURegNum = getCompactUnwindRegNum(Reg); 1333 if (CURegNum == -1) return ~0U; 1334 1335 // Encode the 3-bit register number in order, skipping over 3-bits for 1336 // each register. 1337 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1338 } 1339 1340 assert((RegEnc & 0x3FFFF) == RegEnc && 1341 "Invalid compact register encoding!"); 1342 return RegEnc; 1343 } 1344 1345 /// Create the permutation encoding used with frameless stacks. It is 1346 /// passed the number of registers to be saved and an array of the registers 1347 /// saved. 1348 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1349 // The saved registers are numbered from 1 to 6. In order to encode the 1350 // order in which they were saved, we re-number them according to their 1351 // place in the register order. The re-numbering is relative to the last 1352 // re-numbered register. 
E.g., if we have registers {6, 2, 4, 5} saved in 1353 // that order: 1354 // 1355 // Orig Re-Num 1356 // ---- ------ 1357 // 6 6 1358 // 2 2 1359 // 4 3 1360 // 5 3 1361 // 1362 for (unsigned i = 0; i < RegCount; ++i) { 1363 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1364 if (CUReg == -1) return ~0U; 1365 SavedRegs[i] = CUReg; 1366 } 1367 1368 // Reverse the list. 1369 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1370 1371 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1372 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1373 unsigned Countless = 0; 1374 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1375 if (SavedRegs[j] < SavedRegs[i]) 1376 ++Countless; 1377 1378 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1379 } 1380 1381 // Take the renumbered values and encode them into a 10-bit number. 1382 uint32_t permutationEncoding = 0; 1383 switch (RegCount) { 1384 case 6: 1385 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1386 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1387 + RenumRegs[4]; 1388 break; 1389 case 5: 1390 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1391 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1392 + RenumRegs[5]; 1393 break; 1394 case 4: 1395 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1396 + 3 * RenumRegs[4] + RenumRegs[5]; 1397 break; 1398 case 3: 1399 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1400 + RenumRegs[5]; 1401 break; 1402 case 2: 1403 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1404 break; 1405 case 1: 1406 permutationEncoding |= RenumRegs[5]; 1407 break; 1408 } 1409 1410 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1411 "Invalid compact register encoding!"); 1412 return permutationEncoding; 1413 } 1414 1415 public: 1416 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1417 const MCSubtargetInfo &STI) 1418 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1419 Is64Bit(TT.isArch64Bit()) { 1420 memset(SavedRegs, 0, sizeof(SavedRegs)); 1421 OffsetSize = Is64Bit ? 8 : 4; 1422 MoveInstrSize = Is64Bit ? 3 : 2; 1423 StackDivide = Is64Bit ? 8 : 4; 1424 } 1425 1426 std::unique_ptr<MCObjectTargetWriter> 1427 createObjectTargetWriter() const override { 1428 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1429 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1430 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1431 } 1432 1433 /// Implementation of algorithm to generate the compact unwind encoding 1434 /// for the CFI instructions. 1435 uint32_t 1436 generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { 1437 if (Instrs.empty()) return 0; 1438 1439 // Reset the saved registers. 1440 unsigned SavedRegIdx = 0; 1441 memset(SavedRegs, 0, sizeof(SavedRegs)); 1442 1443 bool HasFP = false; 1444 1445 // Encode that we are using EBP/RBP as the frame pointer. 1446 uint32_t CompactUnwindEncoding = 0; 1447 1448 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1449 unsigned InstrOffset = 0; 1450 unsigned StackAdjust = 0; 1451 unsigned StackSize = 0; 1452 unsigned NumDefCFAOffsets = 0; 1453 int MinAbsOffset = std::numeric_limits<int>::max(); 1454 1455 for (const MCCFIInstruction &Inst : Instrs) { 1456 switch (Inst.getOperation()) { 1457 default: 1458 // Any other CFI directives indicate a frame that we aren't prepared 1459 // to represent via compact unwind, so just bail out. 1460 return 0; 1461 case MCCFIInstruction::OpDefCfaRegister: { 1462 // Defines a frame pointer. 
E.g. 1463 // 1464 // movq %rsp, %rbp 1465 // L0: 1466 // .cfi_def_cfa_register %rbp 1467 // 1468 HasFP = true; 1469 1470 // If the frame pointer is other than esp/rsp, we do not have a way to 1471 // generate a compact unwinding representation, so bail out. 1472 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1473 (Is64Bit ? X86::RBP : X86::EBP)) 1474 return 0; 1475 1476 // Reset the counts. 1477 memset(SavedRegs, 0, sizeof(SavedRegs)); 1478 StackAdjust = 0; 1479 SavedRegIdx = 0; 1480 MinAbsOffset = std::numeric_limits<int>::max(); 1481 InstrOffset += MoveInstrSize; 1482 break; 1483 } 1484 case MCCFIInstruction::OpDefCfaOffset: { 1485 // Defines a new offset for the CFA. E.g. 1486 // 1487 // With frame: 1488 // 1489 // pushq %rbp 1490 // L0: 1491 // .cfi_def_cfa_offset 16 1492 // 1493 // Without frame: 1494 // 1495 // subq $72, %rsp 1496 // L0: 1497 // .cfi_def_cfa_offset 80 1498 // 1499 StackSize = Inst.getOffset() / StackDivide; 1500 ++NumDefCFAOffsets; 1501 break; 1502 } 1503 case MCCFIInstruction::OpOffset: { 1504 // Defines a "push" of a callee-saved register. E.g. 1505 // 1506 // pushq %r15 1507 // pushq %r14 1508 // pushq %rbx 1509 // L0: 1510 // subq $120, %rsp 1511 // L1: 1512 // .cfi_offset %rbx, -40 1513 // .cfi_offset %r14, -32 1514 // .cfi_offset %r15, -24 1515 // 1516 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1517 // If there are too many saved registers, we cannot use a compact 1518 // unwind encoding. 1519 return CU::UNWIND_MODE_DWARF; 1520 1521 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1522 SavedRegs[SavedRegIdx++] = Reg; 1523 StackAdjust += OffsetSize; 1524 MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset())); 1525 InstrOffset += PushInstrSize(Reg); 1526 break; 1527 } 1528 } 1529 } 1530 1531 StackAdjust /= StackDivide; 1532 1533 if (HasFP) { 1534 if ((StackAdjust & 0xFF) != StackAdjust) 1535 // Offset was too big for a compact unwind encoding. 1536 return CU::UNWIND_MODE_DWARF; 1537 1538 // We don't attempt to track a real StackAdjust, so if the saved registers 1539 // aren't adjacent to rbp we can't cope. 1540 if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize) 1541 return CU::UNWIND_MODE_DWARF; 1542 1543 // Get the encoding of the saved registers when we have a frame pointer. 1544 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1545 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1546 1547 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1548 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1549 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1550 } else { 1551 SubtractInstrIdx += InstrOffset; 1552 ++StackAdjust; 1553 1554 if ((StackSize & 0xFF) == StackSize) { 1555 // Frameless stack with a small stack size. 1556 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1557 1558 // Encode the stack size. 1559 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1560 } else { 1561 if ((StackAdjust & 0x7) != StackAdjust) 1562 // The extra stack adjustments are too big for us to handle. 1563 return CU::UNWIND_MODE_DWARF; 1564 1565 // Frameless stack with an offset too large for us to encode compactly. 1566 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1567 1568 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1569 // instruction. 1570 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1571 1572 // Encode any extra stack adjustments (done via push instructions). 1573 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1574 } 1575 1576 // Encode the number of registers saved. 
(Reverse the list first.) 1577 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1578 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1579 1580 // Get the encoding of the saved registers when we don't have a frame 1581 // pointer. 1582 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1583 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1584 1585 // Encode the register encoding. 1586 CompactUnwindEncoding |= 1587 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1588 } 1589 1590 return CompactUnwindEncoding; 1591 } 1592 }; 1593 1594 } // end anonymous namespace 1595 1596 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1597 const MCSubtargetInfo &STI, 1598 const MCRegisterInfo &MRI, 1599 const MCTargetOptions &Options) { 1600 const Triple &TheTriple = STI.getTargetTriple(); 1601 if (TheTriple.isOSBinFormatMachO()) 1602 return new DarwinX86AsmBackend(T, MRI, STI); 1603 1604 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1605 return new WindowsX86AsmBackend(T, false, STI); 1606 1607 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1608 1609 if (TheTriple.isOSIAMCU()) 1610 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1611 1612 return new ELFX86_32AsmBackend(T, OSABI, STI); 1613 } 1614 1615 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1616 const MCSubtargetInfo &STI, 1617 const MCRegisterInfo &MRI, 1618 const MCTargetOptions &Options) { 1619 const Triple &TheTriple = STI.getTargetTriple(); 1620 if (TheTriple.isOSBinFormatMachO()) 1621 return new DarwinX86AsmBackend(T, MRI, STI); 1622 1623 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1624 return new WindowsX86AsmBackend(T, true, STI); 1625 1626 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1627 1628 if (TheTriple.isX32()) 1629 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1630 return new ELFX86_64AsmBackend(T, OSABI, STI); 1631 } 1632