1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "MCTargetDesc/X86BaseInfo.h" 10 #include "MCTargetDesc/X86FixupKinds.h" 11 #include "llvm/ADT/StringSwitch.h" 12 #include "llvm/BinaryFormat/ELF.h" 13 #include "llvm/BinaryFormat/MachO.h" 14 #include "llvm/MC/MCAsmBackend.h" 15 #include "llvm/MC/MCAsmLayout.h" 16 #include "llvm/MC/MCAssembler.h" 17 #include "llvm/MC/MCCodeEmitter.h" 18 #include "llvm/MC/MCContext.h" 19 #include "llvm/MC/MCDwarf.h" 20 #include "llvm/MC/MCELFObjectWriter.h" 21 #include "llvm/MC/MCExpr.h" 22 #include "llvm/MC/MCFixupKindInfo.h" 23 #include "llvm/MC/MCInst.h" 24 #include "llvm/MC/MCInstrInfo.h" 25 #include "llvm/MC/MCMachObjectWriter.h" 26 #include "llvm/MC/MCObjectStreamer.h" 27 #include "llvm/MC/MCObjectWriter.h" 28 #include "llvm/MC/MCRegisterInfo.h" 29 #include "llvm/MC/MCSectionMachO.h" 30 #include "llvm/MC/MCSubtargetInfo.h" 31 #include "llvm/MC/MCValue.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Support/TargetRegistry.h" 35 #include "llvm/Support/raw_ostream.h" 36 37 using namespace llvm; 38 39 namespace { 40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind 41 class X86AlignBranchKind { 42 private: 43 uint8_t AlignBranchKind = 0; 44 45 public: 46 void operator=(const std::string &Val) { 47 if (Val.empty()) 48 return; 49 SmallVector<StringRef, 6> BranchTypes; 50 StringRef(Val).split(BranchTypes, '+', -1, false); 51 for (auto BranchType : BranchTypes) { 52 if (BranchType == "fused") 53 addKind(X86::AlignBranchFused); 54 else if (BranchType == "jcc") 55 addKind(X86::AlignBranchJcc); 56 else if (BranchType == "jmp") 57 addKind(X86::AlignBranchJmp); 58 else if (BranchType == "call") 59 addKind(X86::AlignBranchCall); 60 else if (BranchType == "ret") 61 addKind(X86::AlignBranchRet); 62 else if (BranchType == "indirect") 63 addKind(X86::AlignBranchIndirect); 64 else { 65 errs() << "invalid argument " << BranchType.str() 66 << " to -x86-align-branch=; each element must be one of: fused, " 67 "jcc, jmp, call, ret, indirect.(plus separated)\n"; 68 } 69 } 70 } 71 72 operator uint8_t() const { return AlignBranchKind; } 73 void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; } 74 }; 75 76 X86AlignBranchKind X86AlignBranchKindLoc; 77 78 cl::opt<unsigned> X86AlignBranchBoundary( 79 "x86-align-branch-boundary", cl::init(0), 80 cl::desc( 81 "Control how the assembler should align branches with NOP. If the " 82 "boundary's size is not 0, it should be a power of 2 and no less " 83 "than 32. Branches will be aligned to prevent from being across or " 84 "against the boundary of specified size. 
The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc indicates conditional jumps"
        "\nfused indicates fused conditional jumps"
        "\njmp indicates direct unconditional jumps"
        "\ncall indicates direct and indirect calls"
        "\nret indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));

class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
  X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
                     bool HasRelocationAddend)
      : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
};

class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool CanPadInst;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(support::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
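      //
      // Illustrative sketch (not authoritative): with these defaults, enabling
      // the umbrella flag behaves roughly like passing the individual options
      // explicitly when driving the assembler directly (the input file name is
      // a placeholder):
      //
      // \code
      //   llvm-mc -triple=x86_64 -x86-branches-within-32B-boundaries foo.s
      //   # roughly equivalent to:
      //   llvm-mc -triple=x86_64 -x86-align-branch-boundary=32 \
      //           -x86-align-branch=fused+jcc+jmp foo.s
      // \endcode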
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by master flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  Optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                            const MCRelaxableFragment *DF,
                            const MCAsmLayout &Layout) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;

  unsigned getMaximumNopSize() const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
};
} // end anonymous namespace

static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
  unsigned Op = Inst.getOpcode();
  switch (Op) {
  default:
    return Op;
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ?
X86::JMP_2 : X86::JMP_4; 225 } 226 } 227 228 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) { 229 unsigned Op = Inst.getOpcode(); 230 switch (Op) { 231 default: 232 return Op; 233 234 // IMUL 235 case X86::IMUL16rri8: return X86::IMUL16rri; 236 case X86::IMUL16rmi8: return X86::IMUL16rmi; 237 case X86::IMUL32rri8: return X86::IMUL32rri; 238 case X86::IMUL32rmi8: return X86::IMUL32rmi; 239 case X86::IMUL64rri8: return X86::IMUL64rri32; 240 case X86::IMUL64rmi8: return X86::IMUL64rmi32; 241 242 // AND 243 case X86::AND16ri8: return X86::AND16ri; 244 case X86::AND16mi8: return X86::AND16mi; 245 case X86::AND32ri8: return X86::AND32ri; 246 case X86::AND32mi8: return X86::AND32mi; 247 case X86::AND64ri8: return X86::AND64ri32; 248 case X86::AND64mi8: return X86::AND64mi32; 249 250 // OR 251 case X86::OR16ri8: return X86::OR16ri; 252 case X86::OR16mi8: return X86::OR16mi; 253 case X86::OR32ri8: return X86::OR32ri; 254 case X86::OR32mi8: return X86::OR32mi; 255 case X86::OR64ri8: return X86::OR64ri32; 256 case X86::OR64mi8: return X86::OR64mi32; 257 258 // XOR 259 case X86::XOR16ri8: return X86::XOR16ri; 260 case X86::XOR16mi8: return X86::XOR16mi; 261 case X86::XOR32ri8: return X86::XOR32ri; 262 case X86::XOR32mi8: return X86::XOR32mi; 263 case X86::XOR64ri8: return X86::XOR64ri32; 264 case X86::XOR64mi8: return X86::XOR64mi32; 265 266 // ADD 267 case X86::ADD16ri8: return X86::ADD16ri; 268 case X86::ADD16mi8: return X86::ADD16mi; 269 case X86::ADD32ri8: return X86::ADD32ri; 270 case X86::ADD32mi8: return X86::ADD32mi; 271 case X86::ADD64ri8: return X86::ADD64ri32; 272 case X86::ADD64mi8: return X86::ADD64mi32; 273 274 // ADC 275 case X86::ADC16ri8: return X86::ADC16ri; 276 case X86::ADC16mi8: return X86::ADC16mi; 277 case X86::ADC32ri8: return X86::ADC32ri; 278 case X86::ADC32mi8: return X86::ADC32mi; 279 case X86::ADC64ri8: return X86::ADC64ri32; 280 case X86::ADC64mi8: return X86::ADC64mi32; 281 282 // SUB 283 case X86::SUB16ri8: return X86::SUB16ri; 284 case X86::SUB16mi8: return X86::SUB16mi; 285 case X86::SUB32ri8: return X86::SUB32ri; 286 case X86::SUB32mi8: return X86::SUB32mi; 287 case X86::SUB64ri8: return X86::SUB64ri32; 288 case X86::SUB64mi8: return X86::SUB64mi32; 289 290 // SBB 291 case X86::SBB16ri8: return X86::SBB16ri; 292 case X86::SBB16mi8: return X86::SBB16mi; 293 case X86::SBB32ri8: return X86::SBB32ri; 294 case X86::SBB32mi8: return X86::SBB32mi; 295 case X86::SBB64ri8: return X86::SBB64ri32; 296 case X86::SBB64mi8: return X86::SBB64mi32; 297 298 // CMP 299 case X86::CMP16ri8: return X86::CMP16ri; 300 case X86::CMP16mi8: return X86::CMP16mi; 301 case X86::CMP32ri8: return X86::CMP32ri; 302 case X86::CMP32mi8: return X86::CMP32mi; 303 case X86::CMP64ri8: return X86::CMP64ri32; 304 case X86::CMP64mi8: return X86::CMP64mi32; 305 306 // PUSH 307 case X86::PUSH32i8: return X86::PUSHi32; 308 case X86::PUSH16i8: return X86::PUSHi16; 309 case X86::PUSH64i8: return X86::PUSH64i32; 310 } 311 } 312 313 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) { 314 unsigned R = getRelaxedOpcodeArith(Inst); 315 if (R != Inst.getOpcode()) 316 return R; 317 return getRelaxedOpcodeBranch(Inst, Is16BitMode); 318 } 319 320 static X86::CondCode getCondFromBranch(const MCInst &MI, 321 const MCInstrInfo &MCII) { 322 unsigned Opcode = MI.getOpcode(); 323 switch (Opcode) { 324 default: 325 return X86::COND_INVALID; 326 case X86::JCC_1: { 327 const MCInstrDesc &Desc = MCII.get(Opcode); 328 return static_cast<X86::CondCode>( 329 MI.getOperand(Desc.getNumOperands() - 
1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}

/// X86 can reduce the number of NOP bytes by padding instructions with
/// prefixes to get better performance in some cases. Here, we determine which
/// prefix is the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Mode64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}

/// Check if the two instructions will be macro-fused on the target CPU.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction has such an interrupt delay slot.
static bool hasInterruptDelaySlot(const MCInst &Inst) {
  switch (Inst.getOpcode()) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    if (Inst.getOperand(0).getReg() == X86::SS)
      return true;
    break;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Empty data fragments may be created to prevent further data being
  // added into the previous fragment; we need to skip them since they
  // have no contents.
  for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
    if (cast<MCDataFragment>(F)->getContents().size() != 0)
      break;

  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
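  //
  // For example (illustrative only), given
  //
  // \code
  //   .byte 0x2e
  //   jmp .L1
  // \endcode
  //
  // the jmp immediately follows raw bytes emitted into the current
  // DataFragment, so it is treated as "right after data" and will not be
  // padded.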
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF != PrevInstPosition.first ||
           DF->getContents().size() != PrevInstPosition.second;

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (hasInterruptDelaySlot(PrevInst))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInst, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst, *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
    // If this instruction follows any data, there is no clear
    // instruction boundary, inserting a nop/prefix would change semantics.
    return false;

  return true;
}

bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in the text section.
  if (!OS.getCurrentSectionOnly()->getKind().isText())
    return false;

  // To be Done: Currently don't deal with Bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst) {
  CanPadInst = canPadInst(Inst, OS);

  if (!canPadBranches(OS))
    return;

  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen; clear the pending fragment.
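    // For example (illustrative):
    //
    // \code
    //   cmp %rcx, %rax
    //   mov %rbx, %rdx    # not a conditional jump, so no fusion with the cmp
    //   je .L0
    // \endcode
    //
    // Any BoundaryAlignFragment speculatively inserted for the cmp (when
    // fused-branch alignment is enabled) is dropped when the mov is seen; a
    // new one may be inserted further down when the je itself is processed,
    // if jcc alignment is enabled.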
    PendingBA = nullptr;

  if (!CanPadInst)
    return;

  if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
  }
}

/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  PrevInst = Inst;
  MCFragment *CF = OS.getCurrentFragment();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(CanPadInst);

  if (!canPadBranches(OS))
    return;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(new MCDataFragment());

  // Update the maximum alignment on the current section if necessary.
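  // (Illustrative reasoning: if the section were aligned to less than
  // AlignBoundary, an offset that is boundary-aligned relative to the section
  // start could still land at a misaligned address in the final image, e.g. a
  // fragment at section offset 0 in a merely 16-byte-aligned section may end
  // up at address 0x10.)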
670 MCSection *Sec = OS.getCurrentSectionOnly(); 671 if (AlignBoundary.value() > Sec->getAlignment()) 672 Sec->setAlignment(AlignBoundary); 673 } 674 675 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const { 676 if (STI.getTargetTriple().isOSBinFormatELF()) { 677 unsigned Type; 678 if (STI.getTargetTriple().getArch() == Triple::x86_64) { 679 Type = llvm::StringSwitch<unsigned>(Name) 680 #define ELF_RELOC(X, Y) .Case(#X, Y) 681 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def" 682 #undef ELF_RELOC 683 .Default(-1u); 684 } else { 685 Type = llvm::StringSwitch<unsigned>(Name) 686 #define ELF_RELOC(X, Y) .Case(#X, Y) 687 #include "llvm/BinaryFormat/ELFRelocs/i386.def" 688 #undef ELF_RELOC 689 .Default(-1u); 690 } 691 if (Type == -1u) 692 return None; 693 return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type); 694 } 695 return MCAsmBackend::getFixupKind(Name); 696 } 697 698 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const { 699 const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = { 700 {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 701 {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 702 {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 703 {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 704 {"reloc_signed_4byte", 0, 32, 0}, 705 {"reloc_signed_4byte_relax", 0, 32, 0}, 706 {"reloc_global_offset_table", 0, 32, 0}, 707 {"reloc_global_offset_table8", 0, 64, 0}, 708 {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, 709 }; 710 711 // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They 712 // do not require any extra processing. 713 if (Kind >= FirstLiteralRelocationKind) 714 return MCAsmBackend::getFixupKindInfo(FK_NONE); 715 716 if (Kind < FirstTargetFixupKind) 717 return MCAsmBackend::getFixupKindInfo(Kind); 718 719 assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && 720 "Invalid kind!"); 721 assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!"); 722 return Infos[Kind - FirstTargetFixupKind]; 723 } 724 725 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &, 726 const MCFixup &Fixup, 727 const MCValue &) { 728 return Fixup.getKind() >= FirstLiteralRelocationKind; 729 } 730 731 static unsigned getFixupKindSize(unsigned Kind) { 732 switch (Kind) { 733 default: 734 llvm_unreachable("invalid fixup kind!"); 735 case FK_NONE: 736 return 0; 737 case FK_PCRel_1: 738 case FK_SecRel_1: 739 case FK_Data_1: 740 return 1; 741 case FK_PCRel_2: 742 case FK_SecRel_2: 743 case FK_Data_2: 744 return 2; 745 case FK_PCRel_4: 746 case X86::reloc_riprel_4byte: 747 case X86::reloc_riprel_4byte_relax: 748 case X86::reloc_riprel_4byte_relax_rex: 749 case X86::reloc_riprel_4byte_movq_load: 750 case X86::reloc_signed_4byte: 751 case X86::reloc_signed_4byte_relax: 752 case X86::reloc_global_offset_table: 753 case X86::reloc_branch_4byte_pcrel: 754 case FK_SecRel_4: 755 case FK_Data_4: 756 return 4; 757 case FK_PCRel_8: 758 case FK_SecRel_8: 759 case FK_Data_8: 760 case X86::reloc_global_offset_table8: 761 return 8; 762 } 763 } 764 765 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, 766 const MCValue &Target, 767 MutableArrayRef<char> Data, 768 uint64_t Value, bool IsResolved, 769 const MCSubtargetInfo *STI) const { 770 unsigned Kind = Fixup.getKind(); 771 if (Kind >= FirstLiteralRelocationKind) 772 return; 773 unsigned Size = getFixupKindSize(Kind); 774 775 
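  // Illustrative note: the store at the bottom of this function is
  // little-endian, e.g. a 4-byte fixup with Value 0x12345678 at offset 3
  // writes the bytes 78 56 34 12 into Data[3..6].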
  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that the upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}

bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
                                      const MCSubtargetInfo &STI) const {
  // Branches can always be relaxed in either mode.
  if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
    return true;

  // Check if this instruction is ever relaxable.
  if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
    return false;

  // Check if the relaxable operand has an expression. For the current set of
  // relaxable instructions, the relaxable operand is always the last operand.
  unsigned RelaxableOp = Inst.getNumOperands() - 1;
  if (Inst.getOperand(RelaxableOp).isExpr())
    return true;

  return false;
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value,
                                         const MCRelaxableFragment *DF,
                                         const MCAsmLayout &Layout) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}

/// Return true if this instruction has been fully relaxed into its most
/// general available form.
static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
  auto &Inst = RF.getInst();
  auto &STI = *RF.getSubtargetInfo();
  bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
  return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
}

bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
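  // (Illustrative sketch: prefix padding keeps the opcode and merely prepends
  // otherwise-ignored segment overrides, e.g. a fully relaxed 5-byte
  // "jmp foo", e9 xx xx xx xx, padded by three bytes in 64-bit mode becomes
  // 2e 2e 2e e9 xx xx xx xx. The actual prefix byte comes from
  // determinePaddingPrefix and the byte count from the limits computed below.)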
  if (!isFullyRelaxed(RF))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    raw_svector_ostream VecOS(Code);
    Emitter.emitPrefix(RF.getInst(), VecOS, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine the number of prefixes it's safe to add.
    // Various targets (older chips mostly, but also Atom family) encounter
    // decoder stalls with too many prefixes. For testing purposes, we set the
    // value externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets.
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (isFullyRelaxed(RF))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  raw_svector_ostream VecOS(Code);
  Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}

void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
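  //
  // Illustrative sketch (assuming x86-pad-for-align is enabled): given
  //
  // \code
  //   addl $1, %eax      # 3 bytes as ADD32ri8 (83 c0 01)
  //   .p2align 4
  // \endcode
  //
  // if the add ends 3 bytes short of the 16-byte boundary, re-encoding it as
  // ADD32ri with a 32-bit immediate (6 bytes) absorbs the gap and removes the
  // alignment nop entirely.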
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.getKind().isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated. Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
1039 Layout.invalidateFragmentsFrom(FirstChangedFragment); 1040 } 1041 1042 // BoundaryAlign explicitly tracks it's size (unlike align) 1043 if (F.getKind() == MCFragment::FT_BoundaryAlign) 1044 cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize); 1045 1046 #ifndef NDEBUG 1047 const uint64_t FinalOffset = Layout.getFragmentOffset(&F); 1048 const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F); 1049 assert(OrigOffset + OrigSize == FinalOffset + FinalSize && 1050 "can't move start of next fragment!"); 1051 assert(FinalSize == RemainingSize && "inconsistent size computation?"); 1052 #endif 1053 1054 // If we're looking at a boundary align, make sure we don't try to pad 1055 // its target instructions for some following directive. Doing so would 1056 // break the alignment of the current boundary align. 1057 if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) { 1058 const MCFragment *LastFragment = BF->getLastFragment(); 1059 if (!LastFragment) 1060 continue; 1061 while (&*I != LastFragment) 1062 ++I; 1063 } 1064 } 1065 } 1066 1067 // The layout is done. Mark every fragment as valid. 1068 for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) { 1069 MCSection &Section = *Layout.getSectionOrder()[i]; 1070 Layout.getFragmentOffset(&*Section.getFragmentList().rbegin()); 1071 Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin()); 1072 } 1073 } 1074 1075 unsigned X86AsmBackend::getMaximumNopSize() const { 1076 if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) 1077 return 1; 1078 if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP]) 1079 return 7; 1080 if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP]) 1081 return 15; 1082 if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP]) 1083 return 11; 1084 // FIXME: handle 32-bit mode 1085 // 15-bytes is the longest single NOP instruction, but 10-bytes is 1086 // commonly the longest that can be efficiently decoded. 1087 return 10; 1088 } 1089 1090 /// Write a sequence of optimal nops to the output, covering \p Count 1091 /// bytes. 1092 /// \return - true on success, false on failure 1093 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const { 1094 static const char Nops[10][11] = { 1095 // nop 1096 "\x90", 1097 // xchg %ax,%ax 1098 "\x66\x90", 1099 // nopl (%[re]ax) 1100 "\x0f\x1f\x00", 1101 // nopl 0(%[re]ax) 1102 "\x0f\x1f\x40\x00", 1103 // nopl 0(%[re]ax,%[re]ax,1) 1104 "\x0f\x1f\x44\x00\x00", 1105 // nopw 0(%[re]ax,%[re]ax,1) 1106 "\x66\x0f\x1f\x44\x00\x00", 1107 // nopl 0L(%[re]ax) 1108 "\x0f\x1f\x80\x00\x00\x00\x00", 1109 // nopl 0L(%[re]ax,%[re]ax,1) 1110 "\x0f\x1f\x84\x00\x00\x00\x00\x00", 1111 // nopw 0L(%[re]ax,%[re]ax,1) 1112 "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", 1113 // nopw %cs:0L(%[re]ax,%[re]ax,1) 1114 "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00", 1115 }; 1116 1117 uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(); 1118 1119 // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining 1120 // length. 1121 do { 1122 const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); 1123 const uint8_t Prefixes = ThisNopLength <= 10 ? 
0 : ThisNopLength - 10; 1124 for (uint8_t i = 0; i < Prefixes; i++) 1125 OS << '\x66'; 1126 const uint8_t Rest = ThisNopLength - Prefixes; 1127 if (Rest != 0) 1128 OS.write(Nops[Rest - 1], Rest); 1129 Count -= ThisNopLength; 1130 } while (Count != 0); 1131 1132 return true; 1133 } 1134 1135 /* *** */ 1136 1137 namespace { 1138 1139 class ELFX86AsmBackend : public X86AsmBackend { 1140 public: 1141 uint8_t OSABI; 1142 ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI) 1143 : X86AsmBackend(T, STI), OSABI(OSABI) {} 1144 }; 1145 1146 class ELFX86_32AsmBackend : public ELFX86AsmBackend { 1147 public: 1148 ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, 1149 const MCSubtargetInfo &STI) 1150 : ELFX86AsmBackend(T, OSABI, STI) {} 1151 1152 std::unique_ptr<MCObjectTargetWriter> 1153 createObjectTargetWriter() const override { 1154 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386); 1155 } 1156 }; 1157 1158 class ELFX86_X32AsmBackend : public ELFX86AsmBackend { 1159 public: 1160 ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, 1161 const MCSubtargetInfo &STI) 1162 : ELFX86AsmBackend(T, OSABI, STI) {} 1163 1164 std::unique_ptr<MCObjectTargetWriter> 1165 createObjectTargetWriter() const override { 1166 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1167 ELF::EM_X86_64); 1168 } 1169 }; 1170 1171 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend { 1172 public: 1173 ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, 1174 const MCSubtargetInfo &STI) 1175 : ELFX86AsmBackend(T, OSABI, STI) {} 1176 1177 std::unique_ptr<MCObjectTargetWriter> 1178 createObjectTargetWriter() const override { 1179 return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, 1180 ELF::EM_IAMCU); 1181 } 1182 }; 1183 1184 class ELFX86_64AsmBackend : public ELFX86AsmBackend { 1185 public: 1186 ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, 1187 const MCSubtargetInfo &STI) 1188 : ELFX86AsmBackend(T, OSABI, STI) {} 1189 1190 std::unique_ptr<MCObjectTargetWriter> 1191 createObjectTargetWriter() const override { 1192 return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64); 1193 } 1194 }; 1195 1196 class WindowsX86AsmBackend : public X86AsmBackend { 1197 bool Is64Bit; 1198 1199 public: 1200 WindowsX86AsmBackend(const Target &T, bool is64Bit, 1201 const MCSubtargetInfo &STI) 1202 : X86AsmBackend(T, STI) 1203 , Is64Bit(is64Bit) { 1204 } 1205 1206 Optional<MCFixupKind> getFixupKind(StringRef Name) const override { 1207 return StringSwitch<Optional<MCFixupKind>>(Name) 1208 .Case("dir32", FK_Data_4) 1209 .Case("secrel32", FK_SecRel_4) 1210 .Case("secidx", FK_SecRel_2) 1211 .Default(MCAsmBackend::getFixupKind(Name)); 1212 } 1213 1214 std::unique_ptr<MCObjectTargetWriter> 1215 createObjectTargetWriter() const override { 1216 return createX86WinCOFFObjectWriter(Is64Bit); 1217 } 1218 }; 1219 1220 namespace CU { 1221 1222 /// Compact unwind encoding values. 1223 enum CompactUnwindEncodings { 1224 /// [RE]BP based frame where [RE]BP is pused on the stack immediately after 1225 /// the return address, then [RE]SP is moved to [RE]BP. 1226 UNWIND_MODE_BP_FRAME = 0x01000000, 1227 1228 /// A frameless function with a small constant stack size. 1229 UNWIND_MODE_STACK_IMMD = 0x02000000, 1230 1231 /// A frameless function with a large constant stack size. 1232 UNWIND_MODE_STACK_IND = 0x03000000, 1233 1234 /// No compact unwind encoding is available. 1235 UNWIND_MODE_DWARF = 0x04000000, 1236 1237 /// Mask for encoding the frame registers. 
1238 UNWIND_BP_FRAME_REGISTERS = 0x00007FFF, 1239 1240 /// Mask for encoding the frameless registers. 1241 UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF 1242 }; 1243 1244 } // namespace CU 1245 1246 class DarwinX86AsmBackend : public X86AsmBackend { 1247 const MCRegisterInfo &MRI; 1248 1249 /// Number of registers that can be saved in a compact unwind encoding. 1250 enum { CU_NUM_SAVED_REGS = 6 }; 1251 1252 mutable unsigned SavedRegs[CU_NUM_SAVED_REGS]; 1253 Triple TT; 1254 bool Is64Bit; 1255 1256 unsigned OffsetSize; ///< Offset of a "push" instruction. 1257 unsigned MoveInstrSize; ///< Size of a "move" instruction. 1258 unsigned StackDivide; ///< Amount to adjust stack size by. 1259 protected: 1260 /// Size of a "push" instruction for the given register. 1261 unsigned PushInstrSize(unsigned Reg) const { 1262 switch (Reg) { 1263 case X86::EBX: 1264 case X86::ECX: 1265 case X86::EDX: 1266 case X86::EDI: 1267 case X86::ESI: 1268 case X86::EBP: 1269 case X86::RBX: 1270 case X86::RBP: 1271 return 1; 1272 case X86::R12: 1273 case X86::R13: 1274 case X86::R14: 1275 case X86::R15: 1276 return 2; 1277 } 1278 return 1; 1279 } 1280 1281 private: 1282 /// Get the compact unwind number for a given register. The number 1283 /// corresponds to the enum lists in compact_unwind_encoding.h. 1284 int getCompactUnwindRegNum(unsigned Reg) const { 1285 static const MCPhysReg CU32BitRegs[7] = { 1286 X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 1287 }; 1288 static const MCPhysReg CU64BitRegs[] = { 1289 X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 1290 }; 1291 const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; 1292 for (int Idx = 1; *CURegs; ++CURegs, ++Idx) 1293 if (*CURegs == Reg) 1294 return Idx; 1295 1296 return -1; 1297 } 1298 1299 /// Return the registers encoded for a compact encoding with a frame 1300 /// pointer. 1301 uint32_t encodeCompactUnwindRegistersWithFrame() const { 1302 // Encode the registers in the order they were saved --- 3-bits per 1303 // register. The list of saved registers is assumed to be in reverse 1304 // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS. 1305 uint32_t RegEnc = 0; 1306 for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) { 1307 unsigned Reg = SavedRegs[i]; 1308 if (Reg == 0) break; 1309 1310 int CURegNum = getCompactUnwindRegNum(Reg); 1311 if (CURegNum == -1) return ~0U; 1312 1313 // Encode the 3-bit register number in order, skipping over 3-bits for 1314 // each register. 1315 RegEnc |= (CURegNum & 0x7) << (Idx++ * 3); 1316 } 1317 1318 assert((RegEnc & 0x3FFFF) == RegEnc && 1319 "Invalid compact register encoding!"); 1320 return RegEnc; 1321 } 1322 1323 /// Create the permutation encoding used with frameless stacks. It is 1324 /// passed the number of registers to be saved and an array of the registers 1325 /// saved. 1326 uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const { 1327 // The saved registers are numbered from 1 to 6. In order to encode the 1328 // order in which they were saved, we re-number them according to their 1329 // place in the register order. The re-numbering is relative to the last 1330 // re-numbered register. 
E.g., if we have registers {6, 2, 4, 5} saved in 1331 // that order: 1332 // 1333 // Orig Re-Num 1334 // ---- ------ 1335 // 6 6 1336 // 2 2 1337 // 4 3 1338 // 5 3 1339 // 1340 for (unsigned i = 0; i < RegCount; ++i) { 1341 int CUReg = getCompactUnwindRegNum(SavedRegs[i]); 1342 if (CUReg == -1) return ~0U; 1343 SavedRegs[i] = CUReg; 1344 } 1345 1346 // Reverse the list. 1347 std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]); 1348 1349 uint32_t RenumRegs[CU_NUM_SAVED_REGS]; 1350 for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){ 1351 unsigned Countless = 0; 1352 for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j) 1353 if (SavedRegs[j] < SavedRegs[i]) 1354 ++Countless; 1355 1356 RenumRegs[i] = SavedRegs[i] - Countless - 1; 1357 } 1358 1359 // Take the renumbered values and encode them into a 10-bit number. 1360 uint32_t permutationEncoding = 0; 1361 switch (RegCount) { 1362 case 6: 1363 permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1] 1364 + 6 * RenumRegs[2] + 2 * RenumRegs[3] 1365 + RenumRegs[4]; 1366 break; 1367 case 5: 1368 permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2] 1369 + 6 * RenumRegs[3] + 2 * RenumRegs[4] 1370 + RenumRegs[5]; 1371 break; 1372 case 4: 1373 permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3] 1374 + 3 * RenumRegs[4] + RenumRegs[5]; 1375 break; 1376 case 3: 1377 permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4] 1378 + RenumRegs[5]; 1379 break; 1380 case 2: 1381 permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5]; 1382 break; 1383 case 1: 1384 permutationEncoding |= RenumRegs[5]; 1385 break; 1386 } 1387 1388 assert((permutationEncoding & 0x3FF) == permutationEncoding && 1389 "Invalid compact register encoding!"); 1390 return permutationEncoding; 1391 } 1392 1393 public: 1394 DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI, 1395 const MCSubtargetInfo &STI) 1396 : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()), 1397 Is64Bit(TT.isArch64Bit()) { 1398 memset(SavedRegs, 0, sizeof(SavedRegs)); 1399 OffsetSize = Is64Bit ? 8 : 4; 1400 MoveInstrSize = Is64Bit ? 3 : 2; 1401 StackDivide = Is64Bit ? 8 : 4; 1402 } 1403 1404 std::unique_ptr<MCObjectTargetWriter> 1405 createObjectTargetWriter() const override { 1406 uint32_t CPUType = cantFail(MachO::getCPUType(TT)); 1407 uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT)); 1408 return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType); 1409 } 1410 1411 /// Implementation of algorithm to generate the compact unwind encoding 1412 /// for the CFI instructions. 1413 uint32_t 1414 generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override { 1415 if (Instrs.empty()) return 0; 1416 1417 // Reset the saved registers. 1418 unsigned SavedRegIdx = 0; 1419 memset(SavedRegs, 0, sizeof(SavedRegs)); 1420 1421 bool HasFP = false; 1422 1423 // Encode that we are using EBP/RBP as the frame pointer. 1424 uint32_t CompactUnwindEncoding = 0; 1425 1426 unsigned SubtractInstrIdx = Is64Bit ? 3 : 2; 1427 unsigned InstrOffset = 0; 1428 unsigned StackAdjust = 0; 1429 unsigned StackSize = 0; 1430 unsigned NumDefCFAOffsets = 0; 1431 1432 for (unsigned i = 0, e = Instrs.size(); i != e; ++i) { 1433 const MCCFIInstruction &Inst = Instrs[i]; 1434 1435 switch (Inst.getOperation()) { 1436 default: 1437 // Any other CFI directives indicate a frame that we aren't prepared 1438 // to represent via compact unwind, so just bail out. 
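        // (Directives that land here include, for example,
        // .cfi_remember_state, .cfi_escape and .cfi_adjust_cfa_offset; only
        // the three operations handled below can be represented compactly.)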
1439 return 0; 1440 case MCCFIInstruction::OpDefCfaRegister: { 1441 // Defines a frame pointer. E.g. 1442 // 1443 // movq %rsp, %rbp 1444 // L0: 1445 // .cfi_def_cfa_register %rbp 1446 // 1447 HasFP = true; 1448 1449 // If the frame pointer is other than esp/rsp, we do not have a way to 1450 // generate a compact unwinding representation, so bail out. 1451 if (*MRI.getLLVMRegNum(Inst.getRegister(), true) != 1452 (Is64Bit ? X86::RBP : X86::EBP)) 1453 return 0; 1454 1455 // Reset the counts. 1456 memset(SavedRegs, 0, sizeof(SavedRegs)); 1457 StackAdjust = 0; 1458 SavedRegIdx = 0; 1459 InstrOffset += MoveInstrSize; 1460 break; 1461 } 1462 case MCCFIInstruction::OpDefCfaOffset: { 1463 // Defines a new offset for the CFA. E.g. 1464 // 1465 // With frame: 1466 // 1467 // pushq %rbp 1468 // L0: 1469 // .cfi_def_cfa_offset 16 1470 // 1471 // Without frame: 1472 // 1473 // subq $72, %rsp 1474 // L0: 1475 // .cfi_def_cfa_offset 80 1476 // 1477 StackSize = Inst.getOffset() / StackDivide; 1478 ++NumDefCFAOffsets; 1479 break; 1480 } 1481 case MCCFIInstruction::OpOffset: { 1482 // Defines a "push" of a callee-saved register. E.g. 1483 // 1484 // pushq %r15 1485 // pushq %r14 1486 // pushq %rbx 1487 // L0: 1488 // subq $120, %rsp 1489 // L1: 1490 // .cfi_offset %rbx, -40 1491 // .cfi_offset %r14, -32 1492 // .cfi_offset %r15, -24 1493 // 1494 if (SavedRegIdx == CU_NUM_SAVED_REGS) 1495 // If there are too many saved registers, we cannot use a compact 1496 // unwind encoding. 1497 return CU::UNWIND_MODE_DWARF; 1498 1499 unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true); 1500 SavedRegs[SavedRegIdx++] = Reg; 1501 StackAdjust += OffsetSize; 1502 InstrOffset += PushInstrSize(Reg); 1503 break; 1504 } 1505 } 1506 } 1507 1508 StackAdjust /= StackDivide; 1509 1510 if (HasFP) { 1511 if ((StackAdjust & 0xFF) != StackAdjust) 1512 // Offset was too big for a compact unwind encoding. 1513 return CU::UNWIND_MODE_DWARF; 1514 1515 // Get the encoding of the saved registers when we have a frame pointer. 1516 uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(); 1517 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1518 1519 CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME; 1520 CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16; 1521 CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS; 1522 } else { 1523 SubtractInstrIdx += InstrOffset; 1524 ++StackAdjust; 1525 1526 if ((StackSize & 0xFF) == StackSize) { 1527 // Frameless stack with a small stack size. 1528 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD; 1529 1530 // Encode the stack size. 1531 CompactUnwindEncoding |= (StackSize & 0xFF) << 16; 1532 } else { 1533 if ((StackAdjust & 0x7) != StackAdjust) 1534 // The extra stack adjustments are too big for us to handle. 1535 return CU::UNWIND_MODE_DWARF; 1536 1537 // Frameless stack with an offset too large for us to encode compactly. 1538 CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND; 1539 1540 // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP' 1541 // instruction. 1542 CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16; 1543 1544 // Encode any extra stack adjustments (done via push instructions). 1545 CompactUnwindEncoding |= (StackAdjust & 0x7) << 13; 1546 } 1547 1548 // Encode the number of registers saved. (Reverse the list first.) 1549 std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]); 1550 CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10; 1551 1552 // Get the encoding of the saved registers when we don't have a frame 1553 // pointer. 
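      // Illustrative summary of how the frameless encodings above are packed
      // (derived from the shifts used in this function):
      //
      //   STACK_IMMD: mode | (StackSize & 0xFF) << 16
      //               | (RegCount & 0x7) << 10 | (Permutation & 0x3FF)
      //   STACK_IND:  mode | (SubtractInstrIdx & 0xFF) << 16
      //               | (StackAdjust & 0x7) << 13 | (RegCount & 0x7) << 10
      //               | (Permutation & 0x3FF)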
1554 uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx); 1555 if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF; 1556 1557 // Encode the register encoding. 1558 CompactUnwindEncoding |= 1559 RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION; 1560 } 1561 1562 return CompactUnwindEncoding; 1563 } 1564 }; 1565 1566 } // end anonymous namespace 1567 1568 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, 1569 const MCSubtargetInfo &STI, 1570 const MCRegisterInfo &MRI, 1571 const MCTargetOptions &Options) { 1572 const Triple &TheTriple = STI.getTargetTriple(); 1573 if (TheTriple.isOSBinFormatMachO()) 1574 return new DarwinX86AsmBackend(T, MRI, STI); 1575 1576 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1577 return new WindowsX86AsmBackend(T, false, STI); 1578 1579 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1580 1581 if (TheTriple.isOSIAMCU()) 1582 return new ELFX86_IAMCUAsmBackend(T, OSABI, STI); 1583 1584 return new ELFX86_32AsmBackend(T, OSABI, STI); 1585 } 1586 1587 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, 1588 const MCSubtargetInfo &STI, 1589 const MCRegisterInfo &MRI, 1590 const MCTargetOptions &Options) { 1591 const Triple &TheTriple = STI.getTargetTriple(); 1592 if (TheTriple.isOSBinFormatMachO()) 1593 return new DarwinX86AsmBackend(T, MRI, STI); 1594 1595 if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF()) 1596 return new WindowsX86AsmBackend(T, true, STI); 1597 1598 uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); 1599 1600 if (TheTriple.getEnvironment() == Triple::GNUX32) 1601 return new ELFX86_X32AsmBackend(T, OSABI, STI); 1602 return new ELFX86_64AsmBackend(T, OSABI, STI); 1603 } 1604