1 //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the X86 implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "X86FrameLowering.h" 14 #include "X86InstrBuilder.h" 15 #include "X86InstrInfo.h" 16 #include "X86MachineFunctionInfo.h" 17 #include "X86Subtarget.h" 18 #include "X86TargetMachine.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/Analysis/EHPersonalities.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineModuleInfo.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/WinEHFuncInfo.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/MC/MCAsmInfo.h" 31 #include "llvm/MC/MCObjectFileInfo.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Target/TargetOptions.h" 35 #include <cstdlib> 36 37 #define DEBUG_TYPE "x86-fl" 38 39 STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue"); 40 STATISTIC(NumFrameExtraProbe, 41 "Number of extra stack probes generated in prologue"); 42 43 using namespace llvm; 44 45 X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, 46 MaybeAlign StackAlignOverride) 47 : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), 48 STI.is64Bit() ? -8 : -4), 49 STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) { 50 // Cache a bunch of frame-related predicates for this subtarget. 51 SlotSize = TRI->getSlotSize(); 52 Is64Bit = STI.is64Bit(); 53 IsLP64 = STI.isTarget64BitLP64(); 54 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 55 Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); 56 StackPtr = TRI->getStackRegister(); 57 } 58 59 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 60 return !MF.getFrameInfo().hasVarSizedObjects() && 61 !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && 62 !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); 63 } 64 65 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the 66 /// call frame pseudos can be simplified. Having a FP, as in the default 67 /// implementation, is not sufficient here since we can't always use it. 68 /// Use a more nuanced condition. 69 bool 70 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { 71 return hasReservedCallFrame(MF) || 72 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || 73 (hasFP(MF) && !TRI->hasStackRealignment(MF)) || 74 TRI->hasBasePointer(MF); 75 } 76 77 // needsFrameIndexResolution - Do we need to perform FI resolution for 78 // this function. Normally, this is required only when the function 79 // has any stack objects. However, FI resolution actually has another job, 80 // not apparent from the title - it resolves callframesetup/destroy 81 // that were not simplified earlier. 
82 // So, this is required for x86 functions that have push sequences even 83 // when there are no stack objects. 84 bool 85 X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { 86 return MF.getFrameInfo().hasStackObjects() || 87 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); 88 } 89 90 /// hasFP - Return true if the specified function should have a dedicated frame 91 /// pointer register. This is true if the function has variable sized allocas 92 /// or if frame pointer elimination is disabled. 93 bool X86FrameLowering::hasFP(const MachineFunction &MF) const { 94 const MachineFrameInfo &MFI = MF.getFrameInfo(); 95 return (MF.getTarget().Options.DisableFramePointerElim(MF) || 96 TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || 97 MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || 98 MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || 99 MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || 100 MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || 101 MFI.hasStackMap() || MFI.hasPatchPoint() || 102 (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment())); 103 } 104 105 static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { 106 if (IsLP64) { 107 if (isInt<8>(Imm)) 108 return X86::SUB64ri8; 109 return X86::SUB64ri32; 110 } else { 111 if (isInt<8>(Imm)) 112 return X86::SUB32ri8; 113 return X86::SUB32ri; 114 } 115 } 116 117 static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) { 118 if (IsLP64) { 119 if (isInt<8>(Imm)) 120 return X86::ADD64ri8; 121 return X86::ADD64ri32; 122 } else { 123 if (isInt<8>(Imm)) 124 return X86::ADD32ri8; 125 return X86::ADD32ri; 126 } 127 } 128 129 static unsigned getSUBrrOpcode(bool IsLP64) { 130 return IsLP64 ? X86::SUB64rr : X86::SUB32rr; 131 } 132 133 static unsigned getADDrrOpcode(bool IsLP64) { 134 return IsLP64 ? X86::ADD64rr : X86::ADD32rr; 135 } 136 137 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { 138 if (IsLP64) { 139 if (isInt<8>(Imm)) 140 return X86::AND64ri8; 141 return X86::AND64ri32; 142 } 143 if (isInt<8>(Imm)) 144 return X86::AND32ri8; 145 return X86::AND32ri; 146 } 147 148 static unsigned getLEArOpcode(bool IsLP64) { 149 return IsLP64 ? X86::LEA64r : X86::LEA32r; 150 } 151 152 static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { 153 if (Use64BitReg) { 154 if (isUInt<32>(Imm)) 155 return X86::MOV32ri64; 156 if (isInt<32>(Imm)) 157 return X86::MOV64ri32; 158 return X86::MOV64ri; 159 } 160 return X86::MOV32ri; 161 } 162 163 static bool isEAXLiveIn(MachineBasicBlock &MBB) { 164 for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { 165 unsigned Reg = RegMask.PhysReg; 166 167 if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || 168 Reg == X86::AH || Reg == X86::AL) 169 return true; 170 } 171 172 return false; 173 } 174 175 /// Check if the flags need to be preserved before the terminators. 176 /// This would be the case, if the eflags is live-in of the region 177 /// composed by the terminators or live-out of that region, without 178 /// being defined by a terminator. 
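/// Illustrative note (not from the original source): a typical case where the
/// flags must be preserved is a block whose terminators include a conditional
/// branch fed by an earlier compare, e.g.
///   CMP64rr $rax, $rbx, implicit-def $eflags   ; not a terminator
///   JCC_1 %bb.target, <cond>, implicit $eflags ; terminator reads EFLAGS
/// Here EFLAGS is live into the terminator region, so a stack-pointer
/// adjustment inserted before the terminators must avoid ADD/SUB (which
/// clobber EFLAGS) and use LEA instead.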
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL,
                                    int64_t NumBytes, bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub
                         ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                         : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub
                           ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                           : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}

MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
                               : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}

int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // the ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
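    // (Illustrative example, not from the original source: an adjustment such
    // as $esp = LEA32r $esp, 1, $noreg, 16, $noreg -- i.e. "leal 16(%esp),
    // %esp" -- is merged here with Offset = 16 taken from operand 4.)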
412 Offset = PI->getOperand(4).getImm(); 413 } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || 414 Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && 415 PI->getOperand(0).getReg() == StackPtr) { 416 assert(PI->getOperand(1).getReg() == StackPtr); 417 Offset = -PI->getOperand(2).getImm(); 418 } else 419 return 0; 420 421 PI = MBB.erase(PI); 422 if (PI != MBB.end() && PI->isCFIInstruction()) { 423 auto CIs = MBB.getParent()->getFrameInstructions(); 424 MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()]; 425 if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset || 426 CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) 427 PI = MBB.erase(PI); 428 } 429 if (!doMergeWithPrevious) 430 MBBI = skipDebugInstructionsForward(PI, MBB.end()); 431 432 return Offset; 433 } 434 435 void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, 436 MachineBasicBlock::iterator MBBI, 437 const DebugLoc &DL, 438 const MCCFIInstruction &CFIInst) const { 439 MachineFunction &MF = *MBB.getParent(); 440 unsigned CFIIndex = MF.addFrameInst(CFIInst); 441 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 442 .addCFIIndex(CFIIndex); 443 } 444 445 /// Emits Dwarf Info specifying offsets of callee saved registers and 446 /// frame pointer. This is called only when basic block sections are enabled. 447 void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA( 448 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { 449 MachineFunction &MF = *MBB.getParent(); 450 if (!hasFP(MF)) { 451 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); 452 return; 453 } 454 const MachineModuleInfo &MMI = MF.getMMI(); 455 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 456 const Register FramePtr = TRI->getFrameRegister(MF); 457 const Register MachineFramePtr = 458 STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64)) 459 : FramePtr; 460 unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); 461 // Offset = space for return address + size of the frame pointer itself. 462 unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); 463 BuildCFI(MBB, MBBI, DebugLoc{}, 464 MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset)); 465 emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); 466 } 467 468 void X86FrameLowering::emitCalleeSavedFrameMoves( 469 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 470 const DebugLoc &DL, bool IsPrologue) const { 471 MachineFunction &MF = *MBB.getParent(); 472 MachineFrameInfo &MFI = MF.getFrameInfo(); 473 MachineModuleInfo &MMI = MF.getMMI(); 474 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 475 476 // Add callee saved registers to move list. 477 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 478 479 // Calculate offsets. 
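  // For instance (illustrative, not from the original source), a callee-saved
  // %rbx spilled at frame-object offset -24 produces ".cfi_offset %rbx, -24"
  // on the prologue path below and ".cfi_restore %rbx" on the epilogue path.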
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
}

void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}

bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}

void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    return MI.getOpcode() == X86::STACKALLOC_W_PROBING;
  });
  if (Where != PrologMBB.end()) {
    DebugLoc DL = PrologMBB.findDebugLoc(Where);
    emitStackProbeInline(MF, PrologMBB, Where, DL, true);
    Where->eraseFromParent();
  }
}

void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}

void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are
  // left unprobed between the unaligned rsp and the current rsp.
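  // For example (illustrative figures, assuming the default 4096-byte probe
  // size): a 40000-byte allocation exceeds ProbeChunk (8 * 4096 = 32768), so
  // the loop form is emitted; a 20000-byte allocation would be unrolled.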
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}

void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                           .addReg(StackPtr)
                           .addImm(StackProbeSize - AlignOffset)
                           .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(
                   nullptr, StackProbeSize - AlignOffset));
    }
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there were very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                           .addReg(StackPtr)
                           .addImm(StackProbeSize)
                           .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                         .addReg(StackPtr)
                         .addImm(ChunkSize)
                         .setMIFlag(MachineInstr::FrameSetup);
  // No need to adjust the Dwarf CFA offset here; the final position of the
  // stack pointer has already been defined.
  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
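  // Rough shape of the unrolled sequence emitted above (illustrative, assuming
  // a 4096-byte probe size and AlignOffset == 0):
  //   subq $4096, %rsp
  //   movq $0, (%rsp)
  //   ...                    ; one sub+store pair per full page
  //   subq $ChunkSize, %rsp  ; tail, smaller than a page, left unprobed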
642 } 643 644 void X86FrameLowering::emitStackProbeInlineGenericLoop( 645 MachineFunction &MF, MachineBasicBlock &MBB, 646 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, 647 uint64_t AlignOffset) const { 648 assert(Offset && "null offset"); 649 650 const bool NeedsDwarfCFI = needsDwarfCFI(MF); 651 const bool HasFP = hasFP(MF); 652 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 653 const X86TargetLowering &TLI = *STI.getTargetLowering(); 654 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; 655 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); 656 657 if (AlignOffset) { 658 if (AlignOffset < StackProbeSize) { 659 // Perform a first smaller allocation followed by a probe. 660 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); 661 MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) 662 .addReg(StackPtr) 663 .addImm(AlignOffset) 664 .setMIFlag(MachineInstr::FrameSetup); 665 MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. 666 667 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) 668 .setMIFlag(MachineInstr::FrameSetup), 669 StackPtr, false, 0) 670 .addImm(0) 671 .setMIFlag(MachineInstr::FrameSetup); 672 NumFrameExtraProbe++; 673 Offset -= AlignOffset; 674 } 675 } 676 677 // Synthesize a loop 678 NumFrameLoopProbe++; 679 const BasicBlock *LLVM_BB = MBB.getBasicBlock(); 680 681 MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB); 682 MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB); 683 684 MachineFunction::iterator MBBIter = ++MBB.getIterator(); 685 MF.insert(MBBIter, testMBB); 686 MF.insert(MBBIter, tailMBB); 687 688 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 689 : Is64Bit ? X86::R11D 690 : X86::EAX; 691 692 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) 693 .addReg(StackPtr) 694 .setMIFlag(MachineInstr::FrameSetup); 695 696 // save loop bound 697 { 698 const unsigned BoundOffset = alignDown(Offset, StackProbeSize); 699 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset); 700 BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) 701 .addReg(FinalStackProbed) 702 .addImm(BoundOffset) 703 .setMIFlag(MachineInstr::FrameSetup); 704 705 // while in the loop, use loop-invariant reg for CFI, 706 // instead of the stack pointer, which changes during the loop 707 if (!HasFP && NeedsDwarfCFI) { 708 // x32 uses the same DWARF register numbers as x86-64, 709 // so there isn't a register number for r11d, we must use r11 instead 710 const Register DwarfFinalStackProbed = 711 STI.isTarget64BitILP32() 712 ? Register(getX86SubSuperRegister(FinalStackProbed, 64)) 713 : FinalStackProbed; 714 715 BuildCFI(MBB, MBBI, DL, 716 MCCFIInstruction::createDefCfaRegister( 717 nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true))); 718 BuildCFI(MBB, MBBI, DL, 719 MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset)); 720 } 721 } 722 723 // allocate a page 724 { 725 const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); 726 BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) 727 .addReg(StackPtr) 728 .addImm(StackProbeSize) 729 .setMIFlag(MachineInstr::FrameSetup); 730 } 731 732 // touch the page 733 addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc)) 734 .setMIFlag(MachineInstr::FrameSetup), 735 StackPtr, false, 0) 736 .addImm(0) 737 .setMIFlag(MachineInstr::FrameSetup); 738 739 // cmp with stack pointer bound 740 BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? 
X86::CMP64rr : X86::CMP32rr)) 741 .addReg(StackPtr) 742 .addReg(FinalStackProbed) 743 .setMIFlag(MachineInstr::FrameSetup); 744 745 // jump 746 BuildMI(testMBB, DL, TII.get(X86::JCC_1)) 747 .addMBB(testMBB) 748 .addImm(X86::COND_NE) 749 .setMIFlag(MachineInstr::FrameSetup); 750 testMBB->addSuccessor(testMBB); 751 testMBB->addSuccessor(tailMBB); 752 753 // BB management 754 tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); 755 tailMBB->transferSuccessorsAndUpdatePHIs(&MBB); 756 MBB.addSuccessor(testMBB); 757 758 // handle tail 759 const unsigned TailOffset = Offset % StackProbeSize; 760 MachineBasicBlock::iterator TailMBBIter = tailMBB->begin(); 761 if (TailOffset) { 762 const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); 763 BuildMI(*tailMBB, TailMBBIter, DL, TII.get(Opc), StackPtr) 764 .addReg(StackPtr) 765 .addImm(TailOffset) 766 .setMIFlag(MachineInstr::FrameSetup); 767 } 768 769 // after the loop, switch back to stack pointer for CFI 770 if (!HasFP && NeedsDwarfCFI) { 771 // x32 uses the same DWARF register numbers as x86-64, 772 // so there isn't a register number for esp, we must use rsp instead 773 const Register DwarfStackPtr = 774 STI.isTarget64BitILP32() 775 ? Register(getX86SubSuperRegister(StackPtr, 64)) 776 : Register(StackPtr); 777 778 BuildCFI(*tailMBB, TailMBBIter, DL, 779 MCCFIInstruction::createDefCfaRegister( 780 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true))); 781 } 782 783 // Update Live In information 784 recomputeLiveIns(*testMBB); 785 recomputeLiveIns(*tailMBB); 786 } 787 788 void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( 789 MachineFunction &MF, MachineBasicBlock &MBB, 790 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { 791 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 792 assert(STI.is64Bit() && "different expansion needed for 32 bit"); 793 assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR"); 794 const TargetInstrInfo &TII = *STI.getInstrInfo(); 795 const BasicBlock *LLVM_BB = MBB.getBasicBlock(); 796 797 // RAX contains the number of bytes of desired stack adjustment. 798 // The handling here assumes this value has already been updated so as to 799 // maintain stack alignment. 800 // 801 // We need to exit with RSP modified by this amount and execute suitable 802 // page touches to notify the OS that we're growing the stack responsibly. 803 // All stack probing must be done without modifying RSP. 804 // 805 // MBB: 806 // SizeReg = RAX; 807 // ZeroReg = 0 808 // CopyReg = RSP 809 // Flags, TestReg = CopyReg - SizeReg 810 // FinalReg = !Flags.Ovf ? TestReg : ZeroReg 811 // LimitReg = gs magic thread env access 812 // if FinalReg >= LimitReg goto ContinueMBB 813 // RoundBB: 814 // RoundReg = page address of FinalReg 815 // LoopMBB: 816 // LoopReg = PHI(LimitReg,ProbeReg) 817 // ProbeReg = LoopReg - PageSize 818 // [ProbeReg] = 0 819 // if (ProbeReg > RoundReg) goto LoopMBB 820 // ContinueMBB: 821 // RSP = RSP - RAX 822 // [rest of original MBB] 823 824 // Set up the new basic blocks 825 MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB); 826 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); 827 MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB); 828 829 MachineFunction::iterator MBBIter = std::next(MBB.getIterator()); 830 MF.insert(MBBIter, RoundMBB); 831 MF.insert(MBBIter, LoopMBB); 832 MF.insert(MBBIter, ContinueMBB); 833 834 // Split MBB and move the tail portion down to ContinueMBB. 
835 MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI); 836 ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end()); 837 ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB); 838 839 // Some useful constants 840 const int64_t ThreadEnvironmentStackLimit = 0x10; 841 const int64_t PageSize = 0x1000; 842 const int64_t PageMask = ~(PageSize - 1); 843 844 // Registers we need. For the normal case we use virtual 845 // registers. For the prolog expansion we use RAX, RCX and RDX. 846 MachineRegisterInfo &MRI = MF.getRegInfo(); 847 const TargetRegisterClass *RegClass = &X86::GR64RegClass; 848 const Register SizeReg = InProlog ? X86::RAX 849 : MRI.createVirtualRegister(RegClass), 850 ZeroReg = InProlog ? X86::RCX 851 : MRI.createVirtualRegister(RegClass), 852 CopyReg = InProlog ? X86::RDX 853 : MRI.createVirtualRegister(RegClass), 854 TestReg = InProlog ? X86::RDX 855 : MRI.createVirtualRegister(RegClass), 856 FinalReg = InProlog ? X86::RDX 857 : MRI.createVirtualRegister(RegClass), 858 RoundedReg = InProlog ? X86::RDX 859 : MRI.createVirtualRegister(RegClass), 860 LimitReg = InProlog ? X86::RCX 861 : MRI.createVirtualRegister(RegClass), 862 JoinReg = InProlog ? X86::RCX 863 : MRI.createVirtualRegister(RegClass), 864 ProbeReg = InProlog ? X86::RCX 865 : MRI.createVirtualRegister(RegClass); 866 867 // SP-relative offsets where we can save RCX and RDX. 868 int64_t RCXShadowSlot = 0; 869 int64_t RDXShadowSlot = 0; 870 871 // If inlining in the prolog, save RCX and RDX. 872 if (InProlog) { 873 // Compute the offsets. We need to account for things already 874 // pushed onto the stack at this point: return address, frame 875 // pointer (if used), and callee saves. 876 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 877 const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); 878 const bool HasFP = hasFP(MF); 879 880 // Check if we need to spill RCX and/or RDX. 881 // Here we assume that no earlier prologue instruction changes RCX and/or 882 // RDX, so checking the block live-ins is enough. 883 const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); 884 const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); 885 int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); 886 // Assign the initial slot to both registers, then change RDX's slot if both 887 // need to be spilled. 888 if (IsRCXLiveIn) 889 RCXShadowSlot = InitSlot; 890 if (IsRDXLiveIn) 891 RDXShadowSlot = InitSlot; 892 if (IsRDXLiveIn && IsRCXLiveIn) 893 RDXShadowSlot += 8; 894 // Emit the saves if needed. 895 if (IsRCXLiveIn) 896 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, 897 RCXShadowSlot) 898 .addReg(X86::RCX); 899 if (IsRDXLiveIn) 900 addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, 901 RDXShadowSlot) 902 .addReg(X86::RDX); 903 } else { 904 // Not in the prolog. Copy RAX to a virtual reg. 905 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); 906 } 907 908 // Add code to MBB to check for overflow and set the new target stack pointer 909 // to zero if so. 
910 BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg) 911 .addReg(ZeroReg, RegState::Undef) 912 .addReg(ZeroReg, RegState::Undef); 913 BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP); 914 BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) 915 .addReg(CopyReg) 916 .addReg(SizeReg); 917 BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) 918 .addReg(TestReg) 919 .addReg(ZeroReg) 920 .addImm(X86::COND_B); 921 922 // FinalReg now holds final stack pointer value, or zero if 923 // allocation would overflow. Compare against the current stack 924 // limit from the thread environment block. Note this limit is the 925 // lowest touched page on the stack, not the point at which the OS 926 // will cause an overflow exception, so this is just an optimization 927 // to avoid unnecessarily touching pages that are below the current 928 // SP but already committed to the stack by the OS. 929 BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg) 930 .addReg(0) 931 .addImm(1) 932 .addReg(0) 933 .addImm(ThreadEnvironmentStackLimit) 934 .addReg(X86::GS); 935 BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg); 936 // Jump if the desired stack pointer is at or above the stack limit. 937 BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE); 938 939 // Add code to roundMBB to round the final stack pointer to a page boundary. 940 RoundMBB->addLiveIn(FinalReg); 941 BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) 942 .addReg(FinalReg) 943 .addImm(PageMask); 944 BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB); 945 946 // LimitReg now holds the current stack limit, RoundedReg page-rounded 947 // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page 948 // and probe until we reach RoundedReg. 949 if (!InProlog) { 950 BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg) 951 .addReg(LimitReg) 952 .addMBB(RoundMBB) 953 .addReg(ProbeReg) 954 .addMBB(LoopMBB); 955 } 956 957 LoopMBB->addLiveIn(JoinReg); 958 addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, 959 false, -PageSize); 960 961 // Probe by storing a byte onto the stack. 962 BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi)) 963 .addReg(ProbeReg) 964 .addImm(1) 965 .addReg(0) 966 .addImm(0) 967 .addReg(0) 968 .addImm(0); 969 970 LoopMBB->addLiveIn(RoundedReg); 971 BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) 972 .addReg(RoundedReg) 973 .addReg(ProbeReg); 974 BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE); 975 976 MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); 977 978 // If in prolog, restore RDX and RCX. 979 if (InProlog) { 980 if (RCXShadowSlot) // It means we spilled RCX in the prologue. 981 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, 982 TII.get(X86::MOV64rm), X86::RCX), 983 X86::RSP, false, RCXShadowSlot); 984 if (RDXShadowSlot) // It means we spilled RDX in the prologue. 985 addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, 986 TII.get(X86::MOV64rm), X86::RDX), 987 X86::RSP, false, RDXShadowSlot); 988 } 989 990 // Now that the probing is done, add code to continueMBB to update 991 // the stack pointer for real. 992 ContinueMBB->addLiveIn(SizeReg); 993 BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) 994 .addReg(X86::RSP) 995 .addReg(SizeReg); 996 997 // Add the control flow edges we need. 
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
         CMBBI != ContinueMBBI; ++CMBBI) {
      CMBBI->setFlag(MachineInstr::FrameSetup);
    }
  }
}

void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
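    // E.g. on Win64 the emitted sequence is effectively (illustrative):
    //   call __chkstk
    //   subq %rax, %rsp
    // whereas MSVC x86's _chkstk has already moved %esp itself, so no extra
    // SUB is emitted on that path.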
1073 ModInst = 1074 BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP) 1075 .addReg(SP) 1076 .addReg(AX); 1077 } 1078 1079 // DebugInfo variable locations -- if there's an instruction number for the 1080 // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that 1081 // modifies SP. 1082 if (InstrNum) { 1083 if (STI.isTargetWin64() || !STI.isOSWindows()) { 1084 // Label destination operand of the subtract. 1085 MF.makeDebugValueSubstitution(*InstrNum, 1086 {ModInst->getDebugInstrNum(), 0}); 1087 } else { 1088 // Label the call. The operand number is the penultimate operand, zero 1089 // based. 1090 unsigned SPDefOperand = ModInst->getNumOperands() - 2; 1091 MF.makeDebugValueSubstitution( 1092 *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand}); 1093 } 1094 } 1095 1096 if (InProlog) { 1097 // Apply the frame setup flag to all inserted instrs. 1098 for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) 1099 ExpansionMBBI->setFlag(MachineInstr::FrameSetup); 1100 } 1101 } 1102 1103 static unsigned calculateSetFPREG(uint64_t SPAdjust) { 1104 // Win64 ABI has a less restrictive limitation of 240; 128 works equally well 1105 // and might require smaller successive adjustments. 1106 const uint64_t Win64MaxSEHOffset = 128; 1107 uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); 1108 // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. 1109 return SEHFrameOffset & -16; 1110 } 1111 1112 // If we're forcing a stack realignment we can't rely on just the frame 1113 // info, we need to know the ABI stack alignment as well in case we 1114 // have a call out. Otherwise just make sure we have some alignment - we'll 1115 // go with the minimum SlotSize. 1116 uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { 1117 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1118 Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. 1119 Align StackAlign = getStackAlign(); 1120 if (MF.getFunction().hasFnAttribute("stackrealign")) { 1121 if (MFI.hasCalls()) 1122 MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; 1123 else if (MaxAlign < SlotSize) 1124 MaxAlign = Align(SlotSize); 1125 } 1126 return MaxAlign.value(); 1127 } 1128 1129 void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, 1130 MachineBasicBlock::iterator MBBI, 1131 const DebugLoc &DL, unsigned Reg, 1132 uint64_t MaxAlign) const { 1133 uint64_t Val = -MaxAlign; 1134 unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); 1135 1136 MachineFunction &MF = *MBB.getParent(); 1137 const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); 1138 const X86TargetLowering &TLI = *STI.getTargetLowering(); 1139 const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); 1140 const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); 1141 1142 // We want to make sure that (in worst case) less than StackProbeSize bytes 1143 // are not probed after the AND. This assumption is used in 1144 // emitStackProbeInlineGeneric. 
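  // Example (illustrative, assuming the default 4096-byte probe size):
  // realigning to MaxAlign = 16384 may move %rsp down by several pages in one
  // step, so the probing loop below is required; MaxAlign = 64 stays within a
  // single page and takes the plain AND path in the else branch.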
1145 if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { 1146 { 1147 NumFrameLoopProbe++; 1148 MachineBasicBlock *entryMBB = 1149 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 1150 MachineBasicBlock *headMBB = 1151 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 1152 MachineBasicBlock *bodyMBB = 1153 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 1154 MachineBasicBlock *footMBB = 1155 MF.CreateMachineBasicBlock(MBB.getBasicBlock()); 1156 1157 MachineFunction::iterator MBBIter = MBB.getIterator(); 1158 MF.insert(MBBIter, entryMBB); 1159 MF.insert(MBBIter, headMBB); 1160 MF.insert(MBBIter, bodyMBB); 1161 MF.insert(MBBIter, footMBB); 1162 const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; 1163 Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 1164 : Is64Bit ? X86::R11D 1165 : X86::EAX; 1166 1167 // Setup entry block 1168 { 1169 1170 entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI); 1171 BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) 1172 .addReg(StackPtr) 1173 .setMIFlag(MachineInstr::FrameSetup); 1174 MachineInstr *MI = 1175 BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed) 1176 .addReg(FinalStackProbed) 1177 .addImm(Val) 1178 .setMIFlag(MachineInstr::FrameSetup); 1179 1180 // The EFLAGS implicit def is dead. 1181 MI->getOperand(3).setIsDead(); 1182 1183 BuildMI(entryMBB, DL, 1184 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) 1185 .addReg(FinalStackProbed) 1186 .addReg(StackPtr) 1187 .setMIFlag(MachineInstr::FrameSetup); 1188 BuildMI(entryMBB, DL, TII.get(X86::JCC_1)) 1189 .addMBB(&MBB) 1190 .addImm(X86::COND_E) 1191 .setMIFlag(MachineInstr::FrameSetup); 1192 entryMBB->addSuccessor(headMBB); 1193 entryMBB->addSuccessor(&MBB); 1194 } 1195 1196 // Loop entry block 1197 1198 { 1199 const unsigned SUBOpc = 1200 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); 1201 BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) 1202 .addReg(StackPtr) 1203 .addImm(StackProbeSize) 1204 .setMIFlag(MachineInstr::FrameSetup); 1205 1206 BuildMI(headMBB, DL, 1207 TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) 1208 .addReg(FinalStackProbed) 1209 .addReg(StackPtr) 1210 .setMIFlag(MachineInstr::FrameSetup); 1211 1212 // jump 1213 BuildMI(headMBB, DL, TII.get(X86::JCC_1)) 1214 .addMBB(footMBB) 1215 .addImm(X86::COND_B) 1216 .setMIFlag(MachineInstr::FrameSetup); 1217 1218 headMBB->addSuccessor(bodyMBB); 1219 headMBB->addSuccessor(footMBB); 1220 } 1221 1222 // setup loop body 1223 { 1224 addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc)) 1225 .setMIFlag(MachineInstr::FrameSetup), 1226 StackPtr, false, 0) 1227 .addImm(0) 1228 .setMIFlag(MachineInstr::FrameSetup); 1229 1230 const unsigned SUBOpc = 1231 getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); 1232 BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) 1233 .addReg(StackPtr) 1234 .addImm(StackProbeSize) 1235 .setMIFlag(MachineInstr::FrameSetup); 1236 1237 // cmp with stack pointer bound 1238 BuildMI(bodyMBB, DL, 1239 TII.get(Uses64BitFramePtr ? 
                           X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      recomputeLiveIns(*headMBB);
      recomputeLiveIns(*bodyMBB);
      recomputeLiveIns(*footMBB);
      recomputeLiveIns(MBB);
    }
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}

bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}

/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
        .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
1337 mov $NNN, %rax 1338 call ___chkstk_ms/___chkstk 1339 sub %rax, %rsp 1340 [else] 1341 sub $NNN, %rsp 1342 1343 [if needs FP] 1344 .seh_stackalloc (size of XMM spill slots) 1345 .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots 1346 [else] 1347 .seh_stackalloc NNN 1348 1349 ; Spill XMMs 1350 ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, 1351 ; they may get spilled on any platform, if the current function 1352 ; calls @llvm.eh.unwind.init 1353 [if needs FP] 1354 [for all callee-saved XMM registers] 1355 movaps %<xmm reg>, -MMM(%rbp) 1356 [for all callee-saved XMM registers] 1357 .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) 1358 ; i.e. the offset relative to (%rbp - SEHFrameOffset) 1359 [else] 1360 [for all callee-saved XMM registers] 1361 movaps %<xmm reg>, KKK(%rsp) 1362 [for all callee-saved XMM registers] 1363 .seh_savexmm %<xmm reg>, KKK 1364 1365 .seh_endprologue 1366 1367 [if needs base pointer] 1368 mov %rsp, %rbx 1369 [if needs to restore base pointer] 1370 mov %rsp, -MMM(%rbp) 1371 1372 ; Emit CFI info 1373 [if needs FP] 1374 [for all callee-saved registers] 1375 .cfi_offset %<reg>, (offset from %rbp) 1376 [else] 1377 .cfi_def_cfa_offset (offset from RETADDR) 1378 [for all callee-saved registers] 1379 .cfi_offset %<reg>, (offset from %rsp) 1380 1381 Notes: 1382 - .seh directives are emitted only for Windows 64 ABI 1383 - .cv_fpo directives are emitted on win32 when emitting CodeView 1384 - .cfi directives are emitted for all other ABIs 1385 - for 32-bit code, substitute %e?? registers for %r?? 1386 */ 1387 1388 void X86FrameLowering::emitPrologue(MachineFunction &MF, 1389 MachineBasicBlock &MBB) const { 1390 assert(&STI == &MF.getSubtarget<X86Subtarget>() && 1391 "MF used frame lowering for wrong subtarget"); 1392 MachineBasicBlock::iterator MBBI = MBB.begin(); 1393 MachineFrameInfo &MFI = MF.getFrameInfo(); 1394 const Function &Fn = MF.getFunction(); 1395 MachineModuleInfo &MMI = MF.getMMI(); 1396 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 1397 uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. 1398 uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. 1399 bool IsFunclet = MBB.isEHFuncletEntry(); 1400 EHPersonality Personality = EHPersonality::Unknown; 1401 if (Fn.hasPersonalityFn()) 1402 Personality = classifyEHPersonality(Fn.getPersonalityFn()); 1403 bool FnHasClrFunclet = 1404 MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; 1405 bool IsClrFunclet = IsFunclet && FnHasClrFunclet; 1406 bool HasFP = hasFP(MF); 1407 bool IsWin64Prologue = isWin64Prologue(MF); 1408 bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); 1409 // FIXME: Emit FPO data for EH funclets. 1410 bool NeedsWinFPO = 1411 !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); 1412 bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; 1413 bool NeedsDwarfCFI = needsDwarfCFI(MF); 1414 Register FramePtr = TRI->getFrameRegister(MF); 1415 const Register MachineFramePtr = 1416 STI.isTarget64BitILP32() 1417 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; 1418 Register BasePtr = TRI->getBaseRegister(); 1419 bool HasWinCFI = false; 1420 1421 // Debug location must be unknown since the first debug location is used 1422 // to determine the end of the prologue. 1423 DebugLoc DL; 1424 1425 // Space reserved for stack-based arguments when making a (ABI-guaranteed) 1426 // tail call. 
1427 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); 1428 if (TailCallArgReserveSize && IsWin64Prologue) 1429 report_fatal_error("Can't handle guaranteed tail call under win64 yet"); 1430 1431 const bool EmitStackProbeCall = 1432 STI.getTargetLowering()->hasStackProbeSymbol(MF); 1433 unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); 1434 1435 if (HasFP && X86FI->hasSwiftAsyncContext()) { 1436 switch (MF.getTarget().Options.SwiftAsyncFramePointer) { 1437 case SwiftAsyncFramePointerMode::DeploymentBased: 1438 if (STI.swiftAsyncContextIsDynamicallySet()) { 1439 // The special symbol below is absolute and has a *value* suitable to be 1440 // combined with the frame pointer directly. 1441 BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr) 1442 .addUse(MachineFramePtr) 1443 .addUse(X86::RIP) 1444 .addImm(1) 1445 .addUse(X86::NoRegister) 1446 .addExternalSymbol("swift_async_extendedFramePointerFlags", 1447 X86II::MO_GOTPCREL) 1448 .addUse(X86::NoRegister); 1449 break; 1450 } 1451 LLVM_FALLTHROUGH; 1452 1453 case SwiftAsyncFramePointerMode::Always: 1454 BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr) 1455 .addUse(MachineFramePtr) 1456 .addImm(60) 1457 .setMIFlag(MachineInstr::FrameSetup); 1458 break; 1459 1460 case SwiftAsyncFramePointerMode::Never: 1461 break; 1462 } 1463 } 1464 1465 // Re-align the stack on 64-bit if the x86-interrupt calling convention is 1466 // used and an error code was pushed, since the x86-64 ABI requires a 16-byte 1467 // stack alignment. 1468 if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && 1469 Fn.arg_size() == 2) { 1470 StackSize += 8; 1471 MFI.setStackSize(StackSize); 1472 emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false); 1473 } 1474 1475 // If this is x86-64 and the Red Zone is not disabled, if we are a leaf 1476 // function, and use up to 128 bytes of stack space, don't have a frame 1477 // pointer, calls, or dynamic alloca then we do not need to adjust the 1478 // stack pointer (we fit in the Red Zone). We also check that we don't 1479 // push and pop from the stack. 1480 if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && 1481 !MFI.hasVarSizedObjects() && // No dynamic alloca. 1482 !MFI.adjustsStack() && // No calls. 1483 !EmitStackProbeCall && // No stack probes. 1484 !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. 1485 !MF.shouldSplitStack()) { // Regular stack 1486 uint64_t MinSize = 1487 X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta(); 1488 if (HasFP) MinSize += SlotSize; 1489 X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); 1490 StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); 1491 MFI.setStackSize(StackSize); 1492 } 1493 1494 // Insert stack pointer adjustment for later moving of return addr. Only 1495 // applies to tail call optimized functions where the callee argument stack 1496 // size is bigger than the callers. 
1497 if (TailCallArgReserveSize != 0) { 1498 BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize, 1499 /*InEpilogue=*/false) 1500 .setMIFlag(MachineInstr::FrameSetup); 1501 } 1502 1503 // Mapping for machine moves: 1504 // 1505 // DST: VirtualFP AND 1506 // SRC: VirtualFP => DW_CFA_def_cfa_offset 1507 // ELSE => DW_CFA_def_cfa 1508 // 1509 // SRC: VirtualFP AND 1510 // DST: Register => DW_CFA_def_cfa_register 1511 // 1512 // ELSE 1513 // OFFSET < 0 => DW_CFA_offset_extended_sf 1514 // REG < 64 => DW_CFA_offset + Reg 1515 // ELSE => DW_CFA_offset_extended 1516 1517 uint64_t NumBytes = 0; 1518 int stackGrowth = -SlotSize; 1519 1520 // Find the funclet establisher parameter 1521 Register Establisher = X86::NoRegister; 1522 if (IsClrFunclet) 1523 Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; 1524 else if (IsFunclet) 1525 Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; 1526 1527 if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { 1528 // Immediately spill establisher into the home slot. 1529 // The runtime cares about this. 1530 // MOV64mr %rdx, 16(%rsp) 1531 unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; 1532 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) 1533 .addReg(Establisher) 1534 .setMIFlag(MachineInstr::FrameSetup); 1535 MBB.addLiveIn(Establisher); 1536 } 1537 1538 if (HasFP) { 1539 assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved"); 1540 1541 // Calculate required stack adjustment. 1542 uint64_t FrameSize = StackSize - SlotSize; 1543 // If required, include space for extra hidden slot for stashing base pointer. 1544 if (X86FI->getRestoreBasePointer()) 1545 FrameSize += SlotSize; 1546 1547 NumBytes = FrameSize - 1548 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); 1549 1550 // Callee-saved registers are pushed on stack before the stack is realigned. 1551 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) 1552 NumBytes = alignTo(NumBytes, MaxAlign); 1553 1554 // Save EBP/RBP into the appropriate stack slot. 1555 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) 1556 .addReg(MachineFramePtr, RegState::Kill) 1557 .setMIFlag(MachineInstr::FrameSetup); 1558 1559 if (NeedsDwarfCFI) { 1560 // Mark the place where EBP/RBP was saved. 1561 // Define the current CFA rule to use the provided offset. 1562 assert(StackSize); 1563 BuildCFI(MBB, MBBI, DL, 1564 MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth)); 1565 1566 // Change the rule for the FramePtr to be an "offset" rule. 1567 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); 1568 BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( 1569 nullptr, DwarfFramePtr, 2 * stackGrowth)); 1570 } 1571 1572 if (NeedsWinCFI) { 1573 HasWinCFI = true; 1574 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) 1575 .addImm(FramePtr) 1576 .setMIFlag(MachineInstr::FrameSetup); 1577 } 1578 1579 if (!IsFunclet) { 1580 if (X86FI->hasSwiftAsyncContext()) { 1581 const auto &Attrs = MF.getFunction().getAttributes(); 1582 1583 // Before we update the live frame pointer we have to ensure there's a 1584 // valid (or null) asynchronous context in its slot just before FP in 1585 // the frame record, so store it now. 1586 if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { 1587 // We have an initial context in r14, store it just before the frame 1588 // pointer. 
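        // (Illustrative frame-record layout once this block completes, from
        // higher to lower addresses: return address, saved %rbp -- which the
        // LEA below makes %rbp point at -- the async context slot pushed here,
        // and one 8-byte slot from the trailing SUB that keeps the stack
        // 16-byte aligned.)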
1589 MBB.addLiveIn(X86::R14); 1590 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) 1591 .addReg(X86::R14) 1592 .setMIFlag(MachineInstr::FrameSetup); 1593 } else { 1594 // No initial context, store null so that there's no pointer that 1595 // could be misused. 1596 BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8)) 1597 .addImm(0) 1598 .setMIFlag(MachineInstr::FrameSetup); 1599 } 1600 1601 if (NeedsWinCFI) { 1602 HasWinCFI = true; 1603 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) 1604 .addImm(X86::R14) 1605 .setMIFlag(MachineInstr::FrameSetup); 1606 } 1607 1608 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr) 1609 .addUse(X86::RSP) 1610 .addImm(1) 1611 .addUse(X86::NoRegister) 1612 .addImm(8) 1613 .addUse(X86::NoRegister) 1614 .setMIFlag(MachineInstr::FrameSetup); 1615 BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) 1616 .addUse(X86::RSP) 1617 .addImm(8) 1618 .setMIFlag(MachineInstr::FrameSetup); 1619 } 1620 1621 if (!IsWin64Prologue && !IsFunclet) { 1622 // Update EBP with the new base value. 1623 if (!X86FI->hasSwiftAsyncContext()) 1624 BuildMI(MBB, MBBI, DL, 1625 TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), 1626 FramePtr) 1627 .addReg(StackPtr) 1628 .setMIFlag(MachineInstr::FrameSetup); 1629 1630 if (NeedsDwarfCFI) { 1631 // Mark effective beginning of when frame pointer becomes valid. 1632 // Define the current CFA to use the EBP/RBP register. 1633 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); 1634 BuildCFI( 1635 MBB, MBBI, DL, 1636 MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); 1637 } 1638 1639 if (NeedsWinFPO) { 1640 // .cv_fpo_setframe $FramePtr 1641 HasWinCFI = true; 1642 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) 1643 .addImm(FramePtr) 1644 .addImm(0) 1645 .setMIFlag(MachineInstr::FrameSetup); 1646 } 1647 } 1648 } 1649 } else { 1650 assert(!IsFunclet && "funclets without FPs not yet implemented"); 1651 NumBytes = StackSize - 1652 (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize); 1653 } 1654 1655 // Update the offset adjustment, which is mainly used by codeview to translate 1656 // from ESP to VFRAME relative local variable offsets. 1657 if (!IsFunclet) { 1658 if (HasFP && TRI->hasStackRealignment(MF)) 1659 MFI.setOffsetAdjustment(-NumBytes); 1660 else 1661 MFI.setOffsetAdjustment(-StackSize); 1662 } 1663 1664 // For EH funclets, only allocate enough space for outgoing calls. Save the 1665 // NumBytes value that we would've used for the parent frame. 1666 unsigned ParentFrameNumBytes = NumBytes; 1667 if (IsFunclet) 1668 NumBytes = getWinEHFuncletFrameSize(MF); 1669 1670 // Skip the callee-saved push instructions. 1671 bool PushedRegs = false; 1672 int StackOffset = 2 * stackGrowth; 1673 1674 while (MBBI != MBB.end() && 1675 MBBI->getFlag(MachineInstr::FrameSetup) && 1676 (MBBI->getOpcode() == X86::PUSH32r || 1677 MBBI->getOpcode() == X86::PUSH64r)) { 1678 PushedRegs = true; 1679 Register Reg = MBBI->getOperand(0).getReg(); 1680 ++MBBI; 1681 1682 if (!HasFP && NeedsDwarfCFI) { 1683 // Mark callee-saved push instruction. 1684 // Define the current CFA rule to use the provided offset. 
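      // For illustration (frameless function on x86-64; the register is
      // assumed): after "pushq %rbx" the directive built below is
      // .cfi_def_cfa_offset 16 (return address plus one slot), after a second
      // push it is 24, and so on -- StackOffset advances by SlotSize per push.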
1685       assert(StackSize);
1686       BuildCFI(MBB, MBBI, DL,
1687                MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset));
1688       StackOffset += stackGrowth;
1689     }
1690
1691     if (NeedsWinCFI) {
1692       HasWinCFI = true;
1693       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
1694           .addImm(Reg)
1695           .setMIFlag(MachineInstr::FrameSetup);
1696     }
1697   }
1698
1699   // Realign stack after we pushed callee-saved registers (so that we'll be
1700   // able to calculate their offsets from the frame pointer).
1701   // Don't do this for Win64; it needs to realign the stack after the prologue.
1702   if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
1703     assert(HasFP && "There should be a frame pointer if stack is realigned.");
1704     BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
1705
1706     if (NeedsWinCFI) {
1707       HasWinCFI = true;
1708       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
1709           .addImm(MaxAlign)
1710           .setMIFlag(MachineInstr::FrameSetup);
1711     }
1712   }
1713
1714   // If there is a SUB32ri of ESP immediately before this instruction, merge
1715   // the two. This can be the case when tail call elimination is enabled and
1716   // the callee has more arguments than the caller.
1717   NumBytes -= mergeSPUpdates(MBB, MBBI, true);
1718
1719   // Adjust stack pointer: ESP -= numbytes.
1720
1721   // Windows and cygwin/mingw require a prologue helper routine when allocating
1722   // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
1723   // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
1724   // stack and adjust the stack pointer in one go. The 64-bit version of
1725   // __chkstk is only responsible for probing the stack. The 64-bit prologue is
1726   // responsible for adjusting the stack pointer. Touching the stack at 4K
1727   // increments is necessary to ensure that the guard pages used by the OS
1728   // virtual memory manager are allocated in correct sequence.
1729   uint64_t AlignedNumBytes = NumBytes;
1730   if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
1731     AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
1732   if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
1733     assert(!X86FI->getUsesRedZone() &&
1734            "The Red Zone is not accounted for in stack probes");
1735
1736     // Check whether EAX is live-in for this block.
1737     bool isEAXAlive = isEAXLiveIn(MBB);
1738
1739     if (isEAXAlive) {
1740       if (Is64Bit) {
1741         // Save RAX
1742         BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
1743             .addReg(X86::RAX, RegState::Kill)
1744             .setMIFlag(MachineInstr::FrameSetup);
1745       } else {
1746         // Save EAX
1747         BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
1748             .addReg(X86::EAX, RegState::Kill)
1749             .setMIFlag(MachineInstr::FrameSetup);
1750       }
1751     }
1752
1753     if (Is64Bit) {
1754       // Handle the 64-bit Windows ABI case where we need to call __chkstk.
1755       // Function prologue is responsible for adjusting the stack pointer.
1756       int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
1757       BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
1758           .addImm(Alloc)
1759           .setMIFlag(MachineInstr::FrameSetup);
1760     } else {
1761       // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
1762       // We'll also use 4 already allocated bytes for EAX.
1763       BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
1764           .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
1765           .setMIFlag(MachineInstr::FrameSetup);
1766     }
1767
1768     // Call __chkstk, __chkstk_ms, or __alloca.
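    // A hedged sketch of the resulting Win64 sequence when %rax is live-in and
    // NumBytes is 0x2008 (both assumptions for illustration):
    //   pushq %rax                ; preserve RAX (also provides 8 bytes)
    //   movl  $0x2000, %eax       ; NumBytes - 8
    //   callq ___chkstk_ms        ; 64-bit probe only touches guard pages
    //   subq  %rax, %rsp          ; the prologue itself moves RSP
    //   movq  0x2000(%rsp), %rax  ; reload the original RAX (see below)
    // The probe symbol actually used depends on the environment; see the block
    // comment above emitPrologue.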
1769 emitStackProbe(MF, MBB, MBBI, DL, true); 1770 1771 if (isEAXAlive) { 1772 // Restore RAX/EAX 1773 MachineInstr *MI; 1774 if (Is64Bit) 1775 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX), 1776 StackPtr, false, NumBytes - 8); 1777 else 1778 MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), 1779 StackPtr, false, NumBytes - 4); 1780 MI->setFlag(MachineInstr::FrameSetup); 1781 MBB.insert(MBBI, MI); 1782 } 1783 } else if (NumBytes) { 1784 emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false); 1785 } 1786 1787 if (NeedsWinCFI && NumBytes) { 1788 HasWinCFI = true; 1789 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) 1790 .addImm(NumBytes) 1791 .setMIFlag(MachineInstr::FrameSetup); 1792 } 1793 1794 int SEHFrameOffset = 0; 1795 unsigned SPOrEstablisher; 1796 if (IsFunclet) { 1797 if (IsClrFunclet) { 1798 // The establisher parameter passed to a CLR funclet is actually a pointer 1799 // to the (mostly empty) frame of its nearest enclosing funclet; we have 1800 // to find the root function establisher frame by loading the PSPSym from 1801 // the intermediate frame. 1802 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); 1803 MachinePointerInfo NoInfo; 1804 MBB.addLiveIn(Establisher); 1805 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), 1806 Establisher, false, PSPSlotOffset) 1807 .addMemOperand(MF.getMachineMemOperand( 1808 NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize))); 1809 ; 1810 // Save the root establisher back into the current funclet's (mostly 1811 // empty) frame, in case a sub-funclet or the GC needs it. 1812 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, 1813 false, PSPSlotOffset) 1814 .addReg(Establisher) 1815 .addMemOperand(MF.getMachineMemOperand( 1816 NoInfo, 1817 MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, 1818 SlotSize, Align(SlotSize))); 1819 } 1820 SPOrEstablisher = Establisher; 1821 } else { 1822 SPOrEstablisher = StackPtr; 1823 } 1824 1825 if (IsWin64Prologue && HasFP) { 1826 // Set RBP to a small fixed offset from RSP. In the funclet case, we base 1827 // this calculation on the incoming establisher, which holds the value of 1828 // RSP from the parent frame at the end of the prologue. 1829 SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); 1830 if (SEHFrameOffset) 1831 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), 1832 SPOrEstablisher, false, SEHFrameOffset); 1833 else 1834 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) 1835 .addReg(SPOrEstablisher); 1836 1837 // If this is not a funclet, emit the CFI describing our frame pointer. 1838 if (NeedsWinCFI && !IsFunclet) { 1839 assert(!NeedsWinFPO && "this setframe incompatible with FPO data"); 1840 HasWinCFI = true; 1841 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) 1842 .addImm(FramePtr) 1843 .addImm(SEHFrameOffset) 1844 .setMIFlag(MachineInstr::FrameSetup); 1845 if (isAsynchronousEHPersonality(Personality)) 1846 MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; 1847 } 1848 } else if (IsFunclet && STI.is32Bit()) { 1849 // Reset EBP / ESI to something good for funclets. 1850 MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); 1851 // If we're a catch funclet, we can be returned to via catchret. Save ESP 1852 // into the registration node so that the runtime will restore it for us. 
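    // Sketch of the store built below (register and offset are whatever the
    // frame-index lookup yields): "movl %esp, EHRegOffset(%<FrameReg>)" -- the
    // current stack pointer is written into the first field of the EH
    // registration node so that the runtime can restore it after a catchret.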
1853 if (!MBB.isCleanupFuncletEntry()) { 1854 assert(Personality == EHPersonality::MSVC_CXX); 1855 Register FrameReg; 1856 int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; 1857 int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); 1858 // ESP is the first field, so no extra displacement is needed. 1859 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, 1860 false, EHRegOffset) 1861 .addReg(X86::ESP); 1862 } 1863 } 1864 1865 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { 1866 const MachineInstr &FrameInstr = *MBBI; 1867 ++MBBI; 1868 1869 if (NeedsWinCFI) { 1870 int FI; 1871 if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { 1872 if (X86::FR64RegClass.contains(Reg)) { 1873 int Offset; 1874 Register IgnoredFrameReg; 1875 if (IsWin64Prologue && IsFunclet) 1876 Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); 1877 else 1878 Offset = 1879 getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() + 1880 SEHFrameOffset; 1881 1882 HasWinCFI = true; 1883 assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); 1884 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) 1885 .addImm(Reg) 1886 .addImm(Offset) 1887 .setMIFlag(MachineInstr::FrameSetup); 1888 } 1889 } 1890 } 1891 } 1892 1893 if (NeedsWinCFI && HasWinCFI) 1894 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) 1895 .setMIFlag(MachineInstr::FrameSetup); 1896 1897 if (FnHasClrFunclet && !IsFunclet) { 1898 // Save the so-called Initial-SP (i.e. the value of the stack pointer 1899 // immediately after the prolog) into the PSPSlot so that funclets 1900 // and the GC can recover it. 1901 unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); 1902 auto PSPInfo = MachinePointerInfo::getFixedStack( 1903 MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); 1904 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, 1905 PSPSlotOffset) 1906 .addReg(StackPtr) 1907 .addMemOperand(MF.getMachineMemOperand( 1908 PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, 1909 SlotSize, Align(SlotSize))); 1910 } 1911 1912 // Realign stack after we spilled callee-saved registers (so that we'll be 1913 // able to calculate their offsets from the frame pointer). 1914 // Win64 requires aligning the stack after the prologue. 1915 if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { 1916 assert(HasFP && "There should be a frame pointer if stack is realigned."); 1917 BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); 1918 } 1919 1920 // We already dealt with stack realignment and funclets above. 1921 if (IsFunclet && STI.is32Bit()) 1922 return; 1923 1924 // If we need a base pointer, set it up here. It's whatever the value 1925 // of the stack pointer is at this point. Any variable size objects 1926 // will be allocated after this, so we can still use the base pointer 1927 // to reference locals. 1928 if (TRI->hasBasePointer(MF)) { 1929 // Update the base pointer with the current stack pointer. 1930 unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; 1931 BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) 1932 .addReg(SPOrEstablisher) 1933 .setMIFlag(MachineInstr::FrameSetup); 1934 if (X86FI->getRestoreBasePointer()) { 1935 // Stash value of base pointer. Saving RSP instead of EBP shortens 1936 // dependence chain. Used by SjLj EH. 1937 unsigned Opm = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; 1938 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), 1939 FramePtr, true, X86FI->getRestoreBasePointerOffset()) 1940 .addReg(SPOrEstablisher) 1941 .setMIFlag(MachineInstr::FrameSetup); 1942 } 1943 1944 if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { 1945 // Stash the value of the frame pointer relative to the base pointer for 1946 // Win32 EH. This supports Win32 EH, which does the inverse of the above: 1947 // it recovers the frame pointer from the base pointer rather than the 1948 // other way around. 1949 unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; 1950 Register UsedReg; 1951 int Offset = 1952 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) 1953 .getFixed(); 1954 assert(UsedReg == BasePtr); 1955 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) 1956 .addReg(FramePtr) 1957 .setMIFlag(MachineInstr::FrameSetup); 1958 } 1959 } 1960 1961 if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { 1962 // Mark end of stack pointer adjustment. 1963 if (!HasFP && NumBytes) { 1964 // Define the current CFA rule to use the provided offset. 1965 assert(StackSize); 1966 BuildCFI( 1967 MBB, MBBI, DL, 1968 MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth)); 1969 } 1970 1971 // Emit DWARF info specifying the offsets of the callee-saved registers. 1972 emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); 1973 } 1974 1975 // X86 Interrupt handling function cannot assume anything about the direction 1976 // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction 1977 // in each prologue of interrupt handler function. 1978 // 1979 // FIXME: Create "cld" instruction only in these cases: 1980 // 1. The interrupt handling function uses any of the "rep" instructions. 1981 // 2. Interrupt handling function calls another function. 1982 // 1983 if (Fn.getCallingConv() == CallingConv::X86_INTR) 1984 BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) 1985 .setMIFlag(MachineInstr::FrameSetup); 1986 1987 // At this point we know if the function has WinCFI or not. 1988 MF.setHasWinCFI(HasWinCFI); 1989 } 1990 1991 bool X86FrameLowering::canUseLEAForSPInEpilogue( 1992 const MachineFunction &MF) const { 1993 // We can't use LEA instructions for adjusting the stack pointer if we don't 1994 // have a frame pointer in the Win64 ABI. Only ADD instructions may be used 1995 // to deallocate the stack. 1996 // This means that we can use LEA for SP in two situations: 1997 // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. 1998 // 2. We *have* a frame pointer which means we are permitted to use LEA. 1999 return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); 2000 } 2001 2002 static bool isFuncletReturnInstr(MachineInstr &MI) { 2003 switch (MI.getOpcode()) { 2004 case X86::CATCHRET: 2005 case X86::CLEANUPRET: 2006 return true; 2007 default: 2008 return false; 2009 } 2010 llvm_unreachable("impossible"); 2011 } 2012 2013 // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the 2014 // stack. It holds a pointer to the bottom of the root function frame. The 2015 // establisher frame pointer passed to a nested funclet may point to the 2016 // (mostly empty) frame of its parent funclet, but it will need to find 2017 // the frame of the root function to access locals. To facilitate this, 2018 // every funclet copies the pointer to the bottom of the root function 2019 // frame into a PSPSym slot in its own (mostly empty) stack frame. 
Using the 2020 // same offset for the PSPSym in the root function frame that's used in the 2021 // funclets' frames allows each funclet to dynamically accept any ancestor 2022 // frame as its establisher argument (the runtime doesn't guarantee the 2023 // immediate parent for some reason lost to history), and also allows the GC, 2024 // which uses the PSPSym for some bookkeeping, to find it in any funclet's 2025 // frame with only a single offset reported for the entire method. 2026 unsigned 2027 X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { 2028 const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); 2029 Register SPReg; 2030 int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg, 2031 /*IgnoreSPUpdates*/ true) 2032 .getFixed(); 2033 assert(Offset >= 0 && SPReg == TRI->getStackRegister()); 2034 return static_cast<unsigned>(Offset); 2035 } 2036 2037 unsigned 2038 X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { 2039 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2040 // This is the size of the pushed CSRs. 2041 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 2042 // This is the size of callee saved XMMs. 2043 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); 2044 unsigned XMMSize = WinEHXMMSlotInfo.size() * 2045 TRI->getSpillSize(X86::VR128RegClass); 2046 // This is the amount of stack a funclet needs to allocate. 2047 unsigned UsedSize; 2048 EHPersonality Personality = 2049 classifyEHPersonality(MF.getFunction().getPersonalityFn()); 2050 if (Personality == EHPersonality::CoreCLR) { 2051 // CLR funclets need to hold enough space to include the PSPSym, at the 2052 // same offset from the stack pointer (immediately after the prolog) as it 2053 // resides at in the main function. 2054 UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; 2055 } else { 2056 // Other funclets just need enough stack for outgoing call arguments. 2057 UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); 2058 } 2059 // RBP is not included in the callee saved register block. After pushing RBP, 2060 // everything is 16 byte aligned. Everything we allocate before an outgoing 2061 // call must also be 16 byte aligned. 2062 unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign()); 2063 // Subtract out the size of the callee saved registers. This is how much stack 2064 // each funclet will allocate. 2065 return FrameSizeMinusRBP + XMMSize - CSSize; 2066 } 2067 2068 static bool isTailCallOpcode(unsigned Opc) { 2069 return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || 2070 Opc == X86::TCRETURNmi || 2071 Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 || 2072 Opc == X86::TCRETURNmi64; 2073 } 2074 2075 void X86FrameLowering::emitEpilogue(MachineFunction &MF, 2076 MachineBasicBlock &MBB) const { 2077 const MachineFrameInfo &MFI = MF.getFrameInfo(); 2078 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2079 MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); 2080 MachineBasicBlock::iterator MBBI = Terminator; 2081 DebugLoc DL; 2082 if (MBBI != MBB.end()) 2083 DL = MBBI->getDebugLoc(); 2084 // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. 2085 const bool Is64BitILP32 = STI.isTarget64BitILP32(); 2086 Register FramePtr = TRI->getFrameRegister(MF); 2087 Register MachineFramePtr = 2088 Is64BitILP32 ? 
Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; 2089 2090 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 2091 bool NeedsWin64CFI = 2092 IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); 2093 bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI); 2094 2095 // Get the number of bytes to allocate from the FrameInfo. 2096 uint64_t StackSize = MFI.getStackSize(); 2097 uint64_t MaxAlign = calculateMaxStackAlign(MF); 2098 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 2099 unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta(); 2100 bool HasFP = hasFP(MF); 2101 uint64_t NumBytes = 0; 2102 2103 bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() && 2104 !MF.getTarget().getTargetTriple().isOSWindows()) && 2105 MF.needsFrameMoves(); 2106 2107 if (IsFunclet) { 2108 assert(HasFP && "EH funclets without FP not yet implemented"); 2109 NumBytes = getWinEHFuncletFrameSize(MF); 2110 } else if (HasFP) { 2111 // Calculate required stack adjustment. 2112 uint64_t FrameSize = StackSize - SlotSize; 2113 NumBytes = FrameSize - CSSize - TailCallArgReserveSize; 2114 2115 // Callee-saved registers were pushed on stack before the stack was 2116 // realigned. 2117 if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) 2118 NumBytes = alignTo(FrameSize, MaxAlign); 2119 } else { 2120 NumBytes = StackSize - CSSize - TailCallArgReserveSize; 2121 } 2122 uint64_t SEHStackAllocAmt = NumBytes; 2123 2124 // AfterPop is the position to insert .cfi_restore. 2125 MachineBasicBlock::iterator AfterPop = MBBI; 2126 if (HasFP) { 2127 if (X86FI->hasSwiftAsyncContext()) { 2128 // Discard the context. 2129 int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); 2130 emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true); 2131 } 2132 // Pop EBP. 2133 BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), 2134 MachineFramePtr) 2135 .setMIFlag(MachineInstr::FrameDestroy); 2136 2137 // We need to reset FP to its untagged state on return. Bit 60 is currently 2138 // used to show the presence of an extended frame. 2139 if (X86FI->hasSwiftAsyncContext()) { 2140 BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), 2141 MachineFramePtr) 2142 .addUse(MachineFramePtr) 2143 .addImm(60) 2144 .setMIFlag(MachineInstr::FrameDestroy); 2145 } 2146 2147 if (NeedsDwarfCFI) { 2148 unsigned DwarfStackPtr = 2149 TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); 2150 BuildCFI(MBB, MBBI, DL, 2151 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize)); 2152 if (!MBB.succ_empty() && !MBB.isReturnBlock()) { 2153 unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); 2154 BuildCFI(MBB, AfterPop, DL, 2155 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr)); 2156 --MBBI; 2157 --AfterPop; 2158 } 2159 --MBBI; 2160 } 2161 } 2162 2163 MachineBasicBlock::iterator FirstCSPop = MBBI; 2164 // Skip the callee-saved pop instructions. 
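  // For illustration (the instruction tail is assumed): if the block currently
  // ends with
  //   popq %rbx   ; FrameDestroy (callee-saved reload)
  //   popq %r14   ; FrameDestroy
  //   popq %rbp   ; FrameDestroy (emitted just above)
  //   retq
  // the backward scan below leaves FirstCSPop at the "popq %rbx", so the stack
  // deallocation and .cfi_def_cfa_offset updates can be inserted ahead of the
  // callee-saved reloads.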
2165 while (MBBI != MBB.begin()) { 2166 MachineBasicBlock::iterator PI = std::prev(MBBI); 2167 unsigned Opc = PI->getOpcode(); 2168 2169 if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { 2170 if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && 2171 (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && 2172 (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && 2173 (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) 2174 break; 2175 FirstCSPop = PI; 2176 } 2177 2178 --MBBI; 2179 } 2180 MBBI = FirstCSPop; 2181 2182 if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) 2183 emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator); 2184 2185 if (MBBI != MBB.end()) 2186 DL = MBBI->getDebugLoc(); 2187 // If there is an ADD32ri or SUB32ri of ESP immediately before this 2188 // instruction, merge the two instructions. 2189 if (NumBytes || MFI.hasVarSizedObjects()) 2190 NumBytes += mergeSPUpdates(MBB, MBBI, true); 2191 2192 // If dynamic alloca is used, then reset esp to point to the last callee-saved 2193 // slot before popping them off! Same applies for the case, when stack was 2194 // realigned. Don't do this if this was a funclet epilogue, since the funclets 2195 // will not do realignment or dynamic stack allocation. 2196 if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) && 2197 !IsFunclet) { 2198 if (TRI->hasStackRealignment(MF)) 2199 MBBI = FirstCSPop; 2200 unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); 2201 uint64_t LEAAmount = 2202 IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; 2203 2204 if (X86FI->hasSwiftAsyncContext()) 2205 LEAAmount -= 16; 2206 2207 // There are only two legal forms of epilogue: 2208 // - add SEHAllocationSize, %rsp 2209 // - lea SEHAllocationSize(%FramePtr), %rsp 2210 // 2211 // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. 2212 // However, we may use this sequence if we have a frame pointer because the 2213 // effects of the prologue can safely be undone. 2214 if (LEAAmount != 0) { 2215 unsigned Opc = getLEArOpcode(Uses64BitFramePtr); 2216 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), 2217 FramePtr, false, LEAAmount); 2218 --MBBI; 2219 } else { 2220 unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); 2221 BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) 2222 .addReg(FramePtr); 2223 --MBBI; 2224 } 2225 } else if (NumBytes) { 2226 // Adjust stack pointer back: ESP += numbytes. 2227 emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true); 2228 if (!HasFP && NeedsDwarfCFI) { 2229 // Define the current CFA rule to use the provided offset. 2230 BuildCFI(MBB, MBBI, DL, 2231 MCCFIInstruction::cfiDefCfaOffset( 2232 nullptr, CSSize + TailCallArgReserveSize + SlotSize)); 2233 } 2234 --MBBI; 2235 } 2236 2237 // Windows unwinder will not invoke function's exception handler if IP is 2238 // either in prologue or in epilogue. This behavior causes a problem when a 2239 // call immediately precedes an epilogue, because the return address points 2240 // into the epilogue. To cope with that, we insert an epilogue marker here, 2241 // then replace it with a 'nop' if it ends up immediately after a CALL in the 2242 // final emitted code. 2243 if (NeedsWin64CFI && MF.hasWinCFI()) 2244 BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); 2245 2246 if (!HasFP && NeedsDwarfCFI) { 2247 MBBI = FirstCSPop; 2248 int64_t Offset = -CSSize - SlotSize; 2249 // Mark callee-saved pop instruction. 
2250 // Define the current CFA rule to use the provided offset. 2251 while (MBBI != MBB.end()) { 2252 MachineBasicBlock::iterator PI = MBBI; 2253 unsigned Opc = PI->getOpcode(); 2254 ++MBBI; 2255 if (Opc == X86::POP32r || Opc == X86::POP64r) { 2256 Offset += SlotSize; 2257 BuildCFI(MBB, MBBI, DL, 2258 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset)); 2259 } 2260 } 2261 } 2262 2263 // Emit DWARF info specifying the restores of the callee-saved registers. 2264 // For epilogue with return inside or being other block without successor, 2265 // no need to generate .cfi_restore for callee-saved registers. 2266 if (NeedsDwarfCFI && !MBB.succ_empty()) 2267 emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false); 2268 2269 if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { 2270 // Add the return addr area delta back since we are not tail calling. 2271 int Offset = -1 * X86FI->getTCReturnAddrDelta(); 2272 assert(Offset >= 0 && "TCDelta should never be positive"); 2273 if (Offset) { 2274 // Check for possible merge with preceding ADD instruction. 2275 Offset += mergeSPUpdates(MBB, Terminator, true); 2276 emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true); 2277 } 2278 } 2279 2280 // Emit tilerelease for AMX kernel. 2281 if (X86FI->hasVirtualTileReg()) 2282 BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE)); 2283 } 2284 2285 StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, 2286 int FI, 2287 Register &FrameReg) const { 2288 const MachineFrameInfo &MFI = MF.getFrameInfo(); 2289 2290 bool IsFixed = MFI.isFixedObjectIndex(FI); 2291 // We can't calculate offset from frame pointer if the stack is realigned, 2292 // so enforce usage of stack/base pointer. The base pointer is used when we 2293 // have dynamic allocas in addition to dynamic realignment. 2294 if (TRI->hasBasePointer(MF)) 2295 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); 2296 else if (TRI->hasStackRealignment(MF)) 2297 FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); 2298 else 2299 FrameReg = TRI->getFrameRegister(MF); 2300 2301 // Offset will hold the offset from the stack pointer at function entry to the 2302 // object. 2303 // We need to factor in additional offsets applied during the prologue to the 2304 // frame, base, and stack pointer depending on which is used. 2305 int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); 2306 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2307 unsigned CSSize = X86FI->getCalleeSavedFrameSize(); 2308 uint64_t StackSize = MFI.getStackSize(); 2309 bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 2310 int64_t FPDelta = 0; 2311 2312 // In an x86 interrupt, remove the offset we added to account for the return 2313 // address from any stack object allocated in the caller's frame. Interrupts 2314 // do not have a standard return address. Fixed objects in the current frame, 2315 // such as SSE register spills, should not get this treatment. 2316 if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && 2317 Offset >= 0) { 2318 Offset += getOffsetOfLocalArea(); 2319 } 2320 2321 if (IsWin64Prologue) { 2322 assert(!MFI.hasCalls() || (StackSize % 16) == 8); 2323 2324 // Calculate required stack adjustment. 2325 uint64_t FrameSize = StackSize - SlotSize; 2326 // If required, include space for extra hidden slot for stashing base pointer. 
2327 if (X86FI->getRestoreBasePointer()) 2328 FrameSize += SlotSize; 2329 uint64_t NumBytes = FrameSize - CSSize; 2330 2331 uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); 2332 if (FI && FI == X86FI->getFAIndex()) 2333 return StackOffset::getFixed(-SEHFrameOffset); 2334 2335 // FPDelta is the offset from the "traditional" FP location of the old base 2336 // pointer followed by return address and the location required by the 2337 // restricted Win64 prologue. 2338 // Add FPDelta to all offsets below that go through the frame pointer. 2339 FPDelta = FrameSize - SEHFrameOffset; 2340 assert((!MFI.hasCalls() || (FPDelta % 16) == 0) && 2341 "FPDelta isn't aligned per the Win64 ABI!"); 2342 } 2343 2344 if (FrameReg == TRI->getFramePtr()) { 2345 // Skip saved EBP/RBP 2346 Offset += SlotSize; 2347 2348 // Account for restricted Windows prologue. 2349 Offset += FPDelta; 2350 2351 // Skip the RETADDR move area 2352 int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); 2353 if (TailCallReturnAddrDelta < 0) 2354 Offset -= TailCallReturnAddrDelta; 2355 2356 return StackOffset::getFixed(Offset); 2357 } 2358 2359 // FrameReg is either the stack pointer or a base pointer. But the base is 2360 // located at the end of the statically known StackSize so the distinction 2361 // doesn't really matter. 2362 if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF)) 2363 assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize))); 2364 return StackOffset::getFixed(Offset + StackSize); 2365 } 2366 2367 int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, 2368 Register &FrameReg) const { 2369 const MachineFrameInfo &MFI = MF.getFrameInfo(); 2370 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2371 const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); 2372 const auto it = WinEHXMMSlotInfo.find(FI); 2373 2374 if (it == WinEHXMMSlotInfo.end()) 2375 return getFrameIndexReference(MF, FI, FrameReg).getFixed(); 2376 2377 FrameReg = TRI->getStackRegister(); 2378 return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) + 2379 it->second; 2380 } 2381 2382 StackOffset 2383 X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, 2384 Register &FrameReg, 2385 int Adjustment) const { 2386 const MachineFrameInfo &MFI = MF.getFrameInfo(); 2387 FrameReg = TRI->getStackRegister(); 2388 return StackOffset::getFixed(MFI.getObjectOffset(FI) - 2389 getOffsetOfLocalArea() + Adjustment); 2390 } 2391 2392 StackOffset 2393 X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, 2394 int FI, Register &FrameReg, 2395 bool IgnoreSPUpdates) const { 2396 2397 const MachineFrameInfo &MFI = MF.getFrameInfo(); 2398 // Does not include any dynamic realign. 2399 const uint64_t StackSize = MFI.getStackSize(); 2400 // LLVM arranges the stack as follows: 2401 // ... 2402 // ARG2 2403 // ARG1 2404 // RETADDR 2405 // PUSH RBP <-- RBP points here 2406 // PUSH CSRs 2407 // ~~~~~~~ <-- possible stack realignment (non-win64) 2408 // ... 2409 // STACK OBJECTS 2410 // ... <-- RSP after prologue points here 2411 // ~~~~~~~ <-- possible stack realignment (win64) 2412 // 2413 // if (hasVarSizedObjects()): 2414 // ... <-- "base pointer" (ESI/RBX) points here 2415 // DYNAMIC ALLOCAS 2416 // ... <-- RSP points here 2417 // 2418 // Case 1: In the simple case of no stack realignment and no dynamic 2419 // allocas, both "fixed" stack objects (arguments and CSRs) are addressable 2420 // with fixed offsets from RSP. 
2421   //
2422   // Case 2: In the case of stack realignment with no dynamic allocas, fixed
2423   // stack objects are addressed with RBP and regular stack objects with RSP.
2424   //
2425   // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
2426   // to address stack arguments for outgoing calls and nothing else. The "base
2427   // pointer" points to local variables, and RBP points to fixed objects.
2428   //
2429   // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
2430   // answer we give is relative to the SP after the prologue, and not the
2431   // SP in the middle of the function.
2432
2433   if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
2434       !STI.isTargetWin64())
2435     return getFrameIndexReference(MF, FI, FrameReg);
2436
2437   // If !hasReservedCallFrame, the function might have SP adjustment in the
2438   // body. So, even though the offset is statically known, it depends on where
2439   // we are in the function.
2440   if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
2441     return getFrameIndexReference(MF, FI, FrameReg);
2442
2443   // We don't handle tail calls, and shouldn't be seeing them either.
2444   assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
2445          "we don't handle this case!");
2446
2447   // This is how the math works out:
2448   //
2449   // %rsp grows (i.e. gets lower) left to right. Each box below is
2450   // one word (eight bytes). Obj0 is the stack slot we're trying to
2451   // get to.
2452   //
2453   //    ----------------------------------
2454   //    | BP | Obj0 | Obj1 | ... | ObjN |
2455   //    ----------------------------------
2456   //    ^    ^      ^                   ^
2457   //    A    B      C                   E
2458   //
2459   // A is the incoming stack pointer.
2460   // (B - A) is the local area offset (-8 for x86-64) [1]
2461   // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
2462   //
2463   // |(E - B)| is the StackSize (absolute value, positive). For a
2464   // stack that grows down, this works out to be (B - E). [3]
2465   //
2466   // E is also the value of %rsp after the stack has been set up, and we
2467   // want (C - E) -- the value we can add to %rsp to get to Obj0. Now
2468   // (C - E) == (C - A) - (B - A) + (B - E)
2469   //         { Using [1], [2] and [3] above }
2470   //         == getObjectOffset - LocalAreaOffset + StackSize
2471
2472   return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
2473 }
2474
2475 bool X86FrameLowering::assignCalleeSavedSpillSlots(
2476     MachineFunction &MF, const TargetRegisterInfo *TRI,
2477     std::vector<CalleeSavedInfo> &CSI) const {
2478   MachineFrameInfo &MFI = MF.getFrameInfo();
2479   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2480
2481   unsigned CalleeSavedFrameSize = 0;
2482   unsigned XMMCalleeSavedFrameSize = 0;
2483   auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
2484   int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
2485
2486   int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
2487
2488   if (TailCallReturnAddrDelta < 0) {
2489     // create RETURNADDR area
2490     //   arg
2491     //   arg
2492     //   RETADDR
2493     //   { ...
2494     //     RETADDR area
2495     //     ...
2496     //   }
2497     //   [EBP]
2498     MFI.CreateFixedObject(-TailCallReturnAddrDelta,
2499                           TailCallReturnAddrDelta - SlotSize, true);
2500   }
2501
2502   // Spill the BasePtr if it's used.
2503   if (this->TRI->hasBasePointer(MF)) {
2504     // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
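    // (This mirrors the getHasSEHFramePtrSave() handling in emitPrologue: when
    // both a base pointer and EH funclets are present, the frame pointer value
    // is stashed in this slot so Win32 EH can recover it from the base pointer
    // rather than the other way around.)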
2505 if (MF.hasEHFunclets()) { 2506 int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize)); 2507 X86FI->setHasSEHFramePtrSave(true); 2508 X86FI->setSEHFramePtrSaveIndex(FI); 2509 } 2510 } 2511 2512 if (hasFP(MF)) { 2513 // emitPrologue always spills frame register the first thing. 2514 SpillSlotOffset -= SlotSize; 2515 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 2516 2517 // The async context lives directly before the frame pointer, and we 2518 // allocate a second slot to preserve stack alignment. 2519 if (X86FI->hasSwiftAsyncContext()) { 2520 SpillSlotOffset -= SlotSize; 2521 MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 2522 SpillSlotOffset -= SlotSize; 2523 } 2524 2525 // Since emitPrologue and emitEpilogue will handle spilling and restoring of 2526 // the frame register, we can delete it from CSI list and not have to worry 2527 // about avoiding it later. 2528 Register FPReg = TRI->getFrameRegister(MF); 2529 for (unsigned i = 0; i < CSI.size(); ++i) { 2530 if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { 2531 CSI.erase(CSI.begin() + i); 2532 break; 2533 } 2534 } 2535 } 2536 2537 // Assign slots for GPRs. It increases frame size. 2538 for (CalleeSavedInfo &I : llvm::reverse(CSI)) { 2539 Register Reg = I.getReg(); 2540 2541 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 2542 continue; 2543 2544 SpillSlotOffset -= SlotSize; 2545 CalleeSavedFrameSize += SlotSize; 2546 2547 int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); 2548 I.setFrameIdx(SlotIndex); 2549 } 2550 2551 X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); 2552 MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize); 2553 2554 // Assign slots for XMMs. 2555 for (CalleeSavedInfo &I : llvm::reverse(CSI)) { 2556 Register Reg = I.getReg(); 2557 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 2558 continue; 2559 2560 // If this is k-register make sure we lookup via the largest legal type. 2561 MVT VT = MVT::Other; 2562 if (X86::VK16RegClass.contains(Reg)) 2563 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; 2564 2565 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); 2566 unsigned Size = TRI->getSpillSize(*RC); 2567 Align Alignment = TRI->getSpillAlign(*RC); 2568 // ensure alignment 2569 assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86"); 2570 SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment); 2571 2572 // spill into slot 2573 SpillSlotOffset -= Size; 2574 int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); 2575 I.setFrameIdx(SlotIndex); 2576 MFI.ensureMaxAlignment(Alignment); 2577 2578 // Save the start offset and size of XMM in stack frame for funclets. 2579 if (X86::VR128RegClass.contains(Reg)) { 2580 WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; 2581 XMMCalleeSavedFrameSize += Size; 2582 } 2583 } 2584 2585 return true; 2586 } 2587 2588 bool X86FrameLowering::spillCalleeSavedRegisters( 2589 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2590 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2591 DebugLoc DL = MBB.findDebugLoc(MI); 2592 2593 // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI 2594 // for us, and there are no XMM CSRs on Win32. 2595 if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) 2596 return true; 2597 2598 // Push GPRs. It increases frame size. 2599 const MachineFunction &MF = *MBB.getParent(); 2600 unsigned Opc = STI.is64Bit() ? 
X86::PUSH64r : X86::PUSH32r; 2601 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { 2602 Register Reg = I.getReg(); 2603 2604 if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) 2605 continue; 2606 2607 const MachineRegisterInfo &MRI = MF.getRegInfo(); 2608 bool isLiveIn = MRI.isLiveIn(Reg); 2609 if (!isLiveIn) 2610 MBB.addLiveIn(Reg); 2611 2612 // Decide whether we can add a kill flag to the use. 2613 bool CanKill = !isLiveIn; 2614 // Check if any subregister is live-in 2615 if (CanKill) { 2616 for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) { 2617 if (MRI.isLiveIn(*AReg)) { 2618 CanKill = false; 2619 break; 2620 } 2621 } 2622 } 2623 2624 // Do not set a kill flag on values that are also marked as live-in. This 2625 // happens with the @llvm-returnaddress intrinsic and with arguments 2626 // passed in callee saved registers. 2627 // Omitting the kill flags is conservatively correct even if the live-in 2628 // is not used after all. 2629 BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill)) 2630 .setMIFlag(MachineInstr::FrameSetup); 2631 } 2632 2633 // Make XMM regs spilled. X86 does not have ability of push/pop XMM. 2634 // It can be done by spilling XMMs to stack frame. 2635 for (const CalleeSavedInfo &I : llvm::reverse(CSI)) { 2636 Register Reg = I.getReg(); 2637 if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) 2638 continue; 2639 2640 // If this is k-register make sure we lookup via the largest legal type. 2641 MVT VT = MVT::Other; 2642 if (X86::VK16RegClass.contains(Reg)) 2643 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; 2644 2645 // Add the callee-saved register as live-in. It's killed at the spill. 2646 MBB.addLiveIn(Reg); 2647 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); 2648 2649 TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI); 2650 --MI; 2651 MI->setFlag(MachineInstr::FrameSetup); 2652 ++MI; 2653 } 2654 2655 return true; 2656 } 2657 2658 void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, 2659 MachineBasicBlock::iterator MBBI, 2660 MachineInstr *CatchRet) const { 2661 // SEH shouldn't use catchret. 2662 assert(!isAsynchronousEHPersonality(classifyEHPersonality( 2663 MBB.getParent()->getFunction().getPersonalityFn())) && 2664 "SEH should not use CATCHRET"); 2665 const DebugLoc &DL = CatchRet->getDebugLoc(); 2666 MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB(); 2667 2668 // Fill EAX/RAX with the address of the target block. 2669 if (STI.is64Bit()) { 2670 // LEA64r CatchRetTarget(%rip), %rax 2671 BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX) 2672 .addReg(X86::RIP) 2673 .addImm(0) 2674 .addReg(0) 2675 .addMBB(CatchRetTarget) 2676 .addReg(0); 2677 } else { 2678 // MOV32ri $CatchRetTarget, %eax 2679 BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) 2680 .addMBB(CatchRetTarget); 2681 } 2682 2683 // Record that we've taken the address of CatchRetTarget and no longer just 2684 // reference it in a terminator. 2685 CatchRetTarget->setHasAddressTaken(); 2686 } 2687 2688 bool X86FrameLowering::restoreCalleeSavedRegisters( 2689 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2690 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2691 if (CSI.empty()) 2692 return false; 2693 2694 if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) { 2695 // Don't restore CSRs in 32-bit EH funclets. Matches 2696 // spillCalleeSavedRegisters. 
2697 if (STI.is32Bit()) 2698 return true; 2699 // Don't restore CSRs before an SEH catchret. SEH except blocks do not form 2700 // funclets. emitEpilogue transforms these to normal jumps. 2701 if (MI->getOpcode() == X86::CATCHRET) { 2702 const Function &F = MBB.getParent()->getFunction(); 2703 bool IsSEH = isAsynchronousEHPersonality( 2704 classifyEHPersonality(F.getPersonalityFn())); 2705 if (IsSEH) 2706 return true; 2707 } 2708 } 2709 2710 DebugLoc DL = MBB.findDebugLoc(MI); 2711 2712 // Reload XMMs from stack frame. 2713 for (const CalleeSavedInfo &I : CSI) { 2714 Register Reg = I.getReg(); 2715 if (X86::GR64RegClass.contains(Reg) || 2716 X86::GR32RegClass.contains(Reg)) 2717 continue; 2718 2719 // If this is k-register make sure we lookup via the largest legal type. 2720 MVT VT = MVT::Other; 2721 if (X86::VK16RegClass.contains(Reg)) 2722 VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; 2723 2724 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); 2725 TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI); 2726 } 2727 2728 // POP GPRs. 2729 unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; 2730 for (const CalleeSavedInfo &I : CSI) { 2731 Register Reg = I.getReg(); 2732 if (!X86::GR64RegClass.contains(Reg) && 2733 !X86::GR32RegClass.contains(Reg)) 2734 continue; 2735 2736 BuildMI(MBB, MI, DL, TII.get(Opc), Reg) 2737 .setMIFlag(MachineInstr::FrameDestroy); 2738 } 2739 return true; 2740 } 2741 2742 void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, 2743 BitVector &SavedRegs, 2744 RegScavenger *RS) const { 2745 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 2746 2747 // Spill the BasePtr if it's used. 2748 if (TRI->hasBasePointer(MF)){ 2749 Register BasePtr = TRI->getBaseRegister(); 2750 if (STI.isTarget64BitILP32()) 2751 BasePtr = getX86SubSuperRegister(BasePtr, 64); 2752 SavedRegs.set(BasePtr); 2753 } 2754 } 2755 2756 static bool 2757 HasNestArgument(const MachineFunction *MF) { 2758 const Function &F = MF->getFunction(); 2759 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); 2760 I != E; I++) { 2761 if (I->hasNestAttr() && !I->use_empty()) 2762 return true; 2763 } 2764 return false; 2765 } 2766 2767 /// GetScratchRegister - Get a temp register for performing work in the 2768 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform 2769 /// and the properties of the function either one or two registers will be 2770 /// needed. Set primary to true for the first register, false for the second. 2771 static unsigned 2772 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { 2773 CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); 2774 2775 // Erlang stuff. 2776 if (CallingConvention == CallingConv::HiPE) { 2777 if (Is64Bit) 2778 return Primary ? X86::R14 : X86::R13; 2779 else 2780 return Primary ? X86::EBX : X86::EDI; 2781 } 2782 2783 if (Is64Bit) { 2784 if (IsLP64) 2785 return Primary ? X86::R11 : X86::R12; 2786 else 2787 return Primary ? X86::R11D : X86::R12D; 2788 } 2789 2790 bool IsNested = HasNestArgument(&MF); 2791 2792 if (CallingConvention == CallingConv::X86_FastCall || 2793 CallingConvention == CallingConv::Fast || 2794 CallingConvention == CallingConv::Tail) { 2795 if (IsNested) 2796 report_fatal_error("Segmented stacks does not support fastcall with " 2797 "nested function."); 2798 return Primary ? X86::EAX : X86::ECX; 2799 } 2800 if (IsNested) 2801 return Primary ? X86::EDX : X86::EAX; 2802 return Primary ? 
X86::ECX : X86::EAX;
2803 }
2804
2805 // The stack limit in the TCB is set to this many bytes above the actual stack
2806 // limit.
2807 static const uint64_t kSplitStackAvailable = 256;
2808
2809 void X86FrameLowering::adjustForSegmentedStacks(
2810     MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2811   MachineFrameInfo &MFI = MF.getFrameInfo();
2812   uint64_t StackSize;
2813   unsigned TlsReg, TlsOffset;
2814   DebugLoc DL;
2815
2816   // To support shrink-wrapping we would need to insert the new blocks
2817   // at the right place and update the branches to PrologueMBB.
2818   assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
2819
2820   unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
2821   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
2822          "Scratch register is live-in");
2823
2824   if (MF.getFunction().isVarArg())
2825     report_fatal_error("Segmented stacks do not support vararg functions.");
2826   if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
2827       !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
2828       !STI.isTargetDragonFly())
2829     report_fatal_error("Segmented stacks not supported on this platform.");
2830
2831   // Eventually StackSize will be calculated by a link-time pass, which will
2832   // also decide whether checking code needs to be injected into this particular
2833   // prologue.
2834   StackSize = MFI.getStackSize();
2835
2836   // Do not generate a prologue for leaf functions with a stack of size zero.
2837   // For non-leaf functions we have to allow for the possibility that the
2838   // call is to a non-split function, as in PR37807. This function could also
2839   // take the address of a non-split function. When the linker tries to adjust
2840   // its non-existent prologue, it would fail with an error. Mark the object
2841   // file so that such failures are not errors. See this Go language bug report:
2842   // https://go-review.googlesource.com/c/go/+/148819/
2843   if (StackSize == 0 && !MFI.hasTailCall()) {
2844     MF.getMMI().setHasNosplitStack(true);
2845     return;
2846   }
2847
2848   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
2849   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
2850   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2851   bool IsNested = false;
2852
2853   // We need to know if the function has a nest argument only in 64 bit mode.
2854   if (Is64Bit)
2855     IsNested = HasNestArgument(&MF);
2856
2857   // The MOV R10, RAX needs to be in a different block, since the RET we emit in
2858   // allocMBB needs to be the last (terminating) instruction.
2859
2860   for (const auto &LI : PrologueMBB.liveins()) {
2861     allocMBB->addLiveIn(LI);
2862     checkMBB->addLiveIn(LI);
2863   }
2864
2865   if (IsNested)
2866     allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
2867
2868   MF.push_front(allocMBB);
2869   MF.push_front(checkMBB);
2870
2871   // When the frame size is less than 256 we just compare the stack
2872   // boundary directly to the value of the stack pointer, per gcc.
2873   bool CompareStackPointer = StackSize < kSplitStackAvailable;
2874
2875   // Read the limit of the current stacklet from the stack_guard location.
2876   if (Is64Bit) {
2877     if (STI.isTargetLinux()) {
2878       TlsReg = X86::FS;
2879       TlsOffset = IsLP64 ? 0x70 : 0x40;
2880     } else if (STI.isTargetDarwin()) {
2881       TlsReg = X86::GS;
2882       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
2883 } else if (STI.isTargetWin64()) { 2884 TlsReg = X86::GS; 2885 TlsOffset = 0x28; // pvArbitrary, reserved for application use 2886 } else if (STI.isTargetFreeBSD()) { 2887 TlsReg = X86::FS; 2888 TlsOffset = 0x18; 2889 } else if (STI.isTargetDragonFly()) { 2890 TlsReg = X86::FS; 2891 TlsOffset = 0x20; // use tls_tcb.tcb_segstack 2892 } else { 2893 report_fatal_error("Segmented stacks not supported on this platform."); 2894 } 2895 2896 if (CompareStackPointer) 2897 ScratchReg = IsLP64 ? X86::RSP : X86::ESP; 2898 else 2899 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP) 2900 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 2901 2902 BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg) 2903 .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); 2904 } else { 2905 if (STI.isTargetLinux()) { 2906 TlsReg = X86::GS; 2907 TlsOffset = 0x30; 2908 } else if (STI.isTargetDarwin()) { 2909 TlsReg = X86::GS; 2910 TlsOffset = 0x48 + 90*4; 2911 } else if (STI.isTargetWin32()) { 2912 TlsReg = X86::FS; 2913 TlsOffset = 0x14; // pvArbitrary, reserved for application use 2914 } else if (STI.isTargetDragonFly()) { 2915 TlsReg = X86::FS; 2916 TlsOffset = 0x10; // use tls_tcb.tcb_segstack 2917 } else if (STI.isTargetFreeBSD()) { 2918 report_fatal_error("Segmented stacks not supported on FreeBSD i386."); 2919 } else { 2920 report_fatal_error("Segmented stacks not supported on this platform."); 2921 } 2922 2923 if (CompareStackPointer) 2924 ScratchReg = X86::ESP; 2925 else 2926 BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) 2927 .addImm(1).addReg(0).addImm(-StackSize).addReg(0); 2928 2929 if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || 2930 STI.isTargetDragonFly()) { 2931 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) 2932 .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); 2933 } else if (STI.isTargetDarwin()) { 2934 2935 // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. 2936 unsigned ScratchReg2; 2937 bool SaveScratch2; 2938 if (CompareStackPointer) { 2939 // The primary scratch register is available for holding the TLS offset. 2940 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true); 2941 SaveScratch2 = false; 2942 } else { 2943 // Need to use a second register to hold the TLS offset 2944 ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false); 2945 2946 // Unfortunately, with fastcc the second scratch register may hold an 2947 // argument. 2948 SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); 2949 } 2950 2951 // If Scratch2 is live-in then it needs to be saved. 2952 assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && 2953 "Scratch register is live-in and not saved"); 2954 2955 if (SaveScratch2) 2956 BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) 2957 .addReg(ScratchReg2, RegState::Kill); 2958 2959 BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) 2960 .addImm(TlsOffset); 2961 BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) 2962 .addReg(ScratchReg) 2963 .addReg(ScratchReg2).addImm(1).addReg(0) 2964 .addImm(0) 2965 .addReg(TlsReg); 2966 2967 if (SaveScratch2) 2968 BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); 2969 } 2970 } 2971 2972 // This jump is taken if SP >= (Stacklet Limit + Stack Space required). 2973 // It jumps to normal execution of the function body. 
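  // A hedged sketch of the check on 64-bit Linux with LP64 and a frame larger
  // than the 256-byte slack (register choice follows GetScratchRegister):
  //   leaq -StackSize(%rsp), %r11
  //   cmpq %fs:0x70, %r11
  //   ja   <PrologueMBB>        ; enough stack, run the normal prologue
  // Otherwise control falls through into the __morestack call block.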
2974 BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A); 2975 2976 // On 32 bit we first push the arguments size and then the frame size. On 64 2977 // bit, we pass the stack frame size in r10 and the argument size in r11. 2978 if (Is64Bit) { 2979 // Functions with nested arguments use R10, so it needs to be saved across 2980 // the call to _morestack 2981 2982 const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; 2983 const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; 2984 const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; 2985 const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; 2986 2987 if (IsNested) 2988 BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); 2989 2990 BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10) 2991 .addImm(StackSize); 2992 BuildMI(allocMBB, DL, 2993 TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())), 2994 Reg11) 2995 .addImm(X86FI->getArgumentStackSize()); 2996 } else { 2997 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 2998 .addImm(X86FI->getArgumentStackSize()); 2999 BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) 3000 .addImm(StackSize); 3001 } 3002 3003 // __morestack is in libgcc 3004 if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { 3005 // Under the large code model, we cannot assume that __morestack lives 3006 // within 2^31 bytes of the call site, so we cannot use pc-relative 3007 // addressing. We cannot perform the call via a temporary register, 3008 // as the rax register may be used to store the static chain, and all 3009 // other suitable registers may be either callee-save or used for 3010 // parameter passing. We cannot use the stack at this point either 3011 // because __morestack manipulates the stack directly. 3012 // 3013 // To avoid these issues, perform an indirect call via a read-only memory 3014 // location containing the address. 3015 // 3016 // This solution is not perfect, as it assumes that the .rodata section 3017 // is laid out within 2^31 bytes of each function body, but this seems 3018 // to be sufficient for JIT. 3019 // FIXME: Add retpoline support and remove the error here.. 3020 if (STI.useIndirectThunkCalls()) 3021 report_fatal_error("Emitting morestack calls on 64-bit with the large " 3022 "code model and thunks not yet implemented."); 3023 BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) 3024 .addReg(X86::RIP) 3025 .addImm(0) 3026 .addReg(0) 3027 .addExternalSymbol("__morestack_addr") 3028 .addReg(0); 3029 MF.getMMI().setUsesMorestackAddr(true); 3030 } else { 3031 if (Is64Bit) 3032 BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) 3033 .addExternalSymbol("__morestack"); 3034 else 3035 BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) 3036 .addExternalSymbol("__morestack"); 3037 } 3038 3039 if (IsNested) 3040 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); 3041 else 3042 BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); 3043 3044 allocMBB->addSuccessor(&PrologueMBB); 3045 3046 checkMBB->addSuccessor(allocMBB, BranchProbability::getZero()); 3047 checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne()); 3048 3049 #ifdef EXPENSIVE_CHECKS 3050 MF.verify(); 3051 #endif 3052 } 3053 3054 /// Lookup an ERTS parameter in the !hipe.literals named metadata node. 3055 /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets 3056 /// to fields it needs, through a named metadata node "hipe.literals" containing 3057 /// name-value pairs. 
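/// As a sketch (the literal names are the ones looked up below; the values
/// are made up for illustration), the metadata is expected to look like:
///   !hipe.literals = !{!0, !1, !2}
///   !0 = !{!"P_NSP_LIMIT", i32 152}
///   !1 = !{!"X86_LEAF_WORDS", i32 24}
///   !2 = !{!"AMD64_LEAF_WORDS", i32 24}
/// i.e. each operand is a two-element node pairing an MDString name with a
/// ConstantInt value, which is exactly what the lookup below walks.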
3058 static unsigned getHiPELiteral( 3059 NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) { 3060 for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { 3061 MDNode *Node = HiPELiteralsMD->getOperand(i); 3062 if (Node->getNumOperands() != 2) continue; 3063 MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0)); 3064 ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1)); 3065 if (!NodeName || !NodeVal) continue; 3066 ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue()); 3067 if (ValConst && NodeName->getString() == LiteralName) { 3068 return ValConst->getZExtValue(); 3069 } 3070 } 3071 3072 report_fatal_error("HiPE literal " + LiteralName 3073 + " required but not provided"); 3074 } 3075 3076 // Return true if there are no non-ehpad successors to MBB and there are no 3077 // non-meta instructions between MBBI and MBB.end(). 3078 static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, 3079 MachineBasicBlock::const_iterator MBBI) { 3080 return llvm::all_of( 3081 MBB.successors(), 3082 [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && 3083 std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) { 3084 return MI.isMetaInstruction(); 3085 }); 3086 } 3087 3088 /// Erlang programs may need a special prologue to handle the stack size they 3089 /// might need at runtime. That is because Erlang/OTP does not implement a C 3090 /// stack but uses a custom implementation of hybrid stack/heap architecture. 3091 /// (for more information see Eric Stenman's Ph.D. thesis: 3092 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) 3093 /// 3094 /// CheckStack: 3095 /// temp0 = sp - MaxStack 3096 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 3097 /// OldStart: 3098 /// ... 3099 /// IncStack: 3100 /// call inc_stack # doubles the stack space 3101 /// temp0 = sp - MaxStack 3102 /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart 3103 void X86FrameLowering::adjustForHiPEPrologue( 3104 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { 3105 MachineFrameInfo &MFI = MF.getFrameInfo(); 3106 DebugLoc DL; 3107 3108 // To support shrink-wrapping we would need to insert the new blocks 3109 // at the right place and update the branches to PrologueMBB. 3110 assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); 3111 3112 // HiPE-specific values 3113 NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule() 3114 ->getNamedMetadata("hipe.literals"); 3115 if (!HiPELiteralsMD) 3116 report_fatal_error( 3117 "Can't generate HiPE prologue without runtime parameters"); 3118 const unsigned HipeLeafWords 3119 = getHiPELiteral(HiPELiteralsMD, 3120 Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS"); 3121 const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; 3122 const unsigned Guaranteed = HipeLeafWords * SlotSize; 3123 unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ? 3124 MF.getFunction().arg_size() - CCRegisteredArgs : 0; 3125 unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize; 3126 3127 assert(STI.isTargetLinux() && 3128 "HiPE prologue is only supported on Linux operating systems."); 3129 3130 // Compute the largest caller's frame that is needed to fit the callees' 3131 // frames. 
This 'MaxStack' is computed from: 3132 // 3133 // a) the fixed frame size, which is the space needed for all spilled temps, 3134 // b) outgoing on-stack parameter areas, and 3135 // c) the minimum stack space this function needs to make available for the 3136 // functions it calls (a tunable ABI property). 3137 if (MFI.hasCalls()) { 3138 unsigned MoreStackForCalls = 0; 3139 3140 for (auto &MBB : MF) { 3141 for (auto &MI : MBB) { 3142 if (!MI.isCall()) 3143 continue; 3144 3145 // Get callee operand. 3146 const MachineOperand &MO = MI.getOperand(0); 3147 3148 // Only take account of global function calls (no closures etc.). 3149 if (!MO.isGlobal()) 3150 continue; 3151 3152 const Function *F = dyn_cast<Function>(MO.getGlobal()); 3153 if (!F) 3154 continue; 3155 3156 // Do not update 'MaxStack' for primitive and built-in functions 3157 // (encoded with names either starting with "erlang."/"bif_" or not 3158 // having a ".", such as a simple <Module>.<Function>.<Arity>, or an 3159 // "_", such as the BIF "suspend_0") as they are executed on another 3160 // stack. 3161 if (F->getName().contains("erlang.") || F->getName().contains("bif_") || 3162 F->getName().find_first_of("._") == StringRef::npos) 3163 continue; 3164 3165 unsigned CalleeStkArity = 3166 F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; 3167 if (HipeLeafWords - 1 > CalleeStkArity) 3168 MoreStackForCalls = std::max(MoreStackForCalls, 3169 (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); 3170 } 3171 } 3172 MaxStack += MoreStackForCalls; 3173 } 3174 3175 // If the stack frame needed is larger than the guaranteed then runtime checks 3176 // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue. 3177 if (MaxStack > Guaranteed) { 3178 MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); 3179 MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); 3180 3181 for (const auto &LI : PrologueMBB.liveins()) { 3182 stackCheckMBB->addLiveIn(LI); 3183 incStackMBB->addLiveIn(LI); 3184 } 3185 3186 MF.push_front(incStackMBB); 3187 MF.push_front(stackCheckMBB); 3188 3189 unsigned ScratchReg, SPReg, PReg, SPLimitOffset; 3190 unsigned LEAop, CMPop, CALLop; 3191 SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT"); 3192 if (Is64Bit) { 3193 SPReg = X86::RSP; 3194 PReg = X86::RBP; 3195 LEAop = X86::LEA64r; 3196 CMPop = X86::CMP64rm; 3197 CALLop = X86::CALL64pcrel32; 3198 } else { 3199 SPReg = X86::ESP; 3200 PReg = X86::EBP; 3201 LEAop = X86::LEA32r; 3202 CMPop = X86::CMP32rm; 3203 CALLop = X86::CALLpcrel32; 3204 } 3205 3206 ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); 3207 assert(!MF.getRegInfo().isLiveIn(ScratchReg) && 3208 "HiPE prologue scratch register is live-in"); 3209 3210 // Create new MBB for StackCheck: 3211 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), 3212 SPReg, false, -MaxStack); 3213 // SPLimitOffset is in a fixed heap location (pointed by BP). 3214 addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) 3215 .addReg(ScratchReg), PReg, false, SPLimitOffset); 3216 BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE); 3217 3218 // Create new MBB for IncStack: 3219 BuildMI(incStackMBB, DL, TII.get(CALLop)). 
3220 addExternalSymbol("inc_stack_0"); 3221 addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), 3222 SPReg, false, -MaxStack); 3223 addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) 3224 .addReg(ScratchReg), PReg, false, SPLimitOffset); 3225 BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE); 3226 3227 stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100}); 3228 stackCheckMBB->addSuccessor(incStackMBB, {1, 100}); 3229 incStackMBB->addSuccessor(&PrologueMBB, {99, 100}); 3230 incStackMBB->addSuccessor(incStackMBB, {1, 100}); 3231 } 3232 #ifdef EXPENSIVE_CHECKS 3233 MF.verify(); 3234 #endif 3235 } 3236 3237 bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, 3238 MachineBasicBlock::iterator MBBI, 3239 const DebugLoc &DL, 3240 int Offset) const { 3241 if (Offset <= 0) 3242 return false; 3243 3244 if (Offset % SlotSize) 3245 return false; 3246 3247 int NumPops = Offset / SlotSize; 3248 // This is only worth it if we have at most 2 pops. 3249 if (NumPops != 1 && NumPops != 2) 3250 return false; 3251 3252 // Handle only the trivial case where the adjustment directly follows 3253 // a call. This is the most common one, anyway. 3254 if (MBBI == MBB.begin()) 3255 return false; 3256 MachineBasicBlock::iterator Prev = std::prev(MBBI); 3257 if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) 3258 return false; 3259 3260 unsigned Regs[2]; 3261 unsigned FoundRegs = 0; 3262 3263 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 3264 const MachineOperand &RegMask = Prev->getOperand(1); 3265 3266 auto &RegClass = 3267 Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; 3268 // Try to find up to NumPops free registers. 3269 for (auto Candidate : RegClass) { 3270 // Poor man's liveness: 3271 // Since we're immediately after a call, any register that is clobbered 3272 // by the call and not defined by it can be considered dead. 3273 if (!RegMask.clobbersPhysReg(Candidate)) 3274 continue; 3275 3276 // Don't clobber reserved registers 3277 if (MRI.isReserved(Candidate)) 3278 continue; 3279 3280 bool IsDef = false; 3281 for (const MachineOperand &MO : Prev->implicit_operands()) { 3282 if (MO.isReg() && MO.isDef() && 3283 TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) { 3284 IsDef = true; 3285 break; 3286 } 3287 } 3288 3289 if (IsDef) 3290 continue; 3291 3292 Regs[FoundRegs++] = Candidate; 3293 if (FoundRegs == (unsigned)NumPops) 3294 break; 3295 } 3296 3297 if (FoundRegs == 0) 3298 return false; 3299 3300 // If we found only one free register, but need two, reuse the same one twice. 3301 while (FoundRegs < (unsigned)NumPops) 3302 Regs[FoundRegs++] = Regs[0]; 3303 3304 for (int i = 0; i < NumPops; ++i) 3305 BuildMI(MBB, MBBI, DL, 3306 TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); 3307 3308 return true; 3309 } 3310 3311 MachineBasicBlock::iterator X86FrameLowering:: 3312 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 3313 MachineBasicBlock::iterator I) const { 3314 bool reserveCallFrame = hasReservedCallFrame(MF); 3315 unsigned Opcode = I->getOpcode(); 3316 bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); 3317 DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased. 3318 uint64_t Amount = TII.getFrameSize(*I); 3319 uint64_t InternalAmt = (isDestroy || Amount) ? 
TII.getFrameAdjustment(*I) : 0; 3320 I = MBB.erase(I); 3321 auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); 3322 3323 // Try to avoid emitting dead SP adjustments if the block end is unreachable, 3324 // typically because the function is marked noreturn (abort, throw, 3325 // assert_fail, etc). 3326 if (isDestroy && blockEndIsUnreachable(MBB, I)) 3327 return I; 3328 3329 if (!reserveCallFrame) { 3330 // If the stack pointer can be changed after prologue, turn the 3331 // adjcallstackup instruction into a 'sub ESP, <amt>' and the 3332 // adjcallstackdown instruction into 'add ESP, <amt>' 3333 3334 // We need to keep the stack aligned properly. To do this, we round the 3335 // amount of space needed for the outgoing arguments up to the next 3336 // alignment boundary. 3337 Amount = alignTo(Amount, getStackAlign()); 3338 3339 const Function &F = MF.getFunction(); 3340 bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); 3341 bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves(); 3342 3343 // If we have any exception handlers in this function, and we adjust 3344 // the SP before calls, we may need to indicate this to the unwinder 3345 // using GNU_ARGS_SIZE. Note that this may be necessary even when 3346 // Amount == 0, because the preceding function may have set a non-0 3347 // GNU_ARGS_SIZE. 3348 // TODO: We don't need to reset this between subsequent functions, 3349 // if it didn't change. 3350 bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty(); 3351 3352 if (HasDwarfEHHandlers && !isDestroy && 3353 MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) 3354 BuildCFI(MBB, InsertPos, DL, 3355 MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); 3356 3357 if (Amount == 0) 3358 return I; 3359 3360 // Factor out the amount that gets handled inside the sequence 3361 // (Pushes of argument for frame setup, callee pops for frame destroy) 3362 Amount -= InternalAmt; 3363 3364 // TODO: This is needed only if we require precise CFA. 3365 // If this is a callee-pop calling convention, emit a CFA adjust for 3366 // the amount the callee popped. 3367 if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) 3368 BuildCFI(MBB, InsertPos, DL, 3369 MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); 3370 3371 // Add Amount to SP to destroy a frame, or subtract to setup. 3372 int64_t StackAdjustment = isDestroy ? Amount : -Amount; 3373 3374 if (StackAdjustment) { 3375 // Merge with any previous or following adjustment instruction. Note: the 3376 // instructions merged with here do not have CFI, so their stack 3377 // adjustments do not feed into CfaAdjustment. 3378 StackAdjustment += mergeSPUpdates(MBB, InsertPos, true); 3379 StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); 3380 3381 if (StackAdjustment) { 3382 if (!(F.hasMinSize() && 3383 adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment))) 3384 BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment, 3385 /*InEpilogue=*/false); 3386 } 3387 } 3388 3389 if (DwarfCFI && !hasFP(MF)) { 3390 // If we don't have FP, but need to generate unwind information, 3391 // we need to set the correct CFA offset after the stack adjustment. 3392 // How much we adjust the CFA offset depends on whether we're emitting 3393 // CFI only for EH purposes or for debugging. EH only requires the CFA 3394 // offset to be correct at each call site, while for debugging we want 3395 // it to be more precise. 
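      //
      // For example (illustrative), a frame-setup adjustment that emitted
      // 'sub $16, %esp' above reaches this point with StackAdjustment == -16,
      // so CfaAdjustment below is +16 and we emit '.cfi_adjust_cfa_offset 16',
      // keeping the CFA at the same address while SP moves down.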
3396 3397 int64_t CfaAdjustment = -StackAdjustment; 3398 // TODO: When not using precise CFA, we also need to adjust for the 3399 // InternalAmt here. 3400 if (CfaAdjustment) { 3401 BuildCFI(MBB, InsertPos, DL, 3402 MCCFIInstruction::createAdjustCfaOffset(nullptr, 3403 CfaAdjustment)); 3404 } 3405 } 3406 3407 return I; 3408 } 3409 3410 if (InternalAmt) { 3411 MachineBasicBlock::iterator CI = I; 3412 MachineBasicBlock::iterator B = MBB.begin(); 3413 while (CI != B && !std::prev(CI)->isCall()) 3414 --CI; 3415 BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false); 3416 } 3417 3418 return I; 3419 } 3420 3421 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 3422 assert(MBB.getParent() && "Block is not attached to a function!"); 3423 const MachineFunction &MF = *MBB.getParent(); 3424 if (!MBB.isLiveIn(X86::EFLAGS)) 3425 return true; 3426 3427 const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 3428 return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); 3429 } 3430 3431 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 3432 assert(MBB.getParent() && "Block is not attached to a function!"); 3433 3434 // Win64 has strict requirements in terms of epilogue and we are 3435 // not taking a chance at messing with them. 3436 // I.e., unless this block is already an exit block, we can't use 3437 // it as an epilogue. 3438 if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) 3439 return false; 3440 3441 // Swift async context epilogue has a BTR instruction that clobbers parts of 3442 // EFLAGS. 3443 const MachineFunction &MF = *MBB.getParent(); 3444 if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) 3445 return !flagsNeedToBePreservedBeforeTheTerminators(MBB); 3446 3447 if (canUseLEAForSPInEpilogue(*MBB.getParent())) 3448 return true; 3449 3450 // If we cannot use LEA to adjust SP, we may need to use ADD, which 3451 // clobbers the EFLAGS. Check that we do not need to preserve it, 3452 // otherwise, conservatively assume this is not 3453 // safe to insert the epilogue here. 3454 return !flagsNeedToBePreservedBeforeTheTerminators(MBB); 3455 } 3456 3457 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 3458 // If we may need to emit frameless compact unwind information, give 3459 // up as this is currently broken: PR25614. 3460 bool CompactUnwind = 3461 MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() != 3462 nullptr; 3463 return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) || 3464 !CompactUnwind) && 3465 // The lowering of segmented stack and HiPE only support entry 3466 // blocks as prologue blocks: PR26107. 
This limitation may be 3467 // lifted if we fix: 3468 // - adjustForSegmentedStacks 3469 // - adjustForHiPEPrologue 3470 MF.getFunction().getCallingConv() != CallingConv::HiPE && 3471 !MF.shouldSplitStack(); 3472 } 3473 3474 MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( 3475 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 3476 const DebugLoc &DL, bool RestoreSP) const { 3477 assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); 3478 assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); 3479 assert(STI.is32Bit() && !Uses64BitFramePtr && 3480 "restoring EBP/ESI on non-32-bit target"); 3481 3482 MachineFunction &MF = *MBB.getParent(); 3483 Register FramePtr = TRI->getFrameRegister(MF); 3484 Register BasePtr = TRI->getBaseRegister(); 3485 WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); 3486 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 3487 MachineFrameInfo &MFI = MF.getFrameInfo(); 3488 3489 // FIXME: Don't set FrameSetup flag in catchret case. 3490 3491 int FI = FuncInfo.EHRegNodeFrameIndex; 3492 int EHRegSize = MFI.getObjectSize(FI); 3493 3494 if (RestoreSP) { 3495 // MOV32rm -EHRegSize(%ebp), %esp 3496 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), 3497 X86::EBP, true, -EHRegSize) 3498 .setMIFlag(MachineInstr::FrameSetup); 3499 } 3500 3501 Register UsedReg; 3502 int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed(); 3503 int EndOffset = -EHRegOffset - EHRegSize; 3504 FuncInfo.EHRegNodeEndOffset = EndOffset; 3505 3506 if (UsedReg == FramePtr) { 3507 // ADD $offset, %ebp 3508 unsigned ADDri = getADDriOpcode(false, EndOffset); 3509 BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) 3510 .addReg(FramePtr) 3511 .addImm(EndOffset) 3512 .setMIFlag(MachineInstr::FrameSetup) 3513 ->getOperand(3) 3514 .setIsDead(); 3515 assert(EndOffset >= 0 && 3516 "end of registration object above normal EBP position!"); 3517 } else if (UsedReg == BasePtr) { 3518 // LEA offset(%ebp), %esi 3519 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), 3520 FramePtr, false, EndOffset) 3521 .setMIFlag(MachineInstr::FrameSetup); 3522 // MOV32rm SavedEBPOffset(%esi), %ebp 3523 assert(X86FI->getHasSEHFramePtrSave()); 3524 int Offset = 3525 getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) 3526 .getFixed(); 3527 assert(UsedReg == BasePtr); 3528 addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), 3529 UsedReg, true, Offset) 3530 .setMIFlag(MachineInstr::FrameSetup); 3531 } else { 3532 llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr"); 3533 } 3534 return MBBI; 3535 } 3536 3537 int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { 3538 return TRI->getSlotSize(); 3539 } 3540 3541 Register 3542 X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const { 3543 return TRI->getDwarfRegNum(StackPtr, true); 3544 } 3545 3546 namespace { 3547 // Struct used by orderFrameObjects to help sort the stack objects. 3548 struct X86FrameSortingObject { 3549 bool IsValid = false; // true if we care about this Object. 3550 unsigned ObjectIndex = 0; // Index of Object into MFI list. 3551 unsigned ObjectSize = 0; // Size of Object in bytes. 3552 Align ObjectAlignment = Align(1); // Alignment of Object in bytes. 3553 unsigned ObjectNumUses = 0; // Object static number of uses. 3554 }; 3555 3556 // The comparison function we use for std::sort to order our local 3557 // stack symbols. 
The current algorithm is to use an estimated 3558 // "density". This takes into consideration the size and number of 3559 // uses each object has in order to roughly minimize code size. 3560 // So, for example, an object of size 16B that is referenced 5 times 3561 // will get higher priority than 4 4B objects referenced 1 time each. 3562 // It's not perfect and we may be able to squeeze a few more bytes out of 3563 // it (for example : 0(esp) requires fewer bytes, symbols allocated at the 3564 // fringe end can have special consideration, given their size is less 3565 // important, etc.), but the algorithmic complexity grows too much to be 3566 // worth the extra gains we get. This gets us pretty close. 3567 // The final order leaves us with objects with highest priority going 3568 // at the end of our list. 3569 struct X86FrameSortingComparator { 3570 inline bool operator()(const X86FrameSortingObject &A, 3571 const X86FrameSortingObject &B) const { 3572 uint64_t DensityAScaled, DensityBScaled; 3573 3574 // For consistency in our comparison, all invalid objects are placed 3575 // at the end. This also allows us to stop walking when we hit the 3576 // first invalid item after it's all sorted. 3577 if (!A.IsValid) 3578 return false; 3579 if (!B.IsValid) 3580 return true; 3581 3582 // The density is calculated by doing : 3583 // (double)DensityA = A.ObjectNumUses / A.ObjectSize 3584 // (double)DensityB = B.ObjectNumUses / B.ObjectSize 3585 // Since this approach may cause inconsistencies in 3586 // the floating point <, >, == comparisons, depending on the floating 3587 // point model with which the compiler was built, we're going 3588 // to scale both sides by multiplying with 3589 // A.ObjectSize * B.ObjectSize. This ends up factoring away 3590 // the division and, with it, the need for any floating point 3591 // arithmetic. 3592 DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * 3593 static_cast<uint64_t>(B.ObjectSize); 3594 DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * 3595 static_cast<uint64_t>(A.ObjectSize); 3596 3597 // If the two densities are equal, prioritize highest alignment 3598 // objects. This allows for similar alignment objects 3599 // to be packed together (given the same density). 3600 // There's room for improvement here, also, since we can pack 3601 // similar alignment (different density) objects next to each 3602 // other to save padding. This will also require further 3603 // complexity/iterations, and the overall gain isn't worth it, 3604 // in general. Something to keep in mind, though. 3605 if (DensityAScaled == DensityBScaled) 3606 return A.ObjectAlignment < B.ObjectAlignment; 3607 3608 return DensityAScaled < DensityBScaled; 3609 } 3610 }; 3611 } // namespace 3612 3613 // Order the symbols in the local stack. 3614 // We want to place the local stack objects in some sort of sensible order. 3615 // The heuristic we use is to try and pack them according to static number 3616 // of uses and size of object in order to minimize code size. 3617 void X86FrameLowering::orderFrameObjects( 3618 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { 3619 const MachineFrameInfo &MFI = MF.getFrameInfo(); 3620 3621 // Don't waste time if there's nothing to do. 3622 if (ObjectsToAllocate.empty()) 3623 return; 3624 3625 // Create an array of all MFI objects. We won't need all of these 3626 // objects, but we're going to create a full array of them to make 3627 // it easier to index into when we're counting "uses" down below. 
3628 // We want to be able to easily/cheaply access an object by simply 3629 // indexing into it, instead of having to search for it every time. 3630 std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); 3631 3632 // Walk the objects we care about and mark them as such in our working 3633 // struct. 3634 for (auto &Obj : ObjectsToAllocate) { 3635 SortingObjects[Obj].IsValid = true; 3636 SortingObjects[Obj].ObjectIndex = Obj; 3637 SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj); 3638 // Set the size. 3639 int ObjectSize = MFI.getObjectSize(Obj); 3640 if (ObjectSize == 0) 3641 // Variable size. Just use 4. 3642 SortingObjects[Obj].ObjectSize = 4; 3643 else 3644 SortingObjects[Obj].ObjectSize = ObjectSize; 3645 } 3646 3647 // Count the number of uses for each object. 3648 for (auto &MBB : MF) { 3649 for (auto &MI : MBB) { 3650 if (MI.isDebugInstr()) 3651 continue; 3652 for (const MachineOperand &MO : MI.operands()) { 3653 // Check to see if it's a local stack symbol. 3654 if (!MO.isFI()) 3655 continue; 3656 int Index = MO.getIndex(); 3657 // Check to see if it falls within our range, and is tagged 3658 // to require ordering. 3659 if (Index >= 0 && Index < MFI.getObjectIndexEnd() && 3660 SortingObjects[Index].IsValid) 3661 SortingObjects[Index].ObjectNumUses++; 3662 } 3663 } 3664 } 3665 3666 // Sort the objects using X86FrameSortingAlgorithm (see its comment for 3667 // info). 3668 llvm::stable_sort(SortingObjects, X86FrameSortingComparator()); 3669 3670 // Now modify the original list to represent the final order that 3671 // we want. The order will depend on whether we're going to access them 3672 // from the stack pointer or the frame pointer. For SP, the list should 3673 // end up with the END containing objects that we want with smaller offsets. 3674 // For FP, it should be flipped. 3675 int i = 0; 3676 for (auto &Obj : SortingObjects) { 3677 // All invalid items are sorted at the end, so it's safe to stop. 3678 if (!Obj.IsValid) 3679 break; 3680 ObjectsToAllocate[i++] = Obj.ObjectIndex; 3681 } 3682 3683 // Flip it if we're accessing off of the FP. 3684 if (!TRI->hasStackRealignment(MF) && hasFP(MF)) 3685 std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end()); 3686 } 3687 3688 3689 unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { 3690 // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. 3691 unsigned Offset = 16; 3692 // RBP is immediately pushed. 3693 Offset += SlotSize; 3694 // All callee-saved registers are then pushed. 3695 Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); 3696 // Every funclet allocates enough stack space for the largest outgoing call. 3697 Offset += getWinEHFuncletFrameSize(MF); 3698 return Offset; 3699 } 3700 3701 void X86FrameLowering::processFunctionBeforeFrameFinalized( 3702 MachineFunction &MF, RegScavenger *RS) const { 3703 // Mark the function as not having WinCFI. We will set it back to true in 3704 // emitPrologue if it gets called and emits CFI. 3705 MF.setHasWinCFI(false); 3706 3707 // If we are using Windows x64 CFI, ensure that the stack is always 8 byte 3708 // aligned. The format doesn't support misaligned stack adjustments. 3709 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) 3710 MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize)); 3711 3712 // If this function isn't doing Win64-style C++ EH, we don't need to do 3713 // anything. 
3714 if (STI.is64Bit() && MF.hasEHFunclets() && 3715 classifyEHPersonality(MF.getFunction().getPersonalityFn()) == 3716 EHPersonality::MSVC_CXX) { 3717 adjustFrameForMsvcCxxEh(MF); 3718 } 3719 } 3720 3721 void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { 3722 // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset 3723 // relative to RSP after the prologue. Find the offset of the last fixed 3724 // object, so that we can allocate a slot immediately following it. If there 3725 // were no fixed objects, use offset -SlotSize, which is immediately after the 3726 // return address. Fixed objects have negative frame indices. 3727 MachineFrameInfo &MFI = MF.getFrameInfo(); 3728 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); 3729 int64_t MinFixedObjOffset = -SlotSize; 3730 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) 3731 MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I)); 3732 3733 for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { 3734 for (WinEHHandlerType &H : TBME.HandlerArray) { 3735 int FrameIndex = H.CatchObj.FrameIndex; 3736 if (FrameIndex != INT_MAX) { 3737 // Ensure alignment. 3738 unsigned Align = MFI.getObjectAlign(FrameIndex).value(); 3739 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align; 3740 MinFixedObjOffset -= MFI.getObjectSize(FrameIndex); 3741 MFI.setObjectOffset(FrameIndex, MinFixedObjOffset); 3742 } 3743 } 3744 } 3745 3746 // Ensure alignment. 3747 MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8; 3748 int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; 3749 int UnwindHelpFI = 3750 MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false); 3751 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; 3752 3753 // Store -2 into UnwindHelp on function entry. We have to scan forwards past 3754 // other frame setup instructions. 3755 MachineBasicBlock &MBB = MF.front(); 3756 auto MBBI = MBB.begin(); 3757 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) 3758 ++MBBI; 3759 3760 DebugLoc DL = MBB.findDebugLoc(MBBI); 3761 addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), 3762 UnwindHelpFI) 3763 .addImm(-2); 3764 } 3765 3766 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( 3767 MachineFunction &MF, RegScavenger *RS) const { 3768 if (STI.is32Bit() && MF.hasEHFunclets()) 3769 restoreWinEHStackPointersInParent(MF); 3770 } 3771 3772 void X86FrameLowering::restoreWinEHStackPointersInParent( 3773 MachineFunction &MF) const { 3774 // 32-bit functions have to restore stack pointers when control is transferred 3775 // back to the parent function. These blocks are identified as eh pads that 3776 // are not funclet entries. 3777 bool IsSEH = isAsynchronousEHPersonality( 3778 classifyEHPersonality(MF.getFunction().getPersonalityFn())); 3779 for (MachineBasicBlock &MBB : MF) { 3780 bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); 3781 if (NeedsRestore) 3782 restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(), 3783 /*RestoreSP=*/IsSEH); 3784 } 3785 } 3786