//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCFrameLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
    EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                         cl::desc("Enable spills in prologue to vector registers."),
                         cl::init(false), cl::Hidden);

static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
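// The offsets in the tables below are negative byte displacements from the
// stack pointer on entry to the function (the base of the caller's frame);
// e.g. F31 is spilled 8 bytes below it and F14 144 bytes below it. The FPR,
// GPR, CR, VRSAVE, VR and SPE areas deliberately reuse the same negative
// offsets; processFunctionBeforeFrameFinalized() later lays the areas out
// relative to one another (see the note below the macros).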
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
  {PPC::F31, -8},         \
  {PPC::F30, -16},        \
  {PPC::F29, -24},        \
  {PPC::F28, -32},        \
  {PPC::F27, -40},        \
  {PPC::F26, -48},        \
  {PPC::F25, -56},        \
  {PPC::F24, -64},        \
  {PPC::F23, -72},        \
  {PPC::F22, -80},        \
  {PPC::F21, -88},        \
  {PPC::F20, -96},        \
  {PPC::F19, -104},       \
  {PPC::F18, -112},       \
  {PPC::F17, -120},       \
  {PPC::F16, -128},       \
  {PPC::F15, -136},       \
  {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
  {PPC::R31, -4},           \
  {PPC::R30, -8},           \
  {PPC::R29, -12},          \
  {PPC::R28, -16},          \
  {PPC::R27, -20},          \
  {PPC::R26, -24},          \
  {PPC::R25, -28},          \
  {PPC::R24, -32},          \
  {PPC::R23, -36},          \
  {PPC::R22, -40},          \
  {PPC::R21, -44},          \
  {PPC::R20, -48},          \
  {PPC::R19, -52},          \
  {PPC::R18, -56},          \
  {PPC::R17, -60},          \
  {PPC::R16, -64},          \
  {PPC::R15, -68},          \
  {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
  {PPC::X31, -8},           \
  {PPC::X30, -16},          \
  {PPC::X29, -24},          \
  {PPC::X28, -32},          \
  {PPC::X27, -40},          \
  {PPC::X26, -48},          \
  {PPC::X25, -56},          \
  {PPC::X24, -64},          \
  {PPC::X23, -72},          \
  {PPC::X22, -80},          \
  {PPC::X21, -88},          \
  {PPC::X20, -96},          \
  {PPC::X19, -104},         \
  {PPC::X18, -112},         \
  {PPC::X17, -120},         \
  {PPC::X16, -128},         \
  {PPC::X15, -136},         \
  {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
  {PPC::V31, -16},       \
  {PPC::V30, -32},       \
  {PPC::V29, -48},       \
  {PPC::V28, -64},       \
  {PPC::V27, -80},       \
  {PPC::V26, -96},       \
  {PPC::V25, -112},      \
  {PPC::V24, -128},      \
  {PPC::V23, -144},      \
  {PPC::V22, -160},      \
  {PPC::V21, -176},      \
  {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
uint64_t
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned NewMaxCallFrameSize = 0;
  uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
                                            &NewMaxCallFrameSize);
  MF.getFrameInfo().setStackSize(FrameSize);
  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
  return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
  bool CanUseRedZone = !MFI.hasVarSizedObjects() &&  // No dynamic alloca.
                       !MFI.adjustsStack() &&        // No calls.
                       !MustSaveLR(MF, LR) &&        // No need to save LR.
                       !FI->mustSaveTOC() &&         // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone).
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}

// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  return (MFI.getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.exposesReturnsTwice() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ?
                           (unsigned) PPC::X30 : FP8Reg;

  for (MachineBasicBlock &MBB : MF)
    for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert(SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so we must
    // consider all registers used within the block.

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ?
                                                           &PPC::G8RCRegClass :
                                                           &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
// Also, an inline stack probe requires two scratch registers: one to hold the
// old SP, and one to materialize a large frame or probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
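  // (On other ABIs the stack update stays at the top of the prologue and at
  // the bottom of the epilogue, i.e. this optimization is simply skipped.)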
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
  int64_t NegFrameSize = -FrameSize;
  if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
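  // These flags decide which of the optional save slots (frame pointer, base
  // pointer, PIC base) the prologue below must fill in addition to LR/CR/TOC.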
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;
  bool HasROPProtect = Subtarget.hasROPProtect();
  bool HasPrivileged = Subtarget.hasPrivileged();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  const MCInstrDesc &MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR);
  const MCInstrDesc &StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW);
  const MCInstrDesc &StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU);
  const MCInstrDesc &StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
                                                        : PPC::STWUX);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
                                                            : PPC::SUBFC);
  const MCInstrDesc &SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
                                                               : PPC::SUBFIC);
  const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
                                                           : PPC::MFCR);
  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
  const MCInstrDesc &HashST =
      TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
                      : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));

  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(
      &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int64_t LROffset = getReturnSaveOffset();

  int64_t FPOffset = 0;
  if (HasFP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int64_t BPOffset = 0;
  if (HasBP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int64_t PBPOffset = 0;
  if (FI->usesPICBase()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  // Get stack alignments.
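  // MaxAlign is the largest alignment required by any object in this frame;
  // together with HasBP it selects the SP-realignment paths below
  // (HasBP && MaxAlign > 1).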
  Align MaxAlign = MFI.getMaxAlign();
  if (HasBP && MaxAlign > 1)
    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  // Check if we can move the stack update instruction (stdu) down the prologue
  // past the callee saves. Hopefully this will avoid the situation where the
  // saves are waiting for the store-with-update to complete.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  bool MovingStackUpdateDown = false;

  // Check if we can move the stack update.
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      // If the callee saved register is spilled to a register instead of the
      // stack then the spill no longer uses the stack pointer.
      // This can lead to two consequences:
      // 1) We no longer need to update the stack because the function does not
      //    spill any callee saved registers to stack.
      // 2) We have a situation where we still have to update the stack pointer
      //    even though some registers are spilled to other registers. In
      //    this case the current code moves the stack update to an incorrect
      //    position.
      // In either case we should abort moving the stack update operation.
      if (CSI.isSpilledToReg()) {
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }

      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the stack update pointer past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
        StackUpdateLoc++;
        MovingStackUpdateDown = true;
      } else {
        // We need all of the Frame Indices to meet these conditions.
        // If they do not, abort the whole operation.
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }
    }

    // If the operation was not aborted then update the object offset.
    if (MovingStackUpdateDown) {
      for (CalleeSavedInfo CSI : Info) {
        int FrIdx = CSI.getFrameIdx();
        if (FrIdx < 0)
          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
      }
    }
  }

  // Where in the prologue we move the CR fields depends on how many scratch
  // registers we have, and if we need to save the link register or not. This
  // lambda is to avoid duplicating the logic in 2 places.
  auto BuildMoveFromCR = [&]() {
    if (isELFv2ABI && MustSaveCRs.size() == 1) {
      // In the ELFv2 ABI, we are not required to save all CR fields.
      // If only one CR field is clobbered, it is more efficient to use
      // mfocrf to selectively save just that field, because mfocrf has
      // shorter latency compared to mfcr.
      assert(isPPC64 && "V2 ABI is 64-bit only.");
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
      MIB.addReg(MustSaveCRs[0], RegState::Kill);
    } else {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
      for (unsigned CRfield : MustSaveCRs)
        MIB.addReg(CRfield, RegState::ImplicitKill);
    }
  };

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMoveFromCR();
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    BuildMoveFromCR();

  if (HasRedZone) {
    if (HasFP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    if (FI->usesPICBase())
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(PPC::R30)
          .addImm(PBPOffset)
          .addReg(SPReg);
    if (HasBP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(BPReg)
          .addImm(BPOffset)
          .addReg(SPReg);
  }

  // Generate the instruction to store the LR. In the case where ROP protection
  // is required the register holding the LR should not be killed as it will be
  // used by the hash store instruction.
  if (MustSaveLR) {
    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
        .addReg(ScratchReg, getKillRegState(!HasROPProtect))
        .addImm(LROffset)
        .addReg(SPReg);

    // Add the ROP protection Hash Store instruction.
    // NOTE: This is technically a violation of the ABI. The hash can be saved
    // up to 512 bytes into the Protected Zone. This can be outside of the
    // initial 288 byte volatile program storage region in the Protected Zone.
    // However, this restriction will be removed in an upcoming revision of the
    // ABI.
    if (HasROPProtect) {
      const int SaveIndex = FI->getROPProtectionHashSaveIndex();
      const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
      assert((ImmOffset <= -8 && ImmOffset >= -512) &&
             "ROP hash save offset out of range.");
      assert(((ImmOffset & 0x7) == 0) &&
             "ROP hash save offset must be 8 byte aligned.");
      BuildMI(MBB, StackUpdateLoc, dl, HashST)
          .addReg(ScratchReg, getKillRegState(true))
          .addImm(ImmOffset)
          .addReg(SPReg);
    }
  }

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) {
    assert(HasRedZone && "A red zone is always available on PPC64");
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
  if (!FrameSize)
    return;

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now.

  if (HasBP && HasRedZone) {
    // Save a copy of r1 as the base pointer.
    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
        .addReg(SPReg)
        .addReg(SPReg);
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
  // the negated frame size will be placed in ScratchReg.
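  // When HasSTUX is set and there is no red zone, the code further down must
  // first recover the old SP from ScratchReg before it can store the FP, BP
  // and PIC base into their slots.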
  bool HasSTUX = false;

  // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe from the
  // mandatory STU(X) instruction, since the POWER ABI requires that the
  // backchain pointer is always stored at *SP.
  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
    // To be consistent with other targets, a pseudo instruction is emitted and
    // will be later expanded in `inlineStackProbe`.
    BuildMI(MBB, MBBI, dl,
            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                            : PPC::PROBED_STACKALLOC_32))
        .addDef(TempReg)
        .addDef(ScratchReg) // ScratchReg stores the old sp.
        .addImm(NegFrameSize);
    // FIXME: HasSTUX is only read if HasRedZone is not set, in which case we
    // update the ScratchReg to meet the assumption that ScratchReg contains
    // the NegFrameSize. This solution is rather tricky.
    if (!HasRedZone) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg)
          .addReg(SPReg);
      HasSTUX = true;
    }
  } else {
    // This condition must be kept in sync with canUseAsPrologue.
    if (HasBP && MaxAlign > 1) {
      if (isPPC64)
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(64 - Log2(MaxAlign));
      else // PPC32...
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(32 - Log2(MaxAlign))
            .addImm(31);
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(NegFrameSize);
      } else {
        assert(!SingleScratchReg && "Only a single scratch reg available");
        TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addReg(TempReg, RegState::Kill);
      }

      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;

    } else if (!isLargeFrame) {
      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
          .addReg(SPReg)
          .addImm(NegFrameSize)
          .addReg(SPReg);

    } else {
      TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;
    }
  }

  // Save the TOC register after the stack pointer update if a prologue TOC
  // save is required for the function.
  if (MustSaveTOC) {
    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
        .addReg(TOCReg, getKillRegState(true))
        .addImm(TOCSaveOffset)
        .addReg(SPReg);
  }

  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
      // The negated frame size is in ScratchReg, and the SPReg has been
      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
      // the stack frame (i.e. the old SP), ideally, we would put the old
      // SP into a register and use it as the base for the stores. The
      // problem is that the only available register may be ScratchReg,
      // which could be R0, and R0 cannot be used as a base address.

      // First, set ScratchReg to the old SP. This may need to be modified
      // later.
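      // ScratchReg currently holds the negated frame size, and
      // SPReg == old SP + NegFrameSize, so SUBF (rD = rB - rA) below
      // recovers the old SP into ScratchReg.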
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addReg(SPReg);

      if (ScratchReg == PPC::R0) {
        // R0 cannot be used as a base register, but it can be used as an
        // index in a store-indexed.
        int LastOffset = 0;
        if (HasFP) {
          // R0 += (FPOffset-LastOffset).
          // Need addic, since addi treats R0 as 0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(FPOffset - LastOffset);
          LastOffset = FPOffset;
          // Store FP into *R0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(FPReg, RegState::Kill) // Save FP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (FI->usesPICBase()) {
          // R0 += (PBPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(PBPOffset - LastOffset);
          LastOffset = PBPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (HasBP) {
          // R0 += (BPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(BPOffset - LastOffset);
          LastOffset = BPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(BPReg, RegState::Kill) // Save BP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
          // BP = R0-LastOffset
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addImm(-LastOffset);
        }
      } else {
        // ScratchReg is not R0, so use it as the base register. It is
        // already set to the old SP, so we can use the offsets directly.

        // Now that the stack frame has been allocated, save all the necessary
        // registers using ScratchReg as the base address.
        if (HasFP)
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(FPReg)
              .addImm(FPOffset)
              .addReg(ScratchReg);
        if (FI->usesPICBase())
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(PPC::R30)
              .addImm(PBPOffset)
              .addReg(ScratchReg);
        if (HasBP) {
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(BPReg)
              .addImm(BPOffset)
              .addReg(ScratchReg);
          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addReg(ScratchReg);
        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
      // field of STWU). To be here we have to be compiling for PPC32.
      // Since the SPReg has been decreased by FrameSize, add it back to each
      // offset.
      if (HasFP)
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(FPReg)
            .addImm(FrameSize + FPOffset)
            .addReg(SPReg);
      if (FI->usesPICBase())
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(PPC::R30)
            .addImm(FrameSize + PBPOffset)
            .addReg(SPReg);
      if (HasBP) {
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(BPReg)
            .addImm(FrameSize + BPOffset)
            .addReg(SPReg);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      }
    }
  }

  // Add Call Frame Information for the instructions we generated above.
  if (needsCFI) {
    unsigned CFIIndex;

    if (HasBP) {
      // Define CFA in terms of BP.
      // Do this in preference to using FP/SP, because if the stack needed
      // aligning then CFA won't be at a fixed offset from FP/SP.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    } else {
      // Adjust the definition of CFA to account for the change in SP.
      assert(NegFrameSize);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (FI->usesPICBase()) {
      // Describe where the PIC base pointer (R30) was saved, at a fixed
      // offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (HasBP) {
      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (MustSaveLR) {
      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If there is a frame pointer, copy R1 into R31.
  if (HasFP) {
    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(SPReg)
        .addReg(SPReg);

    if (!HasBP && needsCFI) {
      // Change the definition of CFA from SP+offset to FP+offset, because SP
      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));

      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  if (needsCFI) {
    // Describe where callee saved registers were saved, at fixed offsets from
    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    for (const CalleeSavedInfo &I : CSI) {
      Register Reg = I.getReg();
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM)
        continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (PPC::CRBITRCRegClass.contains(Reg))
        continue;

      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
        continue;

      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
      // is SP+8, not a frame-relative slot.
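      // (CRSaveOffset is 8 on 64-bit targets, so the record describes the CR
      // word stored in the caller's frame rather than a slot in this
      // function's frame.)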
      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
        // the whole CR word. In the ELFv2 ABI, every CR that was
        // actually saved gets its own CFI record.
        Register CRReg = isELFv2ABI ? Reg : PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (I.isSpilledToReg()) {
        unsigned SpilledReg = I.getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIRegister);
      } else {
        int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
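  // buildDefCFAReg emits a .cfi_def_cfa_register record and buildDefCFA emits
  // a .cfi_def_cfa record for the given register (and offset).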
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe the stack when realignment is required.
  // Note that, per the ABI's requirement, *SP must always equal the value of
  // the back-chain pointer, so only st(w|d)u(x) can be used to update the SP.
  // Following is pseudo code:
  // final_sp = (sp & align) + negframesize;
  // neg_gap = final_sp - sp;
  // while (neg_gap < negprobesize) {
  //   stdu fp, negprobesize(sp);
  //   neg_gap -= negprobesize;
  // }
  // stdux fp, sp, neg_gap
  //
  // When HasBP && HasRedzone, the back-chain pointer is already saved in BPReg
  // before the probe code, so we don't need to save it again and gain one
  // additional register that can be used to materialize the probe size when we
  // need the x-form. Otherwise, we can NOT materialize the probe size, so we
  // can only use the d-form for now.
  //
  // The allocations are:
  // if (HasBP && HasRedzone) {
  //   r0: materialize the probe size if needed so that we can use the x-form.
  //   r12: `neg_gap`
  // } else {
  //   r0: back-chain pointer
  //   r12: `neg_gap`.
  // }
  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register ScratchReg, Register TempReg) {
    assert(HasBP && "The function is supposed to have base pointer when its "
                    "stack is realigned.");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");

    // FIXME: We can eliminate this limitation if we get more information about
    // which parts of the red zone are already used. A used red zone can be
    // treated as probed. But there might be `holes' in the probed red zone,
    // which could complicate the implementation.
    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
           "Probe size should be larger or equal to the size of red-zone so "
           "that red-zone is not clobbered by probing.");

    Register &FinalStackPtr = TempReg;
    // FIXME: We only support NegProbeSize materializable by DForm currently.
    // When HasBP && HasRedzone, we can use xform if we have an additional idle
    // register.
    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
    assert(isInt<16>(NegProbeSize) &&
           "NegProbeSize should be materializable by DForm");
    Register CRReg = PPC::CR0;
    // Layout of output assembly kinda like:
    // bb.0:
    //   ...
    //   sub $scratchreg, $finalsp, r1
    //   cmpdi $scratchreg, <negprobesize>
    //   bge bb.2
    // bb.1:
    //   stdu <backchain>, <negprobesize>(r1)
    //   sub $scratchreg, $scratchreg, negprobesize
    //   cmpdi $scratchreg, <negprobesize>
    //   blt bb.1
    // bb.2:
    //   stdux <backchain>, r1, $scratchreg
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.2
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
                       BackChainPointer);
      if (HasRedZone)
        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
        // to TempReg to satisfy it.
        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
            .addReg(BPReg)
            .addReg(BPReg);
      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    }
    // bb.0
    {
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
          .addReg(SPReg)
          .addReg(FinalStackPtr);
      if (!HasRedZone)
        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(&MBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_GE)
          .addReg(CRReg)
          .addMBB(ProbeExitMBB);
      MBB.addSuccessor(ProbeLoopBodyMBB);
      MBB.addSuccessor(ProbeExitMBB);
    }
    // bb.1
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
                       0, true /*UseDForm*/, BackChainPointer);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
              ScratchReg)
          .addReg(ScratchReg)
          .addImm(-NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ?
                                                      PPC::CMPDI : PPC::CMPWI),
              CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_LT)
          .addReg(CRReg)
          .addMBB(ProbeLoopBodyMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    }
    // Update liveins.
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For the case HasBP && MaxAlign > 1, we have to realign the SP by
  // performing SP = SP - SP % MaxAlign, which makes the probe more like a
  // dynamic probe since the offset subtracted from SP is determined by SP's
  // runtime value.
  if (HasBP && MaxAlign > 1) {
    // Calculate final stack pointer.
    if (isPPC64)
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
            FPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
            FPReg)
        .addReg(ScratchReg)
        .addReg(FPReg);
    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
    if (needsCFI)
      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
    // Probe residual part.
    if (NegResidualSize) {
      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
      if (!ResidualUseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                       ResidualUseDForm, FPReg);
    }
    bool UseDForm = CanUseDForm(NegProbeSize);
    // If the number of blocks is small, just probe them directly.
    if (NumBlocks < 3) {
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      for (int i = 0; i < NumBlocks; ++i)
        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                         FPReg);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
      }
    } else {
      // Since CTR is a volatile register and the current shrink-wrap
      // implementation won't choose an MBB in a loop as the PrologMBB, it's
      // safe to synthesize a CTR loop to probe.
      // Calculate the trip count and store it in CTR.
      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
          .addReg(ScratchReg, RegState::Kill);
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      // Create MBBs of the loop.
      MachineFunction::iterator MBBInsertPoint =
          std::next(CurrentMBB->getIterator());
      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, LoopMBB);
      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, ExitMBB);
      // Synthesize the loop body.
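      // The body is a single store-with-update that both allocates and probes
      // one ProbeSize block, followed by a bdnz back to itself until CTR
      // reaches zero.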
1502 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, 1503 UseDForm, FPReg); 1504 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) 1505 .addMBB(LoopMBB); 1506 LoopMBB->addSuccessor(ExitMBB); 1507 LoopMBB->addSuccessor(LoopMBB); 1508 // Synthesize the exit MBB. 1509 ExitMBB->splice(ExitMBB->end(), CurrentMBB, 1510 std::next(MachineBasicBlock::iterator(MI)), 1511 CurrentMBB->end()); 1512 ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); 1513 CurrentMBB->addSuccessor(LoopMBB); 1514 if (needsCFI) { 1515 // Restore using SPReg to calculate CFA. 1516 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); 1517 } 1518 // Update liveins. 1519 recomputeLiveIns(*LoopMBB); 1520 recomputeLiveIns(*ExitMBB); 1521 } 1522 } 1523 ++NumPrologProbed; 1524 MI.eraseFromParent(); 1525 } 1526 1527 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1528 MachineBasicBlock &MBB) const { 1529 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1530 DebugLoc dl; 1531 1532 if (MBBI != MBB.end()) 1533 dl = MBBI->getDebugLoc(); 1534 1535 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1536 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1537 1538 // Get alignment info so we know how to restore the SP. 1539 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1540 1541 // Get the number of bytes allocated from the FrameInfo. 1542 int64_t FrameSize = MFI.getStackSize(); 1543 1544 // Get processor type. 1545 bool isPPC64 = Subtarget.isPPC64(); 1546 1547 // Check if the link register (LR) has been saved. 1548 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1549 bool MustSaveLR = FI->mustSaveLR(); 1550 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1551 bool MustSaveCR = !MustSaveCRs.empty(); 1552 // Do we have a frame pointer and/or base pointer for this function? 1553 bool HasFP = hasFP(MF); 1554 bool HasBP = RegInfo->hasBasePointer(MF); 1555 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1556 bool HasROPProtect = Subtarget.hasROPProtect(); 1557 bool HasPrivileged = Subtarget.hasPrivileged(); 1558 1559 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1560 Register BPReg = RegInfo->getBaseRegister(MF); 1561 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1562 Register ScratchReg; 1563 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1564 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1565 : PPC::MTLR ); 1566 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1567 : PPC::LWZ ); 1568 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1569 : PPC::LIS ); 1570 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1571 : PPC::OR ); 1572 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1573 : PPC::ORI ); 1574 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1575 : PPC::ADDI ); 1576 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1577 : PPC::ADD4 ); 1578 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1579 : PPC::LWZ); 1580 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1581 : PPC::MTOCRF); 1582 const MCInstrDesc &HashChk = 1583 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8) 1584 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK)); 1585 int64_t LROffset = getReturnSaveOffset(); 1586 1587 int64_t FPOffset = 0; 1588 1589 // Using the same bool variable as below to suppress compiler warnings. 
1590   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1591                                               &TempReg);
1592   assert(SingleScratchReg &&
1593          "Could not find an available scratch register");
1594 
1595   SingleScratchReg = ScratchReg == TempReg;
1596 
1597   if (HasFP) {
1598     int FPIndex = FI->getFramePointerSaveIndex();
1599     assert(FPIndex && "No Frame Pointer Save Slot!");
1600     FPOffset = MFI.getObjectOffset(FPIndex);
1601   }
1602 
1603   int64_t BPOffset = 0;
1604   if (HasBP) {
1605     int BPIndex = FI->getBasePointerSaveIndex();
1606     assert(BPIndex && "No Base Pointer Save Slot!");
1607     BPOffset = MFI.getObjectOffset(BPIndex);
1608   }
1609 
1610   int64_t PBPOffset = 0;
1611   if (FI->usesPICBase()) {
1612     int PBPIndex = FI->getPICBasePointerSaveIndex();
1613     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1614     PBPOffset = MFI.getObjectOffset(PBPIndex);
1615   }
1616 
1617   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1618 
1619   if (IsReturnBlock) {
1620     unsigned RetOpcode = MBBI->getOpcode();
1621     bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1622                      RetOpcode == PPC::TCRETURNdi ||
1623                      RetOpcode == PPC::TCRETURNai ||
1624                      RetOpcode == PPC::TCRETURNri8 ||
1625                      RetOpcode == PPC::TCRETURNdi8 ||
1626                      RetOpcode == PPC::TCRETURNai8;
1627 
1628     if (UsesTCRet) {
1629       int MaxTCRetDelta = FI->getTailCallSPDelta();
1630       MachineOperand &StackAdjust = MBBI->getOperand(1);
1631       assert(StackAdjust.isImm() && "Expecting immediate value.");
1632       // Adjust stack pointer.
1633       int StackAdj = StackAdjust.getImm();
1634       int Delta = StackAdj - MaxTCRetDelta;
1635       assert((Delta >= 0) && "Delta must be non-negative");
1636       if (MaxTCRetDelta > 0)
1637         FrameSize += (StackAdj + Delta);
1638       else
1639         FrameSize += StackAdj;
1640     }
1641   }
1642 
1643   // Frames of 32KB and larger require special handling because they cannot be
1644   // indexed into with a simple LD/LWZ immediate offset operand.
1645   bool isLargeFrame = !isInt<16>(FrameSize);
1646 
1647   // On targets without red zone, the SP needs to be restored last, so that
1648   // all live contents of the stack frame are upwards of the SP. This means
1649   // that we cannot restore SP just now, since there may be more registers
1650   // to restore from the stack frame (e.g. R31). If the frame size is not
1651   // a simple immediate value, we will need a spare register to hold the
1652   // restored SP. If the frame size is known and small, we can simply adjust
1653   // the offsets of the registers to be restored, and still use SP to restore
1654   // them. In such a case, the final update of SP will be to add the frame
1655   // size to it.
1656   // To simplify the code, set RBReg to the base register used to restore
1657   // values from the stack, and set SPAdd to the value that needs to be added
1658   // to the SP at the end. The default values are as if a red zone were present.
1659   unsigned RBReg = SPReg;
1660   uint64_t SPAdd = 0;
1661 
1662   // Check if we can move the stack update instruction up the epilogue
1663   // past the callee saves. This will allow the move-to-LR instruction
1664   // to be executed before the restores of the callee saves, which means
1665   // that the callee-save restores can hide the latency of the MTLR instruction.
1666   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1667   if (stackUpdateCanBeMoved(MF)) {
1668     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
1669     for (CalleeSavedInfo CSI : Info) {
1670       // If the callee saved register is spilled to another register, abort the
1671       // stack update movement.
1672 if (CSI.isSpilledToReg()) { 1673 StackUpdateLoc = MBBI; 1674 break; 1675 } 1676 int FrIdx = CSI.getFrameIdx(); 1677 // If the frame index is not negative the callee saved info belongs to a 1678 // stack object that is not a fixed stack object. We ignore non-fixed 1679 // stack objects because we won't move the update of the stack pointer 1680 // past them. 1681 if (FrIdx >= 0) 1682 continue; 1683 1684 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1685 StackUpdateLoc--; 1686 else { 1687 // Abort the operation as we can't update all CSR restores. 1688 StackUpdateLoc = MBBI; 1689 break; 1690 } 1691 } 1692 } 1693 1694 if (FrameSize) { 1695 // In the prologue, the loaded (or persistent) stack pointer value is 1696 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1697 // zone add this offset back now. 1698 1699 // If the function has a base pointer, the stack pointer has been copied 1700 // to it so we can restore it by copying in the other direction. 1701 if (HasRedZone && HasBP) { 1702 BuildMI(MBB, MBBI, dl, OrInst, RBReg). 1703 addReg(BPReg). 1704 addReg(BPReg); 1705 } 1706 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1707 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1708 // call which invalidates the stack pointer value in SP(0). So we use the 1709 // value of R31 in this case. Similar situation exists with setjmp. 1710 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { 1711 assert(HasFP && "Expecting a valid frame pointer."); 1712 if (!HasRedZone) 1713 RBReg = FPReg; 1714 if (!isLargeFrame) { 1715 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1716 .addReg(FPReg).addImm(FrameSize); 1717 } else { 1718 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); 1719 BuildMI(MBB, MBBI, dl, AddInst) 1720 .addReg(RBReg) 1721 .addReg(FPReg) 1722 .addReg(ScratchReg); 1723 } 1724 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1725 if (HasRedZone) { 1726 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1727 .addReg(SPReg) 1728 .addImm(FrameSize); 1729 } else { 1730 // Make sure that adding FrameSize will not overflow the max offset 1731 // size. 1732 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1733 "Local offsets should be negative"); 1734 SPAdd = FrameSize; 1735 FPOffset += FrameSize; 1736 BPOffset += FrameSize; 1737 PBPOffset += FrameSize; 1738 } 1739 } else { 1740 // We don't want to use ScratchReg as a base register, because it 1741 // could happen to be R0. Use FP instead, but make sure to preserve it. 1742 if (!HasRedZone) { 1743 // If FP is not saved, copy it to ScratchReg. 1744 if (!HasFP) 1745 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1746 .addReg(FPReg) 1747 .addReg(FPReg); 1748 RBReg = FPReg; 1749 } 1750 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1751 .addImm(0) 1752 .addReg(SPReg); 1753 } 1754 } 1755 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1756 // If there is no red zone, ScratchReg may be needed for holding a useful 1757 // value (although not the base register). Make sure it is not overwritten 1758 // too early. 1759 1760 // If we need to restore both the LR and the CR and we only have one 1761 // available scratch register, we must do them one at a time. 1762 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1763 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1764 // is live here. 
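    // A rough illustration of what gets emitted in this case (register choice,
    // offsets, and the mtocrf field mask are placeholders):
    //   lwz r12, <crsaveoffset>(r1)
    //   mtocrf <crfield>, r12
    // with one mtocrf per nonvolatile CR field; LR is reloaded into the same
    // scratch register afterwards and moved with mtlr.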
1765 assert(HasRedZone && "Expecting red zone"); 1766 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1767 .addImm(CRSaveOffset) 1768 .addReg(SPReg); 1769 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1770 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1771 .addReg(TempReg, getKillRegState(i == e-1)); 1772 } 1773 1774 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1775 // LR is stored in the caller's stack frame. ScratchReg will be needed 1776 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1777 // a base register anyway, because it may happen to be R0. 1778 bool LoadedLR = false; 1779 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1780 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1781 .addImm(LROffset+SPAdd) 1782 .addReg(RBReg); 1783 LoadedLR = true; 1784 } 1785 1786 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1787 assert(RBReg == SPReg && "Should be using SP as a base register"); 1788 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1789 .addImm(CRSaveOffset) 1790 .addReg(RBReg); 1791 } 1792 1793 if (HasFP) { 1794 // If there is red zone, restore FP directly, since SP has already been 1795 // restored. Otherwise, restore the value of FP into ScratchReg. 1796 if (HasRedZone || RBReg == SPReg) 1797 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1798 .addImm(FPOffset) 1799 .addReg(SPReg); 1800 else 1801 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1802 .addImm(FPOffset) 1803 .addReg(RBReg); 1804 } 1805 1806 if (FI->usesPICBase()) 1807 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1808 .addImm(PBPOffset) 1809 .addReg(RBReg); 1810 1811 if (HasBP) 1812 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1813 .addImm(BPOffset) 1814 .addReg(RBReg); 1815 1816 // There is nothing more to be loaded from the stack, so now we can 1817 // restore SP: SP = RBReg + SPAdd. 1818 if (RBReg != SPReg || SPAdd != 0) { 1819 assert(!HasRedZone && "This should not happen with red zone"); 1820 // If SPAdd is 0, generate a copy. 1821 if (SPAdd == 0) 1822 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1823 .addReg(RBReg) 1824 .addReg(RBReg); 1825 else 1826 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1827 .addReg(RBReg) 1828 .addImm(SPAdd); 1829 1830 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1831 if (RBReg == FPReg) 1832 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1833 .addReg(ScratchReg) 1834 .addReg(ScratchReg); 1835 1836 // Now load the LR from the caller's stack frame. 1837 if (MustSaveLR && !LoadedLR) 1838 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1839 .addImm(LROffset) 1840 .addReg(SPReg); 1841 } 1842 1843 if (MustSaveCR && 1844 !(SingleScratchReg && MustSaveLR)) 1845 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1846 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1847 .addReg(TempReg, getKillRegState(i == e-1)); 1848 1849 if (MustSaveLR) { 1850 // If ROP protection is required, an extra instruction is added to compute a 1851 // hash and then compare it to the hash stored in the prologue. 
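    // As an illustrative sketch, on 64-bit targets the end of the epilogue then
    // looks roughly like (offsets and register choice are placeholders; the
    // privileged hashchkp form is used when HasPrivileged is set):
    //   ld r0, <lroffset>(r1)
    //   hashchk r0, <hashoffset>(r1)
    //   mtlr r0
    //   blr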
1852 if (HasROPProtect) { 1853 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 1854 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 1855 assert((ImmOffset <= -8 && ImmOffset >= -512) && 1856 "ROP hash check location offset out of range."); 1857 assert(((ImmOffset & 0x7) == 0) && 1858 "ROP hash check location offset must be 8 byte aligned."); 1859 BuildMI(MBB, StackUpdateLoc, dl, HashChk) 1860 .addReg(ScratchReg) 1861 .addImm(ImmOffset) 1862 .addReg(SPReg); 1863 } 1864 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1865 } 1866 1867 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1868 // call optimization 1869 if (IsReturnBlock) { 1870 unsigned RetOpcode = MBBI->getOpcode(); 1871 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1872 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1873 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1874 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1875 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1876 1877 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1878 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1879 .addReg(SPReg).addImm(CallerAllocatedAmt); 1880 } else { 1881 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1882 .addImm(CallerAllocatedAmt >> 16); 1883 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1884 .addReg(ScratchReg, RegState::Kill) 1885 .addImm(CallerAllocatedAmt & 0xFFFF); 1886 BuildMI(MBB, MBBI, dl, AddInst) 1887 .addReg(SPReg) 1888 .addReg(FPReg) 1889 .addReg(ScratchReg); 1890 } 1891 } else { 1892 createTailCallBranchInstr(MBB); 1893 } 1894 } 1895 } 1896 1897 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1898 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1899 1900 // If we got this far a first terminator should exist. 1901 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1902 1903 DebugLoc dl = MBBI->getDebugLoc(); 1904 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1905 1906 // Create branch instruction for pseudo tail call return instruction. 1907 // The TCRETURNdi variants are direct calls. Valid targets for those are 1908 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1909 // since we can tail call external functions with PC-Rel (i.e. we don't need 1910 // to worry about different TOC pointers). Some of the external functions will 1911 // be MO_GlobalAddress while others like memcpy for example, are going to 1912 // be MO_ExternalSymbol. 1913 unsigned RetOpcode = MBBI->getOpcode(); 1914 if (RetOpcode == PPC::TCRETURNdi) { 1915 MBBI = MBB.getLastNonDebugInstr(); 1916 MachineOperand &JumpTarget = MBBI->getOperand(0); 1917 if (JumpTarget.isGlobal()) 1918 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1919 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1920 else if (JumpTarget.isSymbol()) 1921 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
1922 addExternalSymbol(JumpTarget.getSymbolName()); 1923 else 1924 llvm_unreachable("Expecting Global or External Symbol"); 1925 } else if (RetOpcode == PPC::TCRETURNri) { 1926 MBBI = MBB.getLastNonDebugInstr(); 1927 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1928 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1929 } else if (RetOpcode == PPC::TCRETURNai) { 1930 MBBI = MBB.getLastNonDebugInstr(); 1931 MachineOperand &JumpTarget = MBBI->getOperand(0); 1932 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1933 } else if (RetOpcode == PPC::TCRETURNdi8) { 1934 MBBI = MBB.getLastNonDebugInstr(); 1935 MachineOperand &JumpTarget = MBBI->getOperand(0); 1936 if (JumpTarget.isGlobal()) 1937 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1938 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1939 else if (JumpTarget.isSymbol()) 1940 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1941 addExternalSymbol(JumpTarget.getSymbolName()); 1942 else 1943 llvm_unreachable("Expecting Global or External Symbol"); 1944 } else if (RetOpcode == PPC::TCRETURNri8) { 1945 MBBI = MBB.getLastNonDebugInstr(); 1946 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1947 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1948 } else if (RetOpcode == PPC::TCRETURNai8) { 1949 MBBI = MBB.getLastNonDebugInstr(); 1950 MachineOperand &JumpTarget = MBBI->getOperand(0); 1951 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1952 } 1953 } 1954 1955 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1956 BitVector &SavedRegs, 1957 RegScavenger *RS) const { 1958 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1959 1960 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1961 1962 // Do not explicitly save the callee saved VSRp registers. 1963 // The individual VSR subregisters will be saved instead. 1964 SavedRegs.reset(PPC::VSRp26); 1965 SavedRegs.reset(PPC::VSRp27); 1966 SavedRegs.reset(PPC::VSRp28); 1967 SavedRegs.reset(PPC::VSRp29); 1968 SavedRegs.reset(PPC::VSRp30); 1969 SavedRegs.reset(PPC::VSRp31); 1970 1971 // Save and clear the LR state. 1972 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1973 unsigned LR = RegInfo->getRARegister(); 1974 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1975 SavedRegs.reset(LR); 1976 1977 // Save R31 if necessary 1978 int FPSI = FI->getFramePointerSaveIndex(); 1979 const bool isPPC64 = Subtarget.isPPC64(); 1980 MachineFrameInfo &MFI = MF.getFrameInfo(); 1981 1982 // If the frame pointer save index hasn't been defined yet. 1983 if (!FPSI && needsFP(MF)) { 1984 // Find out what the fix offset of the frame pointer save area. 1985 int FPOffset = getFramePointerSaveOffset(); 1986 // Allocate the frame index for frame pointer save area. 1987 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1988 // Save the result. 1989 FI->setFramePointerSaveIndex(FPSI); 1990 } 1991 1992 int BPSI = FI->getBasePointerSaveIndex(); 1993 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1994 int BPOffset = getBasePointerSaveOffset(); 1995 // Allocate the frame index for the base pointer save area. 1996 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1997 // Save the result. 1998 FI->setBasePointerSaveIndex(BPSI); 1999 } 2000 2001 // Reserve stack space for the PIC Base register (R30). 2002 // Only used in SVR4 32-bit. 
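  // For example, a 32-bit ELF function compiled as position independent
  // materializes its PIC base into R30 in the prologue, and the fixed slot
  // created below is where that value is saved and later reloaded by the
  // epilogue.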
2003   if (FI->usesPICBase()) {
2004     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2005     FI->setPICBasePointerSaveIndex(PBPSI);
2006   }
2007 
2008   // Make sure we don't explicitly spill r31, because, for example, we have
2009   // some inline asm which explicitly clobbers it, when we otherwise have a
2010   // frame pointer and are using r31's spill slot for the prologue/epilogue
2011   // code. Same goes for the base pointer and the PIC base register.
2012   if (needsFP(MF))
2013     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2014   if (RegInfo->hasBasePointer(MF))
2015     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2016   if (FI->usesPICBase())
2017     SavedRegs.reset(PPC::R30);
2018 
2019   // Reserve stack space to move the linkage area to in case of a tail call.
2020   int TCSPDelta = 0;
2021   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2022       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2023     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2024   }
2025 
2026   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2027   // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
2028   // object at the offset of the CR-save slot in the linkage area. The actual
2029   // save and restore of the condition register will be created as part of
2030   // prologue and epilogue insertion, but the FixedStack object is needed to
2031   // keep the CalleeSavedInfo valid.
2032   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2033        SavedRegs.test(PPC::CR4))) {
2034     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2035     const int64_t SpillOffset =
2036         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2037     int FrameIdx =
2038         MFI.CreateFixedObject(SpillSize, SpillOffset,
2039                               /* IsImmutable */ true, /* IsAliased */ false);
2040     FI->setCRSpillFrameIndex(FrameIdx);
2041   }
2042 }
2043 
2044 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2045                                                            RegScavenger *RS) const {
2046   // Get callee saved register information.
2047   MachineFrameInfo &MFI = MF.getFrameInfo();
2048   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2049 
2050   // If the function is shrink-wrapped and has a tail call, the tail call
2051   // might not be in the new RestoreBlock chosen by shrink-wrapping, so the
2052   // real branch instruction won't be generated by emitEpilogue(). Handle
2053   // that case here.
2054   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2055     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2056     for (MachineBasicBlock &MBB : MF) {
2057       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2058         createTailCallBranchInstr(MBB);
2059     }
2060   }
2061 
2062   // Early exit if no callee saved registers are modified!
2063   if (CSI.empty() && !needsFP(MF)) {
2064     addScavengingSpillSlot(MF, RS);
2065     return;
2066   }
2067 
2068   unsigned MinGPR = PPC::R31;
2069   unsigned MinG8R = PPC::X31;
2070   unsigned MinFPR = PPC::F31;
2071   unsigned MinVR = Subtarget.hasSPE() ?
PPC::S31 : PPC::V31; 2072 2073 bool HasGPSaveArea = false; 2074 bool HasG8SaveArea = false; 2075 bool HasFPSaveArea = false; 2076 bool HasVRSaveArea = false; 2077 2078 SmallVector<CalleeSavedInfo, 18> GPRegs; 2079 SmallVector<CalleeSavedInfo, 18> G8Regs; 2080 SmallVector<CalleeSavedInfo, 18> FPRegs; 2081 SmallVector<CalleeSavedInfo, 18> VRegs; 2082 2083 for (const CalleeSavedInfo &I : CSI) { 2084 Register Reg = I.getReg(); 2085 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 2086 (Reg != PPC::X2 && Reg != PPC::R2)) && 2087 "Not expecting to try to spill R2 in a function that must save TOC"); 2088 if (PPC::GPRCRegClass.contains(Reg)) { 2089 HasGPSaveArea = true; 2090 2091 GPRegs.push_back(I); 2092 2093 if (Reg < MinGPR) { 2094 MinGPR = Reg; 2095 } 2096 } else if (PPC::G8RCRegClass.contains(Reg)) { 2097 HasG8SaveArea = true; 2098 2099 G8Regs.push_back(I); 2100 2101 if (Reg < MinG8R) { 2102 MinG8R = Reg; 2103 } 2104 } else if (PPC::F8RCRegClass.contains(Reg)) { 2105 HasFPSaveArea = true; 2106 2107 FPRegs.push_back(I); 2108 2109 if (Reg < MinFPR) { 2110 MinFPR = Reg; 2111 } 2112 } else if (PPC::CRBITRCRegClass.contains(Reg) || 2113 PPC::CRRCRegClass.contains(Reg)) { 2114 ; // do nothing, as we already know whether CRs are spilled 2115 } else if (PPC::VRRCRegClass.contains(Reg) || 2116 PPC::SPERCRegClass.contains(Reg)) { 2117 // Altivec and SPE are mutually exclusive, but have the same stack 2118 // alignment requirements, so overload the save area for both cases. 2119 HasVRSaveArea = true; 2120 2121 VRegs.push_back(I); 2122 2123 if (Reg < MinVR) { 2124 MinVR = Reg; 2125 } 2126 } else { 2127 llvm_unreachable("Unknown RegisterClass!"); 2128 } 2129 } 2130 2131 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 2132 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2133 2134 int64_t LowerBound = 0; 2135 2136 // Take into account stack space reserved for tail calls. 2137 int TCSPDelta = 0; 2138 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2139 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 2140 LowerBound = TCSPDelta; 2141 } 2142 2143 // The Floating-point register save area is right below the back chain word 2144 // of the previous stack frame. 2145 if (HasFPSaveArea) { 2146 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 2147 int FI = FPRegs[i].getFrameIdx(); 2148 2149 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2150 } 2151 2152 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 2153 } 2154 2155 // Check whether the frame pointer register is allocated. If so, make sure it 2156 // is spilled to the correct offset. 2157 if (needsFP(MF)) { 2158 int FI = PFI->getFramePointerSaveIndex(); 2159 assert(FI && "No Frame Pointer Save Slot!"); 2160 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2161 // FP is R31/X31, so no need to update MinGPR/MinG8R. 
2162 HasGPSaveArea = true; 2163 } 2164 2165 if (PFI->usesPICBase()) { 2166 int FI = PFI->getPICBasePointerSaveIndex(); 2167 assert(FI && "No PIC Base Pointer Save Slot!"); 2168 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2169 2170 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 2171 HasGPSaveArea = true; 2172 } 2173 2174 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2175 if (RegInfo->hasBasePointer(MF)) { 2176 int FI = PFI->getBasePointerSaveIndex(); 2177 assert(FI && "No Base Pointer Save Slot!"); 2178 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2179 2180 Register BP = RegInfo->getBaseRegister(MF); 2181 if (PPC::G8RCRegClass.contains(BP)) { 2182 MinG8R = std::min<unsigned>(MinG8R, BP); 2183 HasG8SaveArea = true; 2184 } else if (PPC::GPRCRegClass.contains(BP)) { 2185 MinGPR = std::min<unsigned>(MinGPR, BP); 2186 HasGPSaveArea = true; 2187 } 2188 } 2189 2190 // General register save area starts right below the Floating-point 2191 // register save area. 2192 if (HasGPSaveArea || HasG8SaveArea) { 2193 // Move general register save area spill slots down, taking into account 2194 // the size of the Floating-point register save area. 2195 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2196 if (!GPRegs[i].isSpilledToReg()) { 2197 int FI = GPRegs[i].getFrameIdx(); 2198 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2199 } 2200 } 2201 2202 // Move general register save area spill slots down, taking into account 2203 // the size of the Floating-point register save area. 2204 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2205 if (!G8Regs[i].isSpilledToReg()) { 2206 int FI = G8Regs[i].getFrameIdx(); 2207 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2208 } 2209 } 2210 2211 unsigned MinReg = 2212 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2213 TRI->getEncodingValue(MinG8R)); 2214 2215 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2216 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2217 } 2218 2219 // For 32-bit only, the CR save area is below the general register 2220 // save area. For 64-bit SVR4, the CR save area is addressed relative 2221 // to the stack pointer and hence does not need an adjustment here. 2222 // Only CR2 (the first nonvolatile spilled) has an associated frame 2223 // index so that we have a single uniform save area. 2224 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2225 // Adjust the frame index of the CR spill slot. 2226 for (const auto &CSInfo : CSI) { 2227 if (CSInfo.getReg() == PPC::CR2) { 2228 int FI = CSInfo.getFrameIdx(); 2229 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2230 break; 2231 } 2232 } 2233 2234 LowerBound -= 4; // The CR save area is always 4 bytes long. 2235 } 2236 2237 // Both Altivec and SPE have the same alignment and padding requirements 2238 // within the stack frame. 2239 if (HasVRSaveArea) { 2240 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2241 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2242 // we are using negative number here (the stack grows downward). We should 2243 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2244 // is the alignment size ( n = 16 here) and y is the size after aligning. 
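    // For example, if LowerBound is -200 before the vector save area is laid
    // out, -200 & ~15 == -208, so the first vector slot lands on a 16-byte
    // boundary and 8 bytes of padding are introduced.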
2245 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2246 LowerBound &= ~(15); 2247 2248 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2249 int FI = VRegs[i].getFrameIdx(); 2250 2251 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2252 } 2253 } 2254 2255 addScavengingSpillSlot(MF, RS); 2256 } 2257 2258 void 2259 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2260 RegScavenger *RS) const { 2261 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2262 // a large stack, which will require scavenging a register to materialize a 2263 // large offset. 2264 2265 // We need to have a scavenger spill slot for spills if the frame size is 2266 // large. In case there is no free register for large-offset addressing, 2267 // this slot is used for the necessary emergency spill. Also, we need the 2268 // slot for dynamic stack allocations. 2269 2270 // The scavenger might be invoked if the frame offset does not fit into 2271 // the 16-bit immediate. We don't know the complete frame size here 2272 // because we've not yet computed callee-saved register spills or the 2273 // needed alignment padding. 2274 unsigned StackSize = determineFrameLayout(MF, true); 2275 MachineFrameInfo &MFI = MF.getFrameInfo(); 2276 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || 2277 (hasSpills(MF) && !isInt<16>(StackSize))) { 2278 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2279 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2280 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2281 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2282 unsigned Size = TRI.getSpillSize(RC); 2283 Align Alignment = TRI.getSpillAlign(RC); 2284 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2285 2286 // Might we have over-aligned allocas? 2287 bool HasAlVars = 2288 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2289 2290 // These kinds of spills might need two registers. 2291 if (spillsCR(MF) || HasAlVars) 2292 RS->addScavengingFrameIndex( 2293 MFI.CreateStackObject(Size, Alignment, false)); 2294 } 2295 } 2296 2297 // This function checks if a callee saved gpr can be spilled to a volatile 2298 // vector register. This occurs for leaf functions when the option 2299 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2300 // which were not spilled to vectors, return false so the target independent 2301 // code can handle them by assigning a FrameIdx to a stack slot. 2302 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2303 MachineFunction &MF, const TargetRegisterInfo *TRI, 2304 std::vector<CalleeSavedInfo> &CSI) const { 2305 2306 if (CSI.empty()) 2307 return true; // Early exit if no callee saved registers are modified! 2308 2309 // Early exit if cannot spill gprs to volatile vector registers. 2310 MachineFrameInfo &MFI = MF.getFrameInfo(); 2311 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2312 return false; 2313 2314 // Build a BitVector of VSRs that can be used for spilling GPRs. 
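  // Only volatile VSX registers that are not used anywhere in the function are
  // candidates. As an illustrative example, a leaf function that saves X30 and
  // X31 and has such a register free can keep both GPRs in a single VSR via
  // mtvsrdd and restore them with mfvsrd/mfvsrld; any callee-saved GPR that
  // cannot be assigned a VSR falls back to an ordinary stack slot.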
2315 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2316 BitVector BVCalleeSaved(TRI->getNumRegs()); 2317 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2318 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2319 for (unsigned i = 0; CSRegs[i]; ++i) 2320 BVCalleeSaved.set(CSRegs[i]); 2321 2322 for (unsigned Reg : BVAllocatable.set_bits()) { 2323 // Set to 0 if the register is not a volatile VSX register, or if it is 2324 // used in the function. 2325 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || 2326 MF.getRegInfo().isPhysRegUsed(Reg)) 2327 BVAllocatable.reset(Reg); 2328 } 2329 2330 bool AllSpilledToReg = true; 2331 unsigned LastVSRUsedForSpill = 0; 2332 for (auto &CS : CSI) { 2333 if (BVAllocatable.none()) 2334 return false; 2335 2336 Register Reg = CS.getReg(); 2337 2338 if (!PPC::G8RCRegClass.contains(Reg)) { 2339 AllSpilledToReg = false; 2340 continue; 2341 } 2342 2343 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs 2344 // into one VSR using the mtvsrdd instruction. 2345 if (LastVSRUsedForSpill != 0) { 2346 CS.setDstReg(LastVSRUsedForSpill); 2347 BVAllocatable.reset(LastVSRUsedForSpill); 2348 LastVSRUsedForSpill = 0; 2349 continue; 2350 } 2351 2352 unsigned VolatileVFReg = BVAllocatable.find_first(); 2353 if (VolatileVFReg < BVAllocatable.size()) { 2354 CS.setDstReg(VolatileVFReg); 2355 LastVSRUsedForSpill = VolatileVFReg; 2356 } else { 2357 AllSpilledToReg = false; 2358 } 2359 } 2360 return AllSpilledToReg; 2361 } 2362 2363 bool PPCFrameLowering::spillCalleeSavedRegisters( 2364 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2365 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2366 2367 MachineFunction *MF = MBB.getParent(); 2368 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2369 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2370 bool MustSaveTOC = FI->mustSaveTOC(); 2371 DebugLoc DL; 2372 bool CRSpilled = false; 2373 MachineInstrBuilder CRMIB; 2374 BitVector Spilled(TRI->getNumRegs()); 2375 2376 VSRContainingGPRs.clear(); 2377 2378 // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one 2379 // or two GPRs, so we need table to record information for later save/restore. 2380 for (const CalleeSavedInfo &Info : CSI) { 2381 if (Info.isSpilledToReg()) { 2382 auto &SpilledVSR = 2383 VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second; 2384 assert(SpilledVSR.second == 0 && 2385 "Can't spill more than two GPRs into VSR!"); 2386 if (SpilledVSR.first == 0) 2387 SpilledVSR.first = Info.getReg(); 2388 else 2389 SpilledVSR.second = Info.getReg(); 2390 } 2391 } 2392 2393 for (const CalleeSavedInfo &I : CSI) { 2394 Register Reg = I.getReg(); 2395 2396 // CR2 through CR4 are the nonvolatile CR fields. 2397 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2398 2399 // Add the callee-saved register as live-in; it's killed at the spill. 2400 // Do not do this for callee-saved registers that are live-in to the 2401 // function because they will already be marked live-in and this will be 2402 // adding it for a second time. It is an error to add the same register 2403 // to the set more than once. 2404 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2405 bool IsLiveIn = MRI.isLiveIn(Reg); 2406 if (!IsLiveIn) 2407 MBB.addLiveIn(Reg); 2408 2409 if (CRSpilled && IsCRField) { 2410 CRMIB.addReg(Reg, RegState::ImplicitKill); 2411 continue; 2412 } 2413 2414 // The actual spill will happen in the prologue. 
2415 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2416 continue; 2417 2418 // Insert the spill to the stack frame. 2419 if (IsCRField) { 2420 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2421 if (!Subtarget.is32BitELFABI()) { 2422 // The actual spill will happen at the start of the prologue. 2423 FuncInfo->addMustSaveCR(Reg); 2424 } else { 2425 CRSpilled = true; 2426 FuncInfo->setSpillsCR(); 2427 2428 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2429 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2430 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2431 .addReg(Reg, RegState::ImplicitKill); 2432 2433 MBB.insert(MI, CRMIB); 2434 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2435 .addReg(PPC::R12, 2436 getKillRegState(true)), 2437 I.getFrameIdx())); 2438 } 2439 } else { 2440 if (I.isSpilledToReg()) { 2441 unsigned Dst = I.getDstReg(); 2442 2443 if (Spilled[Dst]) 2444 continue; 2445 2446 if (VSRContainingGPRs[Dst].second != 0) { 2447 assert(Subtarget.hasP9Vector() && 2448 "mtvsrdd is unavailable on pre-P9 targets."); 2449 2450 NumPESpillVSR += 2; 2451 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst) 2452 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)) 2453 .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true)); 2454 } else if (VSRContainingGPRs[Dst].second == 0) { 2455 assert(Subtarget.hasP8Vector() && 2456 "Can't move GPR to VSR on pre-P8 targets."); 2457 2458 ++NumPESpillVSR; 2459 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), 2460 TRI->getSubReg(Dst, PPC::sub_64)) 2461 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)); 2462 } else { 2463 llvm_unreachable("More than two GPRs spilled to a VSR!"); 2464 } 2465 Spilled.set(Dst); 2466 } else { 2467 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2468 // Use !IsLiveIn for the kill flag. 2469 // We do not want to kill registers that are live in this function 2470 // before their use because they will become undefined registers. 2471 // Functions without NoUnwind need to preserve the order of elements in 2472 // saved vector registers. 
2473 if (Subtarget.needsSwapsForVSXMemOps() && 2474 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2475 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2476 I.getFrameIdx(), RC, TRI); 2477 else 2478 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), 2479 RC, TRI); 2480 } 2481 } 2482 } 2483 return true; 2484 } 2485 2486 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2487 bool CR4Spilled, MachineBasicBlock &MBB, 2488 MachineBasicBlock::iterator MI, 2489 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2490 2491 MachineFunction *MF = MBB.getParent(); 2492 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2493 DebugLoc DL; 2494 unsigned MoveReg = PPC::R12; 2495 2496 // 32-bit: FP-relative 2497 MBB.insert(MI, 2498 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2499 CSI[CSIIndex].getFrameIdx())); 2500 2501 unsigned RestoreOp = PPC::MTOCRF; 2502 if (CR2Spilled) 2503 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2504 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2505 2506 if (CR3Spilled) 2507 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2508 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2509 2510 if (CR4Spilled) 2511 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2512 .addReg(MoveReg, getKillRegState(true))); 2513 } 2514 2515 MachineBasicBlock::iterator PPCFrameLowering:: 2516 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2517 MachineBasicBlock::iterator I) const { 2518 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2519 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2520 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2521 // Add (actually subtract) back the amount the callee popped on return. 2522 if (int CalleeAmt = I->getOperand(1).getImm()) { 2523 bool is64Bit = Subtarget.isPPC64(); 2524 CalleeAmt *= -1; 2525 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2526 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2527 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2528 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2529 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2530 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2531 const DebugLoc &dl = I->getDebugLoc(); 2532 2533 if (isInt<16>(CalleeAmt)) { 2534 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2535 .addReg(StackReg, RegState::Kill) 2536 .addImm(CalleeAmt); 2537 } else { 2538 MachineBasicBlock::iterator MBBI = I; 2539 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2540 .addImm(CalleeAmt >> 16); 2541 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2542 .addReg(TmpReg, RegState::Kill) 2543 .addImm(CalleeAmt & 0xFFFF); 2544 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2545 .addReg(StackReg, RegState::Kill) 2546 .addReg(TmpReg); 2547 } 2548 } 2549 } 2550 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 
2551 return MBB.erase(I); 2552 } 2553 2554 static bool isCalleeSavedCR(unsigned Reg) { 2555 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2556 } 2557 2558 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2559 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2560 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2561 MachineFunction *MF = MBB.getParent(); 2562 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2563 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2564 bool MustSaveTOC = FI->mustSaveTOC(); 2565 bool CR2Spilled = false; 2566 bool CR3Spilled = false; 2567 bool CR4Spilled = false; 2568 unsigned CSIIndex = 0; 2569 BitVector Restored(TRI->getNumRegs()); 2570 2571 // Initialize insertion-point logic; we will be restoring in reverse 2572 // order of spill. 2573 MachineBasicBlock::iterator I = MI, BeforeI = I; 2574 bool AtStart = I == MBB.begin(); 2575 2576 if (!AtStart) 2577 --BeforeI; 2578 2579 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2580 Register Reg = CSI[i].getReg(); 2581 2582 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2583 continue; 2584 2585 // Restore of callee saved condition register field is handled during 2586 // epilogue insertion. 2587 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2588 continue; 2589 2590 if (Reg == PPC::CR2) { 2591 CR2Spilled = true; 2592 // The spill slot is associated only with CR2, which is the 2593 // first nonvolatile spilled. Save it here. 2594 CSIIndex = i; 2595 continue; 2596 } else if (Reg == PPC::CR3) { 2597 CR3Spilled = true; 2598 continue; 2599 } else if (Reg == PPC::CR4) { 2600 CR4Spilled = true; 2601 continue; 2602 } else { 2603 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2604 // least one CR register, restore all spilled CRs together. 2605 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2606 bool is31 = needsFP(*MF); 2607 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2608 CSIIndex); 2609 CR2Spilled = CR3Spilled = CR4Spilled = false; 2610 } 2611 2612 if (CSI[i].isSpilledToReg()) { 2613 DebugLoc DL; 2614 unsigned Dst = CSI[i].getDstReg(); 2615 2616 if (Restored[Dst]) 2617 continue; 2618 2619 if (VSRContainingGPRs[Dst].second != 0) { 2620 assert(Subtarget.hasP9Vector()); 2621 NumPEReloadVSR += 2; 2622 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), 2623 VSRContainingGPRs[Dst].second) 2624 .addReg(Dst); 2625 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2626 VSRContainingGPRs[Dst].first) 2627 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2628 } else if (VSRContainingGPRs[Dst].second == 0) { 2629 assert(Subtarget.hasP8Vector()); 2630 ++NumPEReloadVSR; 2631 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2632 VSRContainingGPRs[Dst].first) 2633 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2634 } else { 2635 llvm_unreachable("More than two GPRs spilled to a VSR!"); 2636 } 2637 2638 Restored.set(Dst); 2639 2640 } else { 2641 // Default behavior for non-CR saves. 2642 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2643 2644 // Functions without NoUnwind need to preserve the order of elements in 2645 // saved vector registers. 
2646 if (Subtarget.needsSwapsForVSXMemOps() && 2647 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2648 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2649 TRI); 2650 else 2651 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2652 2653 assert(I != MBB.begin() && 2654 "loadRegFromStackSlot didn't insert any code!"); 2655 } 2656 } 2657 2658 // Insert in reverse order. 2659 if (AtStart) 2660 I = MBB.begin(); 2661 else { 2662 I = BeforeI; 2663 ++I; 2664 } 2665 } 2666 2667 // If we haven't yet spilled the CRs, do so now. 2668 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2669 assert(Subtarget.is32BitELFABI() && 2670 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2671 bool is31 = needsFP(*MF); 2672 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2673 } 2674 2675 return true; 2676 } 2677 2678 uint64_t PPCFrameLowering::getTOCSaveOffset() const { 2679 return TOCSaveOffset; 2680 } 2681 2682 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const { 2683 return FramePointerSaveOffset; 2684 } 2685 2686 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const { 2687 return BasePointerSaveOffset; 2688 } 2689 2690 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2691 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2692 return false; 2693 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); 2694 } 2695