1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isDarwinABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 return STI.isELFv2ABI() ? 24 : 40; 58 } 59 60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 61 // For the Darwin ABI: 62 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 63 // for saving the frame pointer (if needed.) While the published ABI has 64 // not used this slot since at least MacOSX 10.2, there is older code 65 // around that does use it, and that needs to continue to work. 66 if (STI.isDarwinABI()) 67 return STI.isPPC64() ? -8U : -4U; 68 69 // SVR4 ABI: First slot in the general register save area. 70 return STI.isPPC64() ? -8U : -4U; 71 } 72 73 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 74 if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()) 75 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 76 77 // 32-bit SVR4 ABI: 78 return 8; 79 } 80 81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 82 if (STI.isDarwinABI()) 83 return STI.isPPC64() ? -16U : -8U; 84 85 // SVR4 ABI: First slot in the general register save area. 86 return STI.isPPC64() 87 ? -16U 88 : STI.getTargetMachine().isPositionIndependent() ? 
-12U : -8U; 89 } 90 91 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 92 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 93 STI.getPlatformStackAlignment(), 0), 94 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 95 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 96 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 97 LinkageSize(computeLinkageSize(Subtarget)), 98 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} 99 100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 102 unsigned &NumEntries) const { 103 if (Subtarget.isDarwinABI()) { 104 NumEntries = 1; 105 if (Subtarget.isPPC64()) { 106 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 107 return &darwin64Offsets; 108 } else { 109 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 110 return &darwinOffsets; 111 } 112 } 113 114 // Early exit if not using the SVR4 ABI. 115 if (!Subtarget.isSVR4ABI()) { 116 NumEntries = 0; 117 return nullptr; 118 } 119 120 // Note that the offsets here overlap, but this is fixed up in 121 // processFunctionBeforeFrameFinalized. 122 123 static const SpillSlot Offsets[] = { 124 // Floating-point register save area offsets. 125 {PPC::F31, -8}, 126 {PPC::F30, -16}, 127 {PPC::F29, -24}, 128 {PPC::F28, -32}, 129 {PPC::F27, -40}, 130 {PPC::F26, -48}, 131 {PPC::F25, -56}, 132 {PPC::F24, -64}, 133 {PPC::F23, -72}, 134 {PPC::F22, -80}, 135 {PPC::F21, -88}, 136 {PPC::F20, -96}, 137 {PPC::F19, -104}, 138 {PPC::F18, -112}, 139 {PPC::F17, -120}, 140 {PPC::F16, -128}, 141 {PPC::F15, -136}, 142 {PPC::F14, -144}, 143 144 // General register save area offsets. 
145 {PPC::R31, -4}, 146 {PPC::R30, -8}, 147 {PPC::R29, -12}, 148 {PPC::R28, -16}, 149 {PPC::R27, -20}, 150 {PPC::R26, -24}, 151 {PPC::R25, -28}, 152 {PPC::R24, -32}, 153 {PPC::R23, -36}, 154 {PPC::R22, -40}, 155 {PPC::R21, -44}, 156 {PPC::R20, -48}, 157 {PPC::R19, -52}, 158 {PPC::R18, -56}, 159 {PPC::R17, -60}, 160 {PPC::R16, -64}, 161 {PPC::R15, -68}, 162 {PPC::R14, -72}, 163 164 // CR save area offset. We map each of the nonvolatile CR fields 165 // to the slot for CR2, which is the first of the nonvolatile CR 166 // fields to be assigned, so that we only allocate one save slot. 167 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 168 {PPC::CR2, -4}, 169 170 // VRSAVE save area offset. 171 {PPC::VRSAVE, -4}, 172 173 // Vector register save area 174 {PPC::V31, -16}, 175 {PPC::V30, -32}, 176 {PPC::V29, -48}, 177 {PPC::V28, -64}, 178 {PPC::V27, -80}, 179 {PPC::V26, -96}, 180 {PPC::V25, -112}, 181 {PPC::V24, -128}, 182 {PPC::V23, -144}, 183 {PPC::V22, -160}, 184 {PPC::V21, -176}, 185 {PPC::V20, -192}, 186 187 // SPE register save area (overlaps Vector save area). 188 {PPC::S31, -8}, 189 {PPC::S30, -16}, 190 {PPC::S29, -24}, 191 {PPC::S28, -32}, 192 {PPC::S27, -40}, 193 {PPC::S26, -48}, 194 {PPC::S25, -56}, 195 {PPC::S24, -64}, 196 {PPC::S23, -72}, 197 {PPC::S22, -80}, 198 {PPC::S21, -88}, 199 {PPC::S20, -96}, 200 {PPC::S19, -104}, 201 {PPC::S18, -112}, 202 {PPC::S17, -120}, 203 {PPC::S16, -128}, 204 {PPC::S15, -136}, 205 {PPC::S14, -144}}; 206 207 static const SpillSlot Offsets64[] = { 208 // Floating-point register save area offsets. 
209 {PPC::F31, -8}, 210 {PPC::F30, -16}, 211 {PPC::F29, -24}, 212 {PPC::F28, -32}, 213 {PPC::F27, -40}, 214 {PPC::F26, -48}, 215 {PPC::F25, -56}, 216 {PPC::F24, -64}, 217 {PPC::F23, -72}, 218 {PPC::F22, -80}, 219 {PPC::F21, -88}, 220 {PPC::F20, -96}, 221 {PPC::F19, -104}, 222 {PPC::F18, -112}, 223 {PPC::F17, -120}, 224 {PPC::F16, -128}, 225 {PPC::F15, -136}, 226 {PPC::F14, -144}, 227 228 // General register save area offsets. 229 {PPC::X31, -8}, 230 {PPC::X30, -16}, 231 {PPC::X29, -24}, 232 {PPC::X28, -32}, 233 {PPC::X27, -40}, 234 {PPC::X26, -48}, 235 {PPC::X25, -56}, 236 {PPC::X24, -64}, 237 {PPC::X23, -72}, 238 {PPC::X22, -80}, 239 {PPC::X21, -88}, 240 {PPC::X20, -96}, 241 {PPC::X19, -104}, 242 {PPC::X18, -112}, 243 {PPC::X17, -120}, 244 {PPC::X16, -128}, 245 {PPC::X15, -136}, 246 {PPC::X14, -144}, 247 248 // VRSAVE save area offset. 249 {PPC::VRSAVE, -4}, 250 251 // Vector register save area 252 {PPC::V31, -16}, 253 {PPC::V30, -32}, 254 {PPC::V29, -48}, 255 {PPC::V28, -64}, 256 {PPC::V27, -80}, 257 {PPC::V26, -96}, 258 {PPC::V25, -112}, 259 {PPC::V24, -128}, 260 {PPC::V23, -144}, 261 {PPC::V22, -160}, 262 {PPC::V21, -176}, 263 {PPC::V20, -192}}; 264 265 if (Subtarget.isPPC64()) { 266 NumEntries = array_lengthof(Offsets64); 267 268 return Offsets64; 269 } else { 270 NumEntries = array_lengthof(Offsets); 271 272 return Offsets; 273 } 274 } 275 276 /// RemoveVRSaveCode - We have found that this function does not need any code 277 /// to manipulate the VRSAVE register, even though it uses vector registers. 278 /// This can happen when the only registers used are known to be live in or out 279 /// of the function. Remove all of the VRSAVE related code from the function. 280 /// FIXME: The removal of the code results in a compile failure at -O0 when the 281 /// function contains a function call, as the GPR containing original VRSAVE 282 /// contents is spilled and reloaded around the call. 
Without the prolog code, 283 /// the spill instruction refers to an undefined register. This code needs 284 /// to account for all uses of that GPR. 285 static void RemoveVRSaveCode(MachineInstr &MI) { 286 MachineBasicBlock *Entry = MI.getParent(); 287 MachineFunction *MF = Entry->getParent(); 288 289 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 290 MachineBasicBlock::iterator MBBI = MI; 291 ++MBBI; 292 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 293 MBBI->eraseFromParent(); 294 295 bool RemovedAllMTVRSAVEs = true; 296 // See if we can find and remove the MTVRSAVE instruction from all of the 297 // epilog blocks. 298 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 299 // If last instruction is a return instruction, add an epilogue 300 if (I->isReturnBlock()) { 301 bool FoundIt = false; 302 for (MBBI = I->end(); MBBI != I->begin(); ) { 303 --MBBI; 304 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 305 MBBI->eraseFromParent(); // remove it. 306 FoundIt = true; 307 break; 308 } 309 } 310 RemovedAllMTVRSAVEs &= FoundIt; 311 } 312 } 313 314 // If we found and removed all MTVRSAVE instructions, remove the read of 315 // VRSAVE as well. 316 if (RemovedAllMTVRSAVEs) { 317 MBBI = MI; 318 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 319 --MBBI; 320 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 321 MBBI->eraseFromParent(); 322 } 323 324 // Finally, nuke the UPDATE_VRSAVE. 325 MI.eraseFromParent(); 326 } 327 328 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 329 // instruction selector. Based on the vector registers that have been used, 330 // transform this into the appropriate ORI instruction. 
331 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 332 MachineFunction *MF = MI.getParent()->getParent(); 333 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 334 DebugLoc dl = MI.getDebugLoc(); 335 336 const MachineRegisterInfo &MRI = MF->getRegInfo(); 337 unsigned UsedRegMask = 0; 338 for (unsigned i = 0; i != 32; ++i) 339 if (MRI.isPhysRegModified(VRRegNo[i])) 340 UsedRegMask |= 1 << (31-i); 341 342 // Live in and live out values already must be in the mask, so don't bother 343 // marking them. 344 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 345 unsigned RegNo = TRI->getEncodingValue(LI.first); 346 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 347 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 348 } 349 350 // Live out registers appear as use operands on return instructions. 351 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 352 UsedRegMask != 0 && BI != BE; ++BI) { 353 const MachineBasicBlock &MBB = *BI; 354 if (!MBB.isReturnBlock()) 355 continue; 356 const MachineInstr &Ret = MBB.back(); 357 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 358 const MachineOperand &MO = Ret.getOperand(I); 359 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 360 continue; 361 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 362 UsedRegMask &= ~(1 << (31-RegNo)); 363 } 364 } 365 366 // If no registers are used, turn this into a copy. 367 if (UsedRegMask == 0) { 368 // Remove all VRSAVE code. 
369 RemoveVRSaveCode(MI); 370 return; 371 } 372 373 unsigned SrcReg = MI.getOperand(1).getReg(); 374 unsigned DstReg = MI.getOperand(0).getReg(); 375 376 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 377 if (DstReg != SrcReg) 378 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 379 .addReg(SrcReg) 380 .addImm(UsedRegMask); 381 else 382 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 383 .addReg(SrcReg, RegState::Kill) 384 .addImm(UsedRegMask); 385 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 386 if (DstReg != SrcReg) 387 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 388 .addReg(SrcReg) 389 .addImm(UsedRegMask >> 16); 390 else 391 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 392 .addReg(SrcReg, RegState::Kill) 393 .addImm(UsedRegMask >> 16); 394 } else { 395 if (DstReg != SrcReg) 396 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 397 .addReg(SrcReg) 398 .addImm(UsedRegMask >> 16); 399 else 400 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 401 .addReg(SrcReg, RegState::Kill) 402 .addImm(UsedRegMask >> 16); 403 404 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 405 .addReg(DstReg, RegState::Kill) 406 .addImm(UsedRegMask & 0xFFFF); 407 } 408 409 // Remove the old UPDATE_VRSAVE instruction. 
410 MI.eraseFromParent(); 411 } 412 413 static bool spillsCR(const MachineFunction &MF) { 414 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 415 return FuncInfo->isCRSpilled(); 416 } 417 418 static bool spillsVRSAVE(const MachineFunction &MF) { 419 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 420 return FuncInfo->isVRSAVESpilled(); 421 } 422 423 static bool hasSpills(const MachineFunction &MF) { 424 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 425 return FuncInfo->hasSpills(); 426 } 427 428 static bool hasNonRISpills(const MachineFunction &MF) { 429 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 430 return FuncInfo->hasNonRISpills(); 431 } 432 433 /// MustSaveLR - Return true if this function requires that we save the LR 434 /// register onto the stack in the prolog and restore it in the epilog of the 435 /// function. 436 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 437 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 438 439 // We need a save/restore of LR if there is any def of LR (which is 440 // defined by calls, including the PIC setup sequence), or if there is 441 // some use of the LR stack slot (e.g. for builtin_return_address). 442 // (LR comes in 32 and 64 bit versions.) 443 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 444 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 445 } 446 447 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 448 /// call frame size. Update the MachineFunction object with the stack size. 
449 unsigned 450 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 451 bool UseEstimate) const { 452 unsigned NewMaxCallFrameSize = 0; 453 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 454 &NewMaxCallFrameSize); 455 MF.getFrameInfo().setStackSize(FrameSize); 456 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 457 return FrameSize; 458 } 459 460 /// determineFrameLayout - Determine the size of the frame and maximum call 461 /// frame size. 462 unsigned 463 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 464 bool UseEstimate, 465 unsigned *NewMaxCallFrameSize) const { 466 const MachineFrameInfo &MFI = MF.getFrameInfo(); 467 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 468 469 // Get the number of bytes to allocate from the FrameInfo 470 unsigned FrameSize = 471 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 472 473 // Get stack alignments. The frame must be aligned to the greatest of these: 474 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 475 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 476 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 477 478 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 479 480 unsigned LR = RegInfo->getRARegister(); 481 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 482 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 483 !MFI.adjustsStack() && // No calls. 484 !MustSaveLR(MF, LR) && // No need to save LR. 485 !FI->mustSaveTOC() && // No need to save TOC. 486 !RegInfo->hasBasePointer(MF); // No special alignment. 487 488 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 489 // code if all local vars are reg-allocated. 
490 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 491 492 // Check whether we can skip adjusting the stack pointer (by using red zone) 493 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 494 // No need for frame 495 return 0; 496 } 497 498 // Get the maximum call frame size of all the calls. 499 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 500 501 // Maximum call frame needs to be at least big enough for linkage area. 502 unsigned minCallFrameSize = getLinkageSize(); 503 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 504 505 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 506 // that allocations will be aligned. 507 if (MFI.hasVarSizedObjects()) 508 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 509 510 // Update the new max call frame size if the caller passes in a valid pointer. 511 if (NewMaxCallFrameSize) 512 *NewMaxCallFrameSize = maxCallFrameSize; 513 514 // Include call frame size in total. 515 FrameSize += maxCallFrameSize; 516 517 // Make sure the frame is aligned. 518 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 519 520 return FrameSize; 521 } 522 523 // hasFP - Return true if the specified function actually has a dedicated frame 524 // pointer register. 525 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 526 const MachineFrameInfo &MFI = MF.getFrameInfo(); 527 // FIXME: This is pretty much broken by design: hasFP() might be called really 528 // early, before the stack layout was calculated and thus hasFP() might return 529 // true or false here depending on the time of call. 530 return (MFI.getStackSize()) && needsFP(MF); 531 } 532 533 // needsFP - Return true if the specified function should have a dedicated frame 534 // pointer register. This is true if the function has variable sized allocas or 535 // if frame pointer elimination is disabled. 
536 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 537 const MachineFrameInfo &MFI = MF.getFrameInfo(); 538 539 // Naked functions have no stack frame pushed, so we don't have a frame 540 // pointer. 541 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 542 return false; 543 544 return MF.getTarget().Options.DisableFramePointerElim(MF) || 545 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 546 (MF.getTarget().Options.GuaranteedTailCallOpt && 547 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 548 } 549 550 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 551 bool is31 = needsFP(MF); 552 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 553 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 554 555 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 556 bool HasBP = RegInfo->hasBasePointer(MF); 557 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 558 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 559 560 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 561 BI != BE; ++BI) 562 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 563 --MBBI; 564 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 565 MachineOperand &MO = MBBI->getOperand(I); 566 if (!MO.isReg()) 567 continue; 568 569 switch (MO.getReg()) { 570 case PPC::FP: 571 MO.setReg(FPReg); 572 break; 573 case PPC::FP8: 574 MO.setReg(FP8Reg); 575 break; 576 case PPC::BP: 577 MO.setReg(BPReg); 578 break; 579 case PPC::BP8: 580 MO.setReg(BP8Reg); 581 break; 582 583 } 584 } 585 } 586 } 587 588 /* This function will do the following: 589 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 590 respectively (defaults recommended by the ABI) and return true 591 - If MBB is not an entry block, initialize the register scavenger and look 592 for available registers. 
593 - If the defaults (R0/R12) are available, return true 594 - If TwoUniqueRegsRequired is set to true, it looks for two unique 595 registers. Otherwise, look for a single available register. 596 - If the required registers are found, set SR1 and SR2 and return true. 597 - If the required registers are not found, set SR2 or both SR1 and SR2 to 598 PPC::NoRegister and return false. 599 600 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 601 is not set, this function will attempt to find two different registers, but 602 still return true if only one register is available (and set SR1 == SR2). 603 */ 604 bool 605 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 606 bool UseAtEnd, 607 bool TwoUniqueRegsRequired, 608 unsigned *SR1, 609 unsigned *SR2) const { 610 RegScavenger RS; 611 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 612 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 613 614 // Set the defaults for the two scratch registers. 615 if (SR1) 616 *SR1 = R0; 617 618 if (SR2) { 619 assert (SR1 && "Asking for the second scratch register but not the first?"); 620 *SR2 = R12; 621 } 622 623 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 624 if ((UseAtEnd && MBB->isReturnBlock()) || 625 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 626 return true; 627 628 RS.enterBasicBlock(*MBB); 629 630 if (UseAtEnd && !MBB->empty()) { 631 // The scratch register will be used at the end of the block, so must 632 // consider all registers used within the block 633 634 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 635 // If no terminator, back iterator up to previous instruction. 636 if (MBBI == MBB->end()) 637 MBBI = std::prev(MBBI); 638 639 if (MBBI != MBB->begin()) 640 RS.forward(MBBI); 641 } 642 643 // If the two registers are available, we're all good. 
644 // Note that we only return here if both R0 and R12 are available because 645 // although the function may not require two unique registers, it may benefit 646 // from having two so we should try to provide them. 647 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 648 return true; 649 650 // Get the list of callee-saved registers for the target. 651 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 652 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 653 654 // Get all the available registers in the block. 655 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 656 &PPC::GPRCRegClass); 657 658 // We shouldn't use callee-saved registers as scratch registers as they may be 659 // available when looking for a candidate block for shrink wrapping but not 660 // available when the actual prologue/epilogue is being emitted because they 661 // were added as live-in to the prologue block by PrologueEpilogueInserter. 662 for (int i = 0; CSRegs[i]; ++i) 663 BV.reset(CSRegs[i]); 664 665 // Set the first scratch register to the first available one. 666 if (SR1) { 667 int FirstScratchReg = BV.find_first(); 668 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 669 } 670 671 // If there is another one available, set the second scratch register to that. 672 // Otherwise, set it to either PPC::NoRegister if this function requires two 673 // or to whatever SR1 is set to if this function doesn't require two. 674 if (SR2) { 675 int SecondScratchReg = BV.find_next(*SR1); 676 if (SecondScratchReg != -1) 677 *SR2 = SecondScratchReg; 678 else 679 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 680 } 681 682 // Now that we've done our best to provide both registers, double check 683 // whether we were unable to provide enough. 684 if (BV.count() < (TwoUniqueRegsRequired ? 
2U : 1U)) 685 return false; 686 687 return true; 688 } 689 690 // We need a scratch register for spilling LR and for spilling CR. By default, 691 // we use two scratch registers to hide latency. However, if only one scratch 692 // register is available, we can adjust for that by not overlapping the spill 693 // code. However, if we need to realign the stack (i.e. have a base pointer) 694 // and the stack frame is large, we need two scratch registers. 695 bool 696 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 697 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 698 MachineFunction &MF = *(MBB->getParent()); 699 bool HasBP = RegInfo->hasBasePointer(MF); 700 unsigned FrameSize = determineFrameLayout(MF); 701 int NegFrameSize = -FrameSize; 702 bool IsLargeFrame = !isInt<16>(NegFrameSize); 703 MachineFrameInfo &MFI = MF.getFrameInfo(); 704 unsigned MaxAlign = MFI.getMaxAlignment(); 705 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 706 707 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 708 } 709 710 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 711 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 712 713 return findScratchRegister(TmpMBB, false, 714 twoUniqueScratchRegsRequired(TmpMBB)); 715 } 716 717 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 718 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 719 720 return findScratchRegister(TmpMBB, true); 721 } 722 723 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 724 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 725 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 726 727 // Abort if there is no register info or function info. 728 if (!RegInfo || !FI) 729 return false; 730 731 // Only move the stack update on ELFv2 ABI and PPC64. 
732 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 733 return false; 734 735 // Check the frame size first and return false if it does not fit the 736 // requirements. 737 // We need a non-zero frame size as well as a frame that will fit in the red 738 // zone. This is because by moving the stack pointer update we are now storing 739 // to the red zone until the stack pointer is updated. If we get an interrupt 740 // inside the prologue but before the stack update we now have a number of 741 // stores to the red zone and those stores must all fit. 742 MachineFrameInfo &MFI = MF.getFrameInfo(); 743 unsigned FrameSize = MFI.getStackSize(); 744 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 745 return false; 746 747 // Frame pointers and base pointers complicate matters so don't do anything 748 // if we have them. For example having a frame pointer will sometimes require 749 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 750 // difficult. 751 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 752 return false; 753 754 // Calls to fast_cc functions use different rules for passing parameters on 755 // the stack from the ABI and using PIC base in the function imposes 756 // similar restrictions to using the base pointer. It is not generally safe 757 // to move the stack pointer update in these situations. 758 if (FI->hasFastCall() || FI->usesPICBase()) 759 return false; 760 761 // Finally we can move the stack update if we do not require register 762 // scavenging. Register scavenging can introduce more spills and so 763 // may make the frame size larger than we have computed. 
764 return !RegInfo->requiresFrameIndexScavenging(MF); 765 } 766 767 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 768 MachineBasicBlock &MBB) const { 769 MachineBasicBlock::iterator MBBI = MBB.begin(); 770 MachineFrameInfo &MFI = MF.getFrameInfo(); 771 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 772 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 773 774 MachineModuleInfo &MMI = MF.getMMI(); 775 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 776 DebugLoc dl; 777 bool needsCFI = MMI.hasDebugInfo() || 778 MF.getFunction().needsUnwindTableEntry(); 779 780 // Get processor type. 781 bool isPPC64 = Subtarget.isPPC64(); 782 // Get the ABI. 783 bool isSVR4ABI = Subtarget.isSVR4ABI(); 784 bool isELFv2ABI = Subtarget.isELFv2ABI(); 785 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 786 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 787 788 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 789 // process it. 790 if (!isSVR4ABI) 791 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 792 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 793 HandleVRSaveUpdate(*MBBI, TII); 794 break; 795 } 796 } 797 798 // Move MBBI back to the beginning of the prologue block. 799 MBBI = MBB.begin(); 800 801 // Work out frame sizes. 802 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 803 int NegFrameSize = -FrameSize; 804 if (!isInt<32>(NegFrameSize)) 805 llvm_unreachable("Unhandled stack size!"); 806 807 if (MFI.isFrameAddressTaken()) 808 replaceFPWithRealFP(MF); 809 810 // Check if the link register (LR) must be saved. 811 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 812 bool MustSaveLR = FI->mustSaveLR(); 813 bool MustSaveTOC = FI->mustSaveTOC(); 814 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 815 bool MustSaveCR = !MustSaveCRs.empty(); 816 // Do we have a frame pointer and/or base pointer for this function? 
817 bool HasFP = hasFP(MF); 818 bool HasBP = RegInfo->hasBasePointer(MF); 819 bool HasRedZone = isPPC64 || !isSVR4ABI; 820 821 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 822 unsigned BPReg = RegInfo->getBaseRegister(MF); 823 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 824 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 825 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 826 unsigned ScratchReg = 0; 827 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 828 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 829 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 830 : PPC::MFLR ); 831 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 832 : PPC::STW ); 833 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 834 : PPC::STWU ); 835 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 836 : PPC::STWUX); 837 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 838 : PPC::LIS ); 839 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 840 : PPC::ORI ); 841 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 842 : PPC::OR ); 843 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 844 : PPC::SUBFC); 845 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 846 : PPC::SUBFIC); 847 848 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 849 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 850 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 851 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 852 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 853 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 854 855 // Using the same bool variable as below to suppress compiler warnings. 
856 bool SingleScratchReg = 857 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 858 &ScratchReg, &TempReg); 859 assert(SingleScratchReg && 860 "Required number of registers not available in this block"); 861 862 SingleScratchReg = ScratchReg == TempReg; 863 864 int LROffset = getReturnSaveOffset(); 865 866 int FPOffset = 0; 867 if (HasFP) { 868 if (isSVR4ABI) { 869 MachineFrameInfo &MFI = MF.getFrameInfo(); 870 int FPIndex = FI->getFramePointerSaveIndex(); 871 assert(FPIndex && "No Frame Pointer Save Slot!"); 872 FPOffset = MFI.getObjectOffset(FPIndex); 873 } else { 874 FPOffset = getFramePointerSaveOffset(); 875 } 876 } 877 878 int BPOffset = 0; 879 if (HasBP) { 880 if (isSVR4ABI) { 881 MachineFrameInfo &MFI = MF.getFrameInfo(); 882 int BPIndex = FI->getBasePointerSaveIndex(); 883 assert(BPIndex && "No Base Pointer Save Slot!"); 884 BPOffset = MFI.getObjectOffset(BPIndex); 885 } else { 886 BPOffset = getBasePointerSaveOffset(); 887 } 888 } 889 890 int PBPOffset = 0; 891 if (FI->usesPICBase()) { 892 MachineFrameInfo &MFI = MF.getFrameInfo(); 893 int PBPIndex = FI->getPICBasePointerSaveIndex(); 894 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 895 PBPOffset = MFI.getObjectOffset(PBPIndex); 896 } 897 898 // Get stack alignments. 899 unsigned MaxAlign = MFI.getMaxAlignment(); 900 if (HasBP && MaxAlign > 1) 901 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 902 "Invalid alignment!"); 903 904 // Frames of 32KB & larger require special handling because they cannot be 905 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 906 bool isLargeFrame = !isInt<16>(NegFrameSize); 907 908 assert((isPPC64 || !MustSaveCR) && 909 "Prologue CR saving supported only in 64-bit mode"); 910 911 // Check if we can move the stack update instruction (stdu) down the prologue 912 // past the callee saves. Hopefully this will avoid the situation where the 913 // saves are waiting for the update on the store with update to complete. 
914 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 915 bool MovingStackUpdateDown = false; 916 917 // Check if we can move the stack update. 918 if (stackUpdateCanBeMoved(MF)) { 919 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 920 for (CalleeSavedInfo CSI : Info) { 921 int FrIdx = CSI.getFrameIdx(); 922 // If the frame index is not negative the callee saved info belongs to a 923 // stack object that is not a fixed stack object. We ignore non-fixed 924 // stack objects because we won't move the stack update pointer past them. 925 if (FrIdx >= 0) 926 continue; 927 928 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 929 StackUpdateLoc++; 930 MovingStackUpdateDown = true; 931 } else { 932 // We need all of the Frame Indices to meet these conditions. 933 // If they do not, abort the whole operation. 934 StackUpdateLoc = MBBI; 935 MovingStackUpdateDown = false; 936 break; 937 } 938 } 939 940 // If the operation was not aborted then update the object offset. 941 if (MovingStackUpdateDown) { 942 for (CalleeSavedInfo CSI : Info) { 943 int FrIdx = CSI.getFrameIdx(); 944 if (FrIdx < 0) 945 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 946 } 947 } 948 } 949 950 // If we need to spill the CR and the LR but we don't have two separate 951 // registers available, we must spill them one at a time 952 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 953 // In the ELFv2 ABI, we are not required to save all CR fields. 954 // If only one or two CR fields are clobbered, it is more efficient to use 955 // mfocrf to selectively save just those fields, because mfocrf has short 956 // latency compares to mfcr. 
957 unsigned MfcrOpcode = PPC::MFCR8; 958 unsigned CrState = RegState::ImplicitKill; 959 if (isELFv2ABI && MustSaveCRs.size() == 1) { 960 MfcrOpcode = PPC::MFOCRF8; 961 CrState = RegState::Kill; 962 } 963 MachineInstrBuilder MIB = 964 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 965 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 966 MIB.addReg(MustSaveCRs[i], CrState); 967 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 968 .addReg(TempReg, getKillRegState(true)) 969 .addImm(8) 970 .addReg(SPReg); 971 } 972 973 if (MustSaveLR) 974 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 975 976 if (MustSaveCR && 977 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 978 // In the ELFv2 ABI, we are not required to save all CR fields. 979 // If only one or two CR fields are clobbered, it is more efficient to use 980 // mfocrf to selectively save just those fields, because mfocrf has short 981 // latency compares to mfcr. 982 unsigned MfcrOpcode = PPC::MFCR8; 983 unsigned CrState = RegState::ImplicitKill; 984 if (isELFv2ABI && MustSaveCRs.size() == 1) { 985 MfcrOpcode = PPC::MFOCRF8; 986 CrState = RegState::Kill; 987 } 988 MachineInstrBuilder MIB = 989 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 990 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 991 MIB.addReg(MustSaveCRs[i], CrState); 992 } 993 994 if (HasRedZone) { 995 if (HasFP) 996 BuildMI(MBB, MBBI, dl, StoreInst) 997 .addReg(FPReg) 998 .addImm(FPOffset) 999 .addReg(SPReg); 1000 if (FI->usesPICBase()) 1001 BuildMI(MBB, MBBI, dl, StoreInst) 1002 .addReg(PPC::R30) 1003 .addImm(PBPOffset) 1004 .addReg(SPReg); 1005 if (HasBP) 1006 BuildMI(MBB, MBBI, dl, StoreInst) 1007 .addReg(BPReg) 1008 .addImm(BPOffset) 1009 .addReg(SPReg); 1010 } 1011 1012 if (MustSaveLR) 1013 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 1014 .addReg(ScratchReg, getKillRegState(true)) 1015 .addImm(LROffset) 1016 .addReg(SPReg); 1017 1018 if (MustSaveCR && 1019 !(SingleScratchReg && MustSaveLR)) { // will 
only occur for PPC64 1020 assert(HasRedZone && "A red zone is always available on PPC64"); 1021 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 1022 .addReg(TempReg, getKillRegState(true)) 1023 .addImm(8) 1024 .addReg(SPReg); 1025 } 1026 1027 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1028 if (!FrameSize) 1029 return; 1030 1031 // Adjust stack pointer: r1 += NegFrameSize. 1032 // If there is a preferred stack alignment, align R1 now 1033 1034 if (HasBP && HasRedZone) { 1035 // Save a copy of r1 as the base pointer. 1036 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1037 .addReg(SPReg) 1038 .addReg(SPReg); 1039 } 1040 1041 // Have we generated a STUX instruction to claim stack frame? If so, 1042 // the negated frame size will be placed in ScratchReg. 1043 bool HasSTUX = false; 1044 1045 // This condition must be kept in sync with canUseAsPrologue. 1046 if (HasBP && MaxAlign > 1) { 1047 if (isPPC64) 1048 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1049 .addReg(SPReg) 1050 .addImm(0) 1051 .addImm(64 - Log2_32(MaxAlign)); 1052 else // PPC32... 
1053 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1054 .addReg(SPReg) 1055 .addImm(0) 1056 .addImm(32 - Log2_32(MaxAlign)) 1057 .addImm(31); 1058 if (!isLargeFrame) { 1059 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1060 .addReg(ScratchReg, RegState::Kill) 1061 .addImm(NegFrameSize); 1062 } else { 1063 assert(!SingleScratchReg && "Only a single scratch reg available"); 1064 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1065 .addImm(NegFrameSize >> 16); 1066 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1067 .addReg(TempReg, RegState::Kill) 1068 .addImm(NegFrameSize & 0xFFFF); 1069 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1070 .addReg(ScratchReg, RegState::Kill) 1071 .addReg(TempReg, RegState::Kill); 1072 } 1073 1074 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1075 .addReg(SPReg, RegState::Kill) 1076 .addReg(SPReg) 1077 .addReg(ScratchReg); 1078 HasSTUX = true; 1079 1080 } else if (!isLargeFrame) { 1081 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1082 .addReg(SPReg) 1083 .addImm(NegFrameSize) 1084 .addReg(SPReg); 1085 1086 } else { 1087 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1088 .addImm(NegFrameSize >> 16); 1089 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1090 .addReg(ScratchReg, RegState::Kill) 1091 .addImm(NegFrameSize & 0xFFFF); 1092 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1093 .addReg(SPReg, RegState::Kill) 1094 .addReg(SPReg) 1095 .addReg(ScratchReg); 1096 HasSTUX = true; 1097 } 1098 1099 // Save the TOC register after the stack pointer update if a prologue TOC 1100 // save is required for the function. 
1101 if (MustSaveTOC) { 1102 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1103 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1104 .addReg(TOCReg, getKillRegState(true)) 1105 .addImm(TOCSaveOffset) 1106 .addReg(SPReg); 1107 } 1108 1109 if (!HasRedZone) { 1110 assert(!isPPC64 && "A red zone is always available on PPC64"); 1111 if (HasSTUX) { 1112 // The negated frame size is in ScratchReg, and the SPReg has been 1113 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1114 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1115 // the stack frame (i.e. the old SP), ideally, we would put the old 1116 // SP into a register and use it as the base for the stores. The 1117 // problem is that the only available register may be ScratchReg, 1118 // which could be R0, and R0 cannot be used as a base address. 1119 1120 // First, set ScratchReg to the old SP. This may need to be modified 1121 // later. 1122 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1123 .addReg(ScratchReg, RegState::Kill) 1124 .addReg(SPReg); 1125 1126 if (ScratchReg == PPC::R0) { 1127 // R0 cannot be used as a base register, but it can be used as an 1128 // index in a store-indexed. 1129 int LastOffset = 0; 1130 if (HasFP) { 1131 // R0 += (FPOffset-LastOffset). 1132 // Need addic, since addi treats R0 as 0. 1133 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1134 .addReg(ScratchReg) 1135 .addImm(FPOffset-LastOffset); 1136 LastOffset = FPOffset; 1137 // Store FP into *R0. 1138 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1139 .addReg(FPReg, RegState::Kill) // Save FP. 1140 .addReg(PPC::ZERO) 1141 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1142 } 1143 if (FI->usesPICBase()) { 1144 // R0 += (PBPOffset-LastOffset). 
1145 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1146 .addReg(ScratchReg) 1147 .addImm(PBPOffset-LastOffset); 1148 LastOffset = PBPOffset; 1149 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1150 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1151 .addReg(PPC::ZERO) 1152 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1153 } 1154 if (HasBP) { 1155 // R0 += (BPOffset-LastOffset). 1156 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1157 .addReg(ScratchReg) 1158 .addImm(BPOffset-LastOffset); 1159 LastOffset = BPOffset; 1160 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1161 .addReg(BPReg, RegState::Kill) // Save BP. 1162 .addReg(PPC::ZERO) 1163 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1164 // BP = R0-LastOffset 1165 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1166 .addReg(ScratchReg, RegState::Kill) 1167 .addImm(-LastOffset); 1168 } 1169 } else { 1170 // ScratchReg is not R0, so use it as the base register. It is 1171 // already set to the old SP, so we can use the offsets directly. 1172 1173 // Now that the stack frame has been allocated, save all the necessary 1174 // registers using ScratchReg as the base address. 1175 if (HasFP) 1176 BuildMI(MBB, MBBI, dl, StoreInst) 1177 .addReg(FPReg) 1178 .addImm(FPOffset) 1179 .addReg(ScratchReg); 1180 if (FI->usesPICBase()) 1181 BuildMI(MBB, MBBI, dl, StoreInst) 1182 .addReg(PPC::R30) 1183 .addImm(PBPOffset) 1184 .addReg(ScratchReg); 1185 if (HasBP) { 1186 BuildMI(MBB, MBBI, dl, StoreInst) 1187 .addReg(BPReg) 1188 .addImm(BPOffset) 1189 .addReg(ScratchReg); 1190 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1191 .addReg(ScratchReg, RegState::Kill) 1192 .addReg(ScratchReg); 1193 } 1194 } 1195 } else { 1196 // The frame size is a known 16-bit constant (fitting in the immediate 1197 // field of STWU). To be here we have to be compiling for PPC32. 1198 // Since the SPReg has been decreased by FrameSize, add it back to each 1199 // offset. 
1200 if (HasFP) 1201 BuildMI(MBB, MBBI, dl, StoreInst) 1202 .addReg(FPReg) 1203 .addImm(FrameSize + FPOffset) 1204 .addReg(SPReg); 1205 if (FI->usesPICBase()) 1206 BuildMI(MBB, MBBI, dl, StoreInst) 1207 .addReg(PPC::R30) 1208 .addImm(FrameSize + PBPOffset) 1209 .addReg(SPReg); 1210 if (HasBP) { 1211 BuildMI(MBB, MBBI, dl, StoreInst) 1212 .addReg(BPReg) 1213 .addImm(FrameSize + BPOffset) 1214 .addReg(SPReg); 1215 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1216 .addReg(SPReg) 1217 .addImm(FrameSize); 1218 } 1219 } 1220 } 1221 1222 // Add Call Frame Information for the instructions we generated above. 1223 if (needsCFI) { 1224 unsigned CFIIndex; 1225 1226 if (HasBP) { 1227 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1228 // because if the stack needed aligning then CFA won't be at a fixed 1229 // offset from FP/SP. 1230 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1231 CFIIndex = MF.addFrameInst( 1232 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1233 } else { 1234 // Adjust the definition of CFA to account for the change in SP. 1235 assert(NegFrameSize); 1236 CFIIndex = MF.addFrameInst( 1237 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1238 } 1239 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1240 .addCFIIndex(CFIIndex); 1241 1242 if (HasFP) { 1243 // Describe where FP was saved, at a fixed offset from CFA. 1244 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1245 CFIIndex = MF.addFrameInst( 1246 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1247 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1248 .addCFIIndex(CFIIndex); 1249 } 1250 1251 if (FI->usesPICBase()) { 1252 // Describe where FP was saved, at a fixed offset from CFA. 
1253 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1254 CFIIndex = MF.addFrameInst( 1255 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1256 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1257 .addCFIIndex(CFIIndex); 1258 } 1259 1260 if (HasBP) { 1261 // Describe where BP was saved, at a fixed offset from CFA. 1262 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1263 CFIIndex = MF.addFrameInst( 1264 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1265 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1266 .addCFIIndex(CFIIndex); 1267 } 1268 1269 if (MustSaveLR) { 1270 // Describe where LR was saved, at a fixed offset from CFA. 1271 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1272 CFIIndex = MF.addFrameInst( 1273 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1274 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1275 .addCFIIndex(CFIIndex); 1276 } 1277 } 1278 1279 // If there is a frame pointer, copy R1 into R31 1280 if (HasFP) { 1281 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1282 .addReg(SPReg) 1283 .addReg(SPReg); 1284 1285 if (!HasBP && needsCFI) { 1286 // Change the definition of CFA from SP+offset to FP+offset, because SP 1287 // will change at every alloca. 1288 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1289 unsigned CFIIndex = MF.addFrameInst( 1290 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1291 1292 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1293 .addCFIIndex(CFIIndex); 1294 } 1295 } 1296 1297 if (needsCFI) { 1298 // Describe where callee saved registers were saved, at fixed offsets from 1299 // CFA. 1300 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1301 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1302 unsigned Reg = CSI[I].getReg(); 1303 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1304 1305 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1306 // subregisters of CR2. 
We just need to emit a move of CR2. 1307 if (PPC::CRBITRCRegClass.contains(Reg)) 1308 continue; 1309 1310 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1311 continue; 1312 1313 // For SVR4, don't emit a move for the CR spill slot if we haven't 1314 // spilled CRs. 1315 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1316 && !MustSaveCR) 1317 continue; 1318 1319 // For 64-bit SVR4 when we have spilled CRs, the spill location 1320 // is SP+8, not a frame-relative slot. 1321 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1322 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1323 // the whole CR word. In the ELFv2 ABI, every CR that was 1324 // actually saved gets its own CFI record. 1325 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1326 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1327 nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); 1328 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1329 .addCFIIndex(CFIIndex); 1330 continue; 1331 } 1332 1333 if (CSI[I].isSpilledToReg()) { 1334 unsigned SpilledReg = CSI[I].getDstReg(); 1335 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1336 nullptr, MRI->getDwarfRegNum(Reg, true), 1337 MRI->getDwarfRegNum(SpilledReg, true))); 1338 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1339 .addCFIIndex(CFIRegister); 1340 } else { 1341 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1342 // We have changed the object offset above but we do not want to change 1343 // the actual offsets in the CFI instruction so we have to undo the 1344 // offset change here. 
1345 if (MovingStackUpdateDown) 1346 Offset -= NegFrameSize; 1347 1348 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1349 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1350 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1351 .addCFIIndex(CFIIndex); 1352 } 1353 } 1354 } 1355 } 1356 1357 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1358 MachineBasicBlock &MBB) const { 1359 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1360 DebugLoc dl; 1361 1362 if (MBBI != MBB.end()) 1363 dl = MBBI->getDebugLoc(); 1364 1365 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1366 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1367 1368 // Get alignment info so we know how to restore the SP. 1369 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1370 1371 // Get the number of bytes allocated from the FrameInfo. 1372 int FrameSize = MFI.getStackSize(); 1373 1374 // Get processor type. 1375 bool isPPC64 = Subtarget.isPPC64(); 1376 // Get the ABI. 1377 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1378 1379 // Check if the link register (LR) has been saved. 1380 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1381 bool MustSaveLR = FI->mustSaveLR(); 1382 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1383 bool MustSaveCR = !MustSaveCRs.empty(); 1384 // Do we have a frame pointer and/or base pointer for this function? 1385 bool HasFP = hasFP(MF); 1386 bool HasBP = RegInfo->hasBasePointer(MF); 1387 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1388 1389 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1390 unsigned BPReg = RegInfo->getBaseRegister(MF); 1391 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1392 unsigned ScratchReg = 0; 1393 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1394 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1395 : PPC::MTLR ); 1396 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? 
PPC::LD 1397 : PPC::LWZ ); 1398 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1399 : PPC::LIS ); 1400 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1401 : PPC::OR ); 1402 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1403 : PPC::ORI ); 1404 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1405 : PPC::ADDI ); 1406 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1407 : PPC::ADD4 ); 1408 1409 int LROffset = getReturnSaveOffset(); 1410 1411 int FPOffset = 0; 1412 1413 // Using the same bool variable as below to suppress compiler warnings. 1414 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1415 &TempReg); 1416 assert(SingleScratchReg && 1417 "Could not find an available scratch register"); 1418 1419 SingleScratchReg = ScratchReg == TempReg; 1420 1421 if (HasFP) { 1422 if (isSVR4ABI) { 1423 int FPIndex = FI->getFramePointerSaveIndex(); 1424 assert(FPIndex && "No Frame Pointer Save Slot!"); 1425 FPOffset = MFI.getObjectOffset(FPIndex); 1426 } else { 1427 FPOffset = getFramePointerSaveOffset(); 1428 } 1429 } 1430 1431 int BPOffset = 0; 1432 if (HasBP) { 1433 if (isSVR4ABI) { 1434 int BPIndex = FI->getBasePointerSaveIndex(); 1435 assert(BPIndex && "No Base Pointer Save Slot!"); 1436 BPOffset = MFI.getObjectOffset(BPIndex); 1437 } else { 1438 BPOffset = getBasePointerSaveOffset(); 1439 } 1440 } 1441 1442 int PBPOffset = 0; 1443 if (FI->usesPICBase()) { 1444 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1445 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1446 PBPOffset = MFI.getObjectOffset(PBPIndex); 1447 } 1448 1449 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1450 1451 if (IsReturnBlock) { 1452 unsigned RetOpcode = MBBI->getOpcode(); 1453 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1454 RetOpcode == PPC::TCRETURNdi || 1455 RetOpcode == PPC::TCRETURNai || 1456 RetOpcode == PPC::TCRETURNri8 || 1457 RetOpcode == PPC::TCRETURNdi8 || 1458 RetOpcode 
== PPC::TCRETURNai8; 1459 1460 if (UsesTCRet) { 1461 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1462 MachineOperand &StackAdjust = MBBI->getOperand(1); 1463 assert(StackAdjust.isImm() && "Expecting immediate value."); 1464 // Adjust stack pointer. 1465 int StackAdj = StackAdjust.getImm(); 1466 int Delta = StackAdj - MaxTCRetDelta; 1467 assert((Delta >= 0) && "Delta must be positive"); 1468 if (MaxTCRetDelta>0) 1469 FrameSize += (StackAdj +Delta); 1470 else 1471 FrameSize += StackAdj; 1472 } 1473 } 1474 1475 // Frames of 32KB & larger require special handling because they cannot be 1476 // indexed into with a simple LD/LWZ immediate offset operand. 1477 bool isLargeFrame = !isInt<16>(FrameSize); 1478 1479 // On targets without red zone, the SP needs to be restored last, so that 1480 // all live contents of the stack frame are upwards of the SP. This means 1481 // that we cannot restore SP just now, since there may be more registers 1482 // to restore from the stack frame (e.g. R31). If the frame size is not 1483 // a simple immediate value, we will need a spare register to hold the 1484 // restored SP. If the frame size is known and small, we can simply adjust 1485 // the offsets of the registers to be restored, and still use SP to restore 1486 // them. In such case, the final update of SP will be to add the frame 1487 // size to it. 1488 // To simplify the code, set RBReg to the base register used to restore 1489 // values from the stack, and set SPAdd to the value that needs to be added 1490 // to the SP at the end. The default values are as if red zone was present. 1491 unsigned RBReg = SPReg; 1492 unsigned SPAdd = 0; 1493 1494 // Check if we can move the stack update instruction up the epilogue 1495 // past the callee saves. This will allow the move to LR instruction 1496 // to be executed before the restores of the callee saves which means 1497 // that the callee saves can hide the latency from the MTLR instrcution. 
1498 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1499 if (stackUpdateCanBeMoved(MF)) { 1500 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1501 for (CalleeSavedInfo CSI : Info) { 1502 int FrIdx = CSI.getFrameIdx(); 1503 // If the frame index is not negative the callee saved info belongs to a 1504 // stack object that is not a fixed stack object. We ignore non-fixed 1505 // stack objects because we won't move the update of the stack pointer 1506 // past them. 1507 if (FrIdx >= 0) 1508 continue; 1509 1510 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1511 StackUpdateLoc--; 1512 else { 1513 // Abort the operation as we can't update all CSR restores. 1514 StackUpdateLoc = MBBI; 1515 break; 1516 } 1517 } 1518 } 1519 1520 if (FrameSize) { 1521 // In the prologue, the loaded (or persistent) stack pointer value is 1522 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1523 // zone add this offset back now. 1524 1525 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1526 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1527 // call which invalidates the stack pointer value in SP(0). So we use the 1528 // value of R31 in this case. 
1529 if (FI->hasFastCall()) { 1530 assert(HasFP && "Expecting a valid frame pointer."); 1531 if (!HasRedZone) 1532 RBReg = FPReg; 1533 if (!isLargeFrame) { 1534 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1535 .addReg(FPReg).addImm(FrameSize); 1536 } else { 1537 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1538 .addImm(FrameSize >> 16); 1539 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1540 .addReg(ScratchReg, RegState::Kill) 1541 .addImm(FrameSize & 0xFFFF); 1542 BuildMI(MBB, MBBI, dl, AddInst) 1543 .addReg(RBReg) 1544 .addReg(FPReg) 1545 .addReg(ScratchReg); 1546 } 1547 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1548 if (HasRedZone) { 1549 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1550 .addReg(SPReg) 1551 .addImm(FrameSize); 1552 } else { 1553 // Make sure that adding FrameSize will not overflow the max offset 1554 // size. 1555 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1556 "Local offsets should be negative"); 1557 SPAdd = FrameSize; 1558 FPOffset += FrameSize; 1559 BPOffset += FrameSize; 1560 PBPOffset += FrameSize; 1561 } 1562 } else { 1563 // We don't want to use ScratchReg as a base register, because it 1564 // could happen to be R0. Use FP instead, but make sure to preserve it. 1565 if (!HasRedZone) { 1566 // If FP is not saved, copy it to ScratchReg. 1567 if (!HasFP) 1568 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1569 .addReg(FPReg) 1570 .addReg(FPReg); 1571 RBReg = FPReg; 1572 } 1573 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1574 .addImm(0) 1575 .addReg(SPReg); 1576 } 1577 } 1578 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1579 // If there is no red zone, ScratchReg may be needed for holding a useful 1580 // value (although not the base register). Make sure it is not overwritten 1581 // too early. 
1582 1583 assert((isPPC64 || !MustSaveCR) && 1584 "Epilogue CR restoring supported only in 64-bit mode"); 1585 1586 // If we need to restore both the LR and the CR and we only have one 1587 // available scratch register, we must do them one at a time. 1588 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1589 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1590 // is live here. 1591 assert(HasRedZone && "Expecting red zone"); 1592 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1593 .addImm(8) 1594 .addReg(SPReg); 1595 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1596 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1597 .addReg(TempReg, getKillRegState(i == e-1)); 1598 } 1599 1600 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1601 // LR is stored in the caller's stack frame. ScratchReg will be needed 1602 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1603 // a base register anyway, because it may happen to be R0. 1604 bool LoadedLR = false; 1605 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1606 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1607 .addImm(LROffset+SPAdd) 1608 .addReg(RBReg); 1609 LoadedLR = true; 1610 } 1611 1612 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1613 // This will only occur for PPC64. 1614 assert(isPPC64 && "Expecting 64-bit mode"); 1615 assert(RBReg == SPReg && "Should be using SP as a base register"); 1616 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1617 .addImm(8) 1618 .addReg(RBReg); 1619 } 1620 1621 if (HasFP) { 1622 // If there is red zone, restore FP directly, since SP has already been 1623 // restored. Otherwise, restore the value of FP into ScratchReg. 
1624 if (HasRedZone || RBReg == SPReg) 1625 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1626 .addImm(FPOffset) 1627 .addReg(SPReg); 1628 else 1629 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1630 .addImm(FPOffset) 1631 .addReg(RBReg); 1632 } 1633 1634 if (FI->usesPICBase()) 1635 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1636 .addImm(PBPOffset) 1637 .addReg(RBReg); 1638 1639 if (HasBP) 1640 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1641 .addImm(BPOffset) 1642 .addReg(RBReg); 1643 1644 // There is nothing more to be loaded from the stack, so now we can 1645 // restore SP: SP = RBReg + SPAdd. 1646 if (RBReg != SPReg || SPAdd != 0) { 1647 assert(!HasRedZone && "This should not happen with red zone"); 1648 // If SPAdd is 0, generate a copy. 1649 if (SPAdd == 0) 1650 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1651 .addReg(RBReg) 1652 .addReg(RBReg); 1653 else 1654 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1655 .addReg(RBReg) 1656 .addImm(SPAdd); 1657 1658 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1659 if (RBReg == FPReg) 1660 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1661 .addReg(ScratchReg) 1662 .addReg(ScratchReg); 1663 1664 // Now load the LR from the caller's stack frame. 1665 if (MustSaveLR && !LoadedLR) 1666 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1667 .addImm(LROffset) 1668 .addReg(SPReg); 1669 } 1670 1671 if (MustSaveCR && 1672 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1673 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1674 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1675 .addReg(TempReg, getKillRegState(i == e-1)); 1676 1677 if (MustSaveLR) 1678 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1679 1680 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1681 // call optimization 1682 if (IsReturnBlock) { 1683 unsigned RetOpcode = MBBI->getOpcode(); 1684 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1685 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1686 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1687 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1688 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1689 1690 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1691 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1692 .addReg(SPReg).addImm(CallerAllocatedAmt); 1693 } else { 1694 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1695 .addImm(CallerAllocatedAmt >> 16); 1696 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1697 .addReg(ScratchReg, RegState::Kill) 1698 .addImm(CallerAllocatedAmt & 0xFFFF); 1699 BuildMI(MBB, MBBI, dl, AddInst) 1700 .addReg(SPReg) 1701 .addReg(FPReg) 1702 .addReg(ScratchReg); 1703 } 1704 } else { 1705 createTailCallBranchInstr(MBB); 1706 } 1707 } 1708 } 1709 1710 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1711 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1712 1713 // If we got this far a first terminator should exist. 1714 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1715 1716 DebugLoc dl = MBBI->getDebugLoc(); 1717 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1718 1719 // Create branch instruction for pseudo tail call return instruction 1720 unsigned RetOpcode = MBBI->getOpcode(); 1721 if (RetOpcode == PPC::TCRETURNdi) { 1722 MBBI = MBB.getLastNonDebugInstr(); 1723 MachineOperand &JumpTarget = MBBI->getOperand(0); 1724 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// Adjust the set of callee-saved registers computed by the target-independent
/// code and create the fixed stack objects that are managed by this class.
///
/// After calling the base implementation this:
///  - records in PPCFunctionInfo whether LR must be saved and removes LR from
///    \p SavedRegs;
///  - creates the frame pointer and base pointer save slots if they are
///    needed and their save index is still zero;
///  - creates the PIC base save slot when the function uses a PIC base;
///  - removes R31/X31, the base pointer and R30 from \p SavedRegs when their
///    slot is handled here;
///  - reserves the tail-call area when GuaranteedTailCallOpt is set and the
///    tail call SP delta is negative;
///  - creates the CR spill slot for 32-bit non-Darwin functions that have
///    CR2, CR3 or CR4 in \p SavedRegs.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary.
  int FPSI = FI->getFramePointerSaveIndex();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet (a zero index is
  // treated as "not yet created").
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Same scheme for the base pointer save slot.
  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  // TCSPDelta is negative here, so the object has size -TCSPDelta and is
  // placed at offset TCSPDelta.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
  // function uses CR 2, 3, or 4.
  if (!isPPC64 && !isDarwinABI &&
      (SavedRegs.test(PPC::CR2) ||
       SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

/// Assign the final offsets of the callee-saved register spill slots and add
/// the register scavenger spill slot(s).
///
/// For non-SVR4 ABIs only addScavengingSpillSlot() is called. Otherwise the
/// save areas are laid out downwards from a running LowerBound, in this
/// order: floating-point registers, general registers (GPRC/G8RC, including
/// the frame pointer, PIC base and base pointer slots), the 4-byte CR save
/// area, the 4-byte VRSAVE save area and finally the 16-byte aligned
/// vector/SPE register area. Every affected stack object has LowerBound added
/// to its current offset.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Lowest register seen so far in each class; start at register 31 of the
  // class so that any callee-saved register compares lower or equal.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSAVESaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Sort the callee-saved registers into per-class buckets and track the
  // minimum register of each class.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg) ||
        PPC::SPE4RCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
      HasVRSAVESaveArea = true;
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Running (non-positive) offset of the lowest save area laid out so far.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    // One 8-byte slot for each register number from MinFPR's encoding
    // through 31.
    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  // The PIC base register (R30) slot lives in the general register save area
  // as well.
  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  // Likewise for the base pointer; it may be a GPRC or a G8RC register.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    unsigned BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area. Registers that are
    // spilled to another register have no stack slot to move.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Do the same for the spill slots of the G8RC registers.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // The area spans from the lowest-numbered saved general register (of
    // either class) through register 31.
    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    // 8 bytes per register on PPC64, 4 bytes otherwise.
    if (Subtarget.isPPC64()) {
      LowerBound -= (31 - MinReg + 1) * 8;
    } else {
      LowerBound -= (31 - MinReg + 1) * 4;
    }
  }

  // For 32-bit only, the CR save area is below the general register
  // save area.  For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
    // Adjust the frame index of the CR spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
          // Leave Darwin logic as-is.
          || (!Subtarget.isSVR4ABI() &&
              (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)))) {
        int FI = CSI[i].getFrameIdx();

        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  if (HasVRSAVESaveArea) {
    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
    //             which have the VRSAVE register class?
    // Adjust the frame index of the VRSAVE spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if (PPC::VRSAVERCRegClass.contains(Reg)) {
        int FI = CSI[i].getFrameIdx();

        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding; we need 16-byte alignment. Note: for a
    // positive number the alignment formula is y = (x + (n-1)) & ~(n-1). But
    // since we are using a negative number here (the stack grows downward),
    // we should use the formula y = x & ~(n-1), where x is the size before
    // aligning, n is the alignment size (n = 16 here) and y is the size after
    // aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

/// Register emergency spill slot(s) with the register scavenger \p RS when
/// the function may need them.
///
/// A slot is added when the function has variable sized objects, spills CR or
/// VRSAVE, has non-RI spills, or has spills while the stack size returned by
/// determineFrameLayout() does not fit into a signed 16-bit immediate. A
/// second slot is added for CR/VRSAVE spills and for variable sized objects
/// whose alignment exceeds the stack alignment.
void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate. We don't know the complete frame size here
  // because we've not yet computed callee-saved register spills or the
  // needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
    // The slot is sized and aligned for a G8RC register on PPC64 and for a
    // GPRC register otherwise.
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    unsigned Align = TRI.getSpillAlignment(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));

    // Might we have over-aligned allocas?
    bool HasAlVars = MFI.hasVarSizedObjects() &&
                     MFI.getMaxAlignment() > getStackAlignment();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
      RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));

  }
}

// This function checks if a callee saved gpr can be spilled to a volatile
// vector register. This occurs for leaf functions (no calls) when the option
// ppc-enable-pe-vector-spills is enabled and the subtarget has P9 vector
// support. If there are any remaining registers which were not spilled to
// vectors, return false so the target independent code can handle them by
// assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Early exit if cannot spill gprs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  // The callee-saved register list is terminated by a zero entry.
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] ||
        (!PPC::F8RCRegClass.contains(Reg) &&
         !PPC::VFRCRegClass.contains(Reg)) ||
        (MF.getRegInfo().isPhysRegUsed(Reg)))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  for (auto &CS : CSI) {
    // No candidate register left: let the caller assign stack slots.
    if (BVAllocatable.none())
      return false;

    // Only general purpose registers are spilled to vector registers.
    unsigned Reg = CS.getReg();
    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    // Take the first remaining candidate and mark it as no longer available.
    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      BVAllocatable.reset(VolatileVFReg);
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}


/// Insert the spills of the callee-saved registers in \p CSI before \p MI in
/// \p MBB.
///
/// Returns false without inserting anything for non-SVR4 ABIs and true
/// otherwise. VRSAVE is skipped on non-Darwin targets and the TOC register
/// (R2/X2) is skipped when PPCFunctionInfo says the TOC must be saved.
/// Nonvolatile CR fields are only recorded via addMustSaveCR() on PPC64; on
/// 32-bit they are stored through R12 using a single MFCR/STW pair. All other
/// registers are either moved to their destination register with MTVSRD or
/// stored to their stack slot.
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     const std::vector<CalleeSavedInfo> &CSI,
                                     const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  // Set once the 32-bit MFCR has been emitted; CRMIB then refers to it.
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    // Only Darwin actually uses the VRSAVE register, but it can still appear
    // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
    // Darwin, ignore it.
    if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
      continue;

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
       MBB.addLiveIn(Reg);

    // An MFCR has already been emitted: just attach this CR field to it as an
    // additional implicit-kill operand.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (Subtarget.isPPC64()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        // The register was assigned a destination register instead of a
        // stack slot; move it there.
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
          .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
                                CSI[i].getFrameIdx(), RC, TRI);
      }
    }
  }
  return true;
}

/// Insert before \p MI the instructions that reload the spilled nonvolatile
/// CR fields.
///
/// Nothing is inserted when \p isPPC64 is true. Otherwise the word in the
/// frame index of CSI[CSIIndex] is loaded into R12 with LWZ, followed by one
/// MTOCRF for each of CR2/CR3/CR4 whose *Spilled flag is set. R12 is marked
/// killed on the last MTOCRF that is emitted. The \p is31 parameter is not
/// referenced by the body.
static void
restoreCRs(bool isPPC64, bool is31,
           bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
           MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
           const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned RestoreOp, MoveReg;

  if (isPPC64)
    // This is handled during epilogue generation.
    return;
  else {
    // 32-bit:  FP-relative
    MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
                                             PPC::R12),
                                     CSI[CSIIndex].getFrameIdx()));
    RestoreOp = PPC::MTOCRF;
    MoveReg = PPC::R12;
  }

  // Each move kills MoveReg only if no later CR field still needs it.
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
               .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
               .addReg(MoveReg, getKillRegState(true)));
}

/// Remove the call frame pseudo instruction \p I from \p MBB and return the
/// iterator produced by the erase.
///
/// When GuaranteedTailCallOpt is enabled and \p I is an ADJCALLSTACKUP whose
/// operand 1 is non-zero, the stack pointer is first adjusted by the negated
/// amount: with a single ADDI if it fits into a signed 16-bit immediate,
/// otherwise by materializing it in R0/X0 with LIS/ORI and adding that.
MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt =  I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addImm(CalleeAmt);
      } else {
        // Build the amount in TmpReg from its upper and lower 16 bits, then
        // add it to the stack pointer.
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
          .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

/// Insert the reloads of the callee-saved registers in \p CSI before \p MI in
/// \p MBB, in reverse order of the spills.
///
/// Returns false without inserting anything for non-SVR4 ABIs and true
/// otherwise. VRSAVE (on non-Darwin targets) and the TOC register (when the
/// TOC must be saved) are skipped. CR2/CR3/CR4 are collected and restored
/// together through restoreCRs(); other registers are either moved back from
/// their destination register with MFVSRD or loaded from their stack slot.
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  // Remember the instruction preceding the insertion point (if any) so the
  // insertion point can be re-derived after each restore.
  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // Only Darwin actually uses the VRSAVE register, but it can still appear
    // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
    // Darwin, ignore it.
    if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
      continue;

    // The TOC register is not restored here when the TOC must be saved.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled.  Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // When we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if ((CR2Spilled || CR3Spilled || CR4Spilled)
          && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        bool is31 = needsFP(*MF);
        restoreCRs(Subtarget.isPPC64(), is31,
                   CR2Spilled, CR3Spilled, CR4Spilled,
                   MBB, I, CSI, CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // The register was saved in another register; move it back.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
       // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order: move the insertion point back to just after
    // BeforeI (or to the block start) so the next restore is placed ahead of
    // the ones already emitted.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    bool is31 = needsFP(*MF);
    restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
               MBB, I, CSI, CSIIndex);
  }

  return true;
}

/// Shrink wrapping is enabled only when PPCFunctionInfo has not disabled it
/// for \p MF and the subtarget is 64-bit and uses the SVR4 ABI.
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
    return false;
  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
          MF.getSubtarget<PPCSubtarget>().isPPC64());
}