1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "MCTargetDesc/PPCPredicates.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/LivePhysRegs.h" 22 #include "llvm/CodeGen/MachineFrameInfo.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineModuleInfo.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/CodeGen/RegisterScavenging.h" 28 #include "llvm/IR/Function.h" 29 #include "llvm/Target/TargetOptions.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "framelowering" 34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 36 STATISTIC(NumPrologProbed, "Number of prologues probed"); 37 38 static cl::opt<bool> 39 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 40 cl::desc("Enable spills in prologue to vector registers."), 41 cl::init(false), cl::Hidden); 42 43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 44 if (STI.isAIXABI()) 45 return STI.isPPC64() ? 16 : 8; 46 // SVR4 ABI: 47 return STI.isPPC64() ? 16 : 4; 48 } 49 50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 51 if (STI.isAIXABI()) 52 return STI.isPPC64() ? 
40 : 20; 53 return STI.isELFv2ABI() ? 24 : 40; 54 } 55 56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 57 // First slot in the general register save area. 58 return STI.isPPC64() ? -8U : -4U; 59 } 60 61 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 62 if (STI.isAIXABI() || STI.isPPC64()) 63 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 64 65 // 32-bit SVR4 ABI: 66 return 8; 67 } 68 69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 70 // Third slot in the general purpose register save area. 71 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) 72 return -12U; 73 74 // Second slot in the general purpose register save area. 75 return STI.isPPC64() ? -16U : -8U; 76 } 77 78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 79 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; 80 } 81 82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 83 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 84 STI.getPlatformStackAlignment(), 0), 85 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 86 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 87 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 88 LinkageSize(computeLinkageSize(Subtarget)), 89 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 90 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 91 92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 94 unsigned &NumEntries) const { 95 96 // Floating-point register save area offsets. 
97 #define CALLEE_SAVED_FPRS \ 98 {PPC::F31, -8}, \ 99 {PPC::F30, -16}, \ 100 {PPC::F29, -24}, \ 101 {PPC::F28, -32}, \ 102 {PPC::F27, -40}, \ 103 {PPC::F26, -48}, \ 104 {PPC::F25, -56}, \ 105 {PPC::F24, -64}, \ 106 {PPC::F23, -72}, \ 107 {PPC::F22, -80}, \ 108 {PPC::F21, -88}, \ 109 {PPC::F20, -96}, \ 110 {PPC::F19, -104}, \ 111 {PPC::F18, -112}, \ 112 {PPC::F17, -120}, \ 113 {PPC::F16, -128}, \ 114 {PPC::F15, -136}, \ 115 {PPC::F14, -144} 116 117 // 32-bit general purpose register save area offsets shared by ELF and 118 // AIX. AIX has an extra CSR with r13. 119 #define CALLEE_SAVED_GPRS32 \ 120 {PPC::R31, -4}, \ 121 {PPC::R30, -8}, \ 122 {PPC::R29, -12}, \ 123 {PPC::R28, -16}, \ 124 {PPC::R27, -20}, \ 125 {PPC::R26, -24}, \ 126 {PPC::R25, -28}, \ 127 {PPC::R24, -32}, \ 128 {PPC::R23, -36}, \ 129 {PPC::R22, -40}, \ 130 {PPC::R21, -44}, \ 131 {PPC::R20, -48}, \ 132 {PPC::R19, -52}, \ 133 {PPC::R18, -56}, \ 134 {PPC::R17, -60}, \ 135 {PPC::R16, -64}, \ 136 {PPC::R15, -68}, \ 137 {PPC::R14, -72} 138 139 // 64-bit general purpose register save area offsets. 140 #define CALLEE_SAVED_GPRS64 \ 141 {PPC::X31, -8}, \ 142 {PPC::X30, -16}, \ 143 {PPC::X29, -24}, \ 144 {PPC::X28, -32}, \ 145 {PPC::X27, -40}, \ 146 {PPC::X26, -48}, \ 147 {PPC::X25, -56}, \ 148 {PPC::X24, -64}, \ 149 {PPC::X23, -72}, \ 150 {PPC::X22, -80}, \ 151 {PPC::X21, -88}, \ 152 {PPC::X20, -96}, \ 153 {PPC::X19, -104}, \ 154 {PPC::X18, -112}, \ 155 {PPC::X17, -120}, \ 156 {PPC::X16, -128}, \ 157 {PPC::X15, -136}, \ 158 {PPC::X14, -144} 159 160 // Vector register save area offsets. 161 #define CALLEE_SAVED_VRS \ 162 {PPC::V31, -16}, \ 163 {PPC::V30, -32}, \ 164 {PPC::V29, -48}, \ 165 {PPC::V28, -64}, \ 166 {PPC::V27, -80}, \ 167 {PPC::V26, -96}, \ 168 {PPC::V25, -112}, \ 169 {PPC::V24, -128}, \ 170 {PPC::V23, -144}, \ 171 {PPC::V22, -160}, \ 172 {PPC::V21, -176}, \ 173 {PPC::V20, -192} 174 175 // Note that the offsets here overlap, but this is fixed up in 176 // processFunctionBeforeFrameFinalized. 
177 178 static const SpillSlot ELFOffsets32[] = { 179 CALLEE_SAVED_FPRS, 180 CALLEE_SAVED_GPRS32, 181 182 // CR save area offset. We map each of the nonvolatile CR fields 183 // to the slot for CR2, which is the first of the nonvolatile CR 184 // fields to be assigned, so that we only allocate one save slot. 185 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 186 {PPC::CR2, -4}, 187 188 // VRSAVE save area offset. 189 {PPC::VRSAVE, -4}, 190 191 CALLEE_SAVED_VRS, 192 193 // SPE register save area (overlaps Vector save area). 194 {PPC::S31, -8}, 195 {PPC::S30, -16}, 196 {PPC::S29, -24}, 197 {PPC::S28, -32}, 198 {PPC::S27, -40}, 199 {PPC::S26, -48}, 200 {PPC::S25, -56}, 201 {PPC::S24, -64}, 202 {PPC::S23, -72}, 203 {PPC::S22, -80}, 204 {PPC::S21, -88}, 205 {PPC::S20, -96}, 206 {PPC::S19, -104}, 207 {PPC::S18, -112}, 208 {PPC::S17, -120}, 209 {PPC::S16, -128}, 210 {PPC::S15, -136}, 211 {PPC::S14, -144}}; 212 213 static const SpillSlot ELFOffsets64[] = { 214 CALLEE_SAVED_FPRS, 215 CALLEE_SAVED_GPRS64, 216 217 // VRSAVE save area offset. 218 {PPC::VRSAVE, -4}, 219 CALLEE_SAVED_VRS 220 }; 221 222 static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS32, 224 // Add AIX's extra CSR. 
225 {PPC::R13, -76}, 226 CALLEE_SAVED_VRS}; 227 228 static const SpillSlot AIXOffsets64[] = { 229 CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS}; 230 231 if (Subtarget.is64BitELFABI()) { 232 NumEntries = std::size(ELFOffsets64); 233 return ELFOffsets64; 234 } 235 236 if (Subtarget.is32BitELFABI()) { 237 NumEntries = std::size(ELFOffsets32); 238 return ELFOffsets32; 239 } 240 241 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 242 243 if (Subtarget.isPPC64()) { 244 NumEntries = std::size(AIXOffsets64); 245 return AIXOffsets64; 246 } 247 248 NumEntries = std::size(AIXOffsets32); 249 return AIXOffsets32; 250 } 251 252 static bool spillsCR(const MachineFunction &MF) { 253 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 254 return FuncInfo->isCRSpilled(); 255 } 256 257 static bool hasSpills(const MachineFunction &MF) { 258 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 259 return FuncInfo->hasSpills(); 260 } 261 262 static bool hasNonRISpills(const MachineFunction &MF) { 263 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 264 return FuncInfo->hasNonRISpills(); 265 } 266 267 /// MustSaveLR - Return true if this function requires that we save the LR 268 /// register onto the stack in the prolog and restore it in the epilog of the 269 /// function. 270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 271 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 272 273 // We need a save/restore of LR if there is any def of LR (which is 274 // defined by calls, including the PIC setup sequence), or if there is 275 // some use of the LR stack slot (e.g. for builtin_return_address). 276 // (LR comes in 32 and 64 bit versions.) 277 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 278 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 279 } 280 281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 282 /// call frame size. 
Update the MachineFunction object with the stack size. 283 uint64_t 284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 285 bool UseEstimate) const { 286 unsigned NewMaxCallFrameSize = 0; 287 uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, 288 &NewMaxCallFrameSize); 289 MF.getFrameInfo().setStackSize(FrameSize); 290 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 291 return FrameSize; 292 } 293 294 /// determineFrameLayout - Determine the size of the frame and maximum call 295 /// frame size. 296 uint64_t 297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 298 bool UseEstimate, 299 unsigned *NewMaxCallFrameSize) const { 300 const MachineFrameInfo &MFI = MF.getFrameInfo(); 301 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 302 303 // Get the number of bytes to allocate from the FrameInfo 304 uint64_t FrameSize = 305 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 306 307 // Get stack alignments. The frame must be aligned to the greatest of these: 308 Align TargetAlign = getStackAlign(); // alignment required per the ABI 309 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 310 Align Alignment = std::max(TargetAlign, MaxAlign); 311 312 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 313 314 unsigned LR = RegInfo->getRARegister(); 315 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 316 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 317 !MFI.adjustsStack() && // No calls. 318 !MustSaveLR(MF, LR) && // No need to save LR. 319 !FI->mustSaveTOC() && // No need to save TOC. 320 !RegInfo->hasBasePointer(MF); // No special alignment. 321 322 // Note: for PPC32 SVR4ABI, we can still generate stackless 323 // code if all local vars are reg-allocated. 
324 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 325 326 // Check whether we can skip adjusting the stack pointer (by using red zone) 327 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 328 // No need for frame 329 return 0; 330 } 331 332 // Get the maximum call frame size of all the calls. 333 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 334 335 // Maximum call frame needs to be at least big enough for linkage area. 336 unsigned minCallFrameSize = getLinkageSize(); 337 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 338 339 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 340 // that allocations will be aligned. 341 if (MFI.hasVarSizedObjects()) 342 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 343 344 // Update the new max call frame size if the caller passes in a valid pointer. 345 if (NewMaxCallFrameSize) 346 *NewMaxCallFrameSize = maxCallFrameSize; 347 348 // Include call frame size in total. 349 FrameSize += maxCallFrameSize; 350 351 // Make sure the frame is aligned. 352 FrameSize = alignTo(FrameSize, Alignment); 353 354 return FrameSize; 355 } 356 357 // hasFP - Return true if the specified function actually has a dedicated frame 358 // pointer register. 359 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 360 const MachineFrameInfo &MFI = MF.getFrameInfo(); 361 // FIXME: This is pretty much broken by design: hasFP() might be called really 362 // early, before the stack layout was calculated and thus hasFP() might return 363 // true or false here depending on the time of call. 364 return (MFI.getStackSize()) && needsFP(MF); 365 } 366 367 // needsFP - Return true if the specified function should have a dedicated frame 368 // pointer register. This is true if the function has variable sized allocas or 369 // if frame pointer elimination is disabled. 
370 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 371 const MachineFrameInfo &MFI = MF.getFrameInfo(); 372 373 // Naked functions have no stack frame pushed, so we don't have a frame 374 // pointer. 375 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 376 return false; 377 378 return MF.getTarget().Options.DisableFramePointerElim(MF) || 379 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 380 MF.exposesReturnsTwice() || 381 (MF.getTarget().Options.GuaranteedTailCallOpt && 382 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 383 } 384 385 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 386 bool is31 = needsFP(MF); 387 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 388 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 389 390 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 391 bool HasBP = RegInfo->hasBasePointer(MF); 392 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 393 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 394 395 for (MachineBasicBlock &MBB : MF) 396 for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) { 397 --MBBI; 398 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 399 MachineOperand &MO = MBBI->getOperand(I); 400 if (!MO.isReg()) 401 continue; 402 403 switch (MO.getReg()) { 404 case PPC::FP: 405 MO.setReg(FPReg); 406 break; 407 case PPC::FP8: 408 MO.setReg(FP8Reg); 409 break; 410 case PPC::BP: 411 MO.setReg(BPReg); 412 break; 413 case PPC::BP8: 414 MO.setReg(BP8Reg); 415 break; 416 417 } 418 } 419 } 420 } 421 422 /* This function will do the following: 423 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 424 respectively (defaults recommended by the ABI) and return true 425 - If MBB is not an entry block, initialize the register scavenger and look 426 for available registers. 
427 - If the defaults (R0/R12) are available, return true 428 - If TwoUniqueRegsRequired is set to true, it looks for two unique 429 registers. Otherwise, look for a single available register. 430 - If the required registers are found, set SR1 and SR2 and return true. 431 - If the required registers are not found, set SR2 or both SR1 and SR2 to 432 PPC::NoRegister and return false. 433 434 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 435 is not set, this function will attempt to find two different registers, but 436 still return true if only one register is available (and set SR1 == SR2). 437 */ 438 bool 439 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 440 bool UseAtEnd, 441 bool TwoUniqueRegsRequired, 442 Register *SR1, 443 Register *SR2) const { 444 RegScavenger RS; 445 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 446 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 447 448 // Set the defaults for the two scratch registers. 449 if (SR1) 450 *SR1 = R0; 451 452 if (SR2) { 453 assert (SR1 && "Asking for the second scratch register but not the first?"); 454 *SR2 = R12; 455 } 456 457 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 458 if ((UseAtEnd && MBB->isReturnBlock()) || 459 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 460 return true; 461 462 RS.enterBasicBlock(*MBB); 463 464 if (UseAtEnd && !MBB->empty()) { 465 // The scratch register will be used at the end of the block, so must 466 // consider all registers used within the block 467 468 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 469 // If no terminator, back iterator up to previous instruction. 470 if (MBBI == MBB->end()) 471 MBBI = std::prev(MBBI); 472 473 if (MBBI != MBB->begin()) 474 RS.forward(MBBI); 475 } 476 477 // If the two registers are available, we're all good. 
478 // Note that we only return here if both R0 and R12 are available because 479 // although the function may not require two unique registers, it may benefit 480 // from having two so we should try to provide them. 481 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 482 return true; 483 484 // Get the list of callee-saved registers for the target. 485 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 486 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 487 488 // Get all the available registers in the block. 489 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 490 &PPC::GPRCRegClass); 491 492 // We shouldn't use callee-saved registers as scratch registers as they may be 493 // available when looking for a candidate block for shrink wrapping but not 494 // available when the actual prologue/epilogue is being emitted because they 495 // were added as live-in to the prologue block by PrologueEpilogueInserter. 496 for (int i = 0; CSRegs[i]; ++i) 497 BV.reset(CSRegs[i]); 498 499 // Set the first scratch register to the first available one. 500 if (SR1) { 501 int FirstScratchReg = BV.find_first(); 502 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 503 } 504 505 // If there is another one available, set the second scratch register to that. 506 // Otherwise, set it to either PPC::NoRegister if this function requires two 507 // or to whatever SR1 is set to if this function doesn't require two. 508 if (SR2) { 509 int SecondScratchReg = BV.find_next(*SR1); 510 if (SecondScratchReg != -1) 511 *SR2 = SecondScratchReg; 512 else 513 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 514 } 515 516 // Now that we've done our best to provide both registers, double check 517 // whether we were unable to provide enough. 518 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 519 return false; 520 521 return true; 522 } 523 524 // We need a scratch register for spilling LR and for spilling CR. 
By default, 525 // we use two scratch registers to hide latency. However, if only one scratch 526 // register is available, we can adjust for that by not overlapping the spill 527 // code. However, if we need to realign the stack (i.e. have a base pointer) 528 // and the stack frame is large, we need two scratch registers. 529 // Also, stack probe requires two scratch registers, one for old sp, one for 530 // large frame and large probe size. 531 bool 532 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 533 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 534 MachineFunction &MF = *(MBB->getParent()); 535 bool HasBP = RegInfo->hasBasePointer(MF); 536 unsigned FrameSize = determineFrameLayout(MF); 537 int NegFrameSize = -FrameSize; 538 bool IsLargeFrame = !isInt<16>(NegFrameSize); 539 MachineFrameInfo &MFI = MF.getFrameInfo(); 540 Align MaxAlign = MFI.getMaxAlign(); 541 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 542 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 543 544 return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || 545 TLI.hasInlineStackProbe(MF); 546 } 547 548 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 549 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 550 551 return findScratchRegister(TmpMBB, false, 552 twoUniqueScratchRegsRequired(TmpMBB)); 553 } 554 555 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 556 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 557 558 return findScratchRegister(TmpMBB, true); 559 } 560 561 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 562 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 563 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 564 565 // Abort if there is no register info or function info. 
566 if (!RegInfo || !FI) 567 return false; 568 569 // Only move the stack update on ELFv2 ABI and PPC64. 570 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 571 return false; 572 573 // Check the frame size first and return false if it does not fit the 574 // requirements. 575 // We need a non-zero frame size as well as a frame that will fit in the red 576 // zone. This is because by moving the stack pointer update we are now storing 577 // to the red zone until the stack pointer is updated. If we get an interrupt 578 // inside the prologue but before the stack update we now have a number of 579 // stores to the red zone and those stores must all fit. 580 MachineFrameInfo &MFI = MF.getFrameInfo(); 581 unsigned FrameSize = MFI.getStackSize(); 582 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 583 return false; 584 585 // Frame pointers and base pointers complicate matters so don't do anything 586 // if we have them. For example having a frame pointer will sometimes require 587 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 588 // difficult. Similar situation exists with setjmp. 589 if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) 590 return false; 591 592 // Calls to fast_cc functions use different rules for passing parameters on 593 // the stack from the ABI and using PIC base in the function imposes 594 // similar restrictions to using the base pointer. It is not generally safe 595 // to move the stack pointer update in these situations. 596 if (FI->hasFastCall() || FI->usesPICBase()) 597 return false; 598 599 // Finally we can move the stack update if we do not require register 600 // scavenging. Register scavenging can introduce more spills and so 601 // may make the frame size larger than we have computed. 
602 return !RegInfo->requiresFrameIndexScavenging(MF); 603 } 604 605 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 606 MachineBasicBlock &MBB) const { 607 MachineBasicBlock::iterator MBBI = MBB.begin(); 608 MachineFrameInfo &MFI = MF.getFrameInfo(); 609 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 610 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 611 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 612 613 MachineModuleInfo &MMI = MF.getMMI(); 614 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 615 DebugLoc dl; 616 // AIX assembler does not support cfi directives. 617 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 618 619 const bool HasFastMFLR = Subtarget.hasFastMFLR(); 620 621 // Get processor type. 622 bool isPPC64 = Subtarget.isPPC64(); 623 // Get the ABI. 624 bool isSVR4ABI = Subtarget.isSVR4ABI(); 625 bool isELFv2ABI = Subtarget.isELFv2ABI(); 626 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); 627 628 // Work out frame sizes. 629 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); 630 int64_t NegFrameSize = -FrameSize; 631 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))) 632 llvm_unreachable("Unhandled stack size!"); 633 634 if (MFI.isFrameAddressTaken()) 635 replaceFPWithRealFP(MF); 636 637 // Check if the link register (LR) must be saved. 638 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 639 bool MustSaveLR = FI->mustSaveLR(); 640 bool MustSaveTOC = FI->mustSaveTOC(); 641 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 642 bool MustSaveCR = !MustSaveCRs.empty(); 643 // Do we have a frame pointer and/or base pointer for this function? 644 bool HasFP = hasFP(MF); 645 bool HasBP = RegInfo->hasBasePointer(MF); 646 bool HasRedZone = isPPC64 || !isSVR4ABI; 647 bool HasROPProtect = Subtarget.hasROPProtect(); 648 bool HasPrivileged = Subtarget.hasPrivileged(); 649 650 Register SPReg = isPPC64 ? 
PPC::X1 : PPC::R1; 651 Register BPReg = RegInfo->getBaseRegister(MF); 652 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 653 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 654 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 655 Register ScratchReg; 656 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 657 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 658 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 659 : PPC::MFLR ); 660 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 661 : PPC::STW ); 662 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 663 : PPC::STWU ); 664 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 665 : PPC::STWUX); 666 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 667 : PPC::OR ); 668 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 669 : PPC::SUBFC); 670 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 671 : PPC::SUBFIC); 672 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 673 : PPC::MFCR); 674 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 675 const MCInstrDesc &HashST = 676 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) 677 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); 678 679 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 680 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 681 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 682 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 683 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 684 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 685 686 // Using the same bool variable as below to suppress compiler warnings. 
687 bool SingleScratchReg = findScratchRegister( 688 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); 689 assert(SingleScratchReg && 690 "Required number of registers not available in this block"); 691 692 SingleScratchReg = ScratchReg == TempReg; 693 694 int64_t LROffset = getReturnSaveOffset(); 695 696 int64_t FPOffset = 0; 697 if (HasFP) { 698 MachineFrameInfo &MFI = MF.getFrameInfo(); 699 int FPIndex = FI->getFramePointerSaveIndex(); 700 assert(FPIndex && "No Frame Pointer Save Slot!"); 701 FPOffset = MFI.getObjectOffset(FPIndex); 702 } 703 704 int64_t BPOffset = 0; 705 if (HasBP) { 706 MachineFrameInfo &MFI = MF.getFrameInfo(); 707 int BPIndex = FI->getBasePointerSaveIndex(); 708 assert(BPIndex && "No Base Pointer Save Slot!"); 709 BPOffset = MFI.getObjectOffset(BPIndex); 710 } 711 712 int64_t PBPOffset = 0; 713 if (FI->usesPICBase()) { 714 MachineFrameInfo &MFI = MF.getFrameInfo(); 715 int PBPIndex = FI->getPICBasePointerSaveIndex(); 716 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 717 PBPOffset = MFI.getObjectOffset(PBPIndex); 718 } 719 720 // Get stack alignments. 721 Align MaxAlign = MFI.getMaxAlign(); 722 if (HasBP && MaxAlign > 1) 723 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 724 725 // Frames of 32KB & larger require special handling because they cannot be 726 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 727 bool isLargeFrame = !isInt<16>(NegFrameSize); 728 729 // Check if we can move the stack update instruction (stdu) down the prologue 730 // past the callee saves. Hopefully this will avoid the situation where the 731 // saves are waiting for the update on the store with update to complete. 732 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 733 bool MovingStackUpdateDown = false; 734 735 // Check if we can move the stack update. 
736 if (stackUpdateCanBeMoved(MF)) { 737 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 738 for (CalleeSavedInfo CSI : Info) { 739 // If the callee saved register is spilled to a register instead of the 740 // stack then the spill no longer uses the stack pointer. 741 // This can lead to two consequences: 742 // 1) We no longer need to update the stack because the function does not 743 // spill any callee saved registers to stack. 744 // 2) We have a situation where we still have to update the stack pointer 745 // even though some registers are spilled to other registers. In 746 // this case the current code moves the stack update to an incorrect 747 // position. 748 // In either case we should abort moving the stack update operation. 749 if (CSI.isSpilledToReg()) { 750 StackUpdateLoc = MBBI; 751 MovingStackUpdateDown = false; 752 break; 753 } 754 755 int FrIdx = CSI.getFrameIdx(); 756 // If the frame index is not negative the callee saved info belongs to a 757 // stack object that is not a fixed stack object. We ignore non-fixed 758 // stack objects because we won't move the stack update pointer past them. 759 if (FrIdx >= 0) 760 continue; 761 762 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 763 StackUpdateLoc++; 764 MovingStackUpdateDown = true; 765 } else { 766 // We need all of the Frame Indices to meet these conditions. 767 // If they do not, abort the whole operation. 768 StackUpdateLoc = MBBI; 769 MovingStackUpdateDown = false; 770 break; 771 } 772 } 773 774 // If the operation was not aborted then update the object offset. 775 if (MovingStackUpdateDown) { 776 for (CalleeSavedInfo CSI : Info) { 777 int FrIdx = CSI.getFrameIdx(); 778 if (FrIdx < 0) 779 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 780 } 781 } 782 } 783 784 // Where in the prologue we move the CR fields depends on how many scratch 785 // registers we have, and if we need to save the link register or not. 
This 786 // lambda is to avoid duplicating the logic in 2 places. 787 auto BuildMoveFromCR = [&]() { 788 if (isELFv2ABI && MustSaveCRs.size() == 1) { 789 // In the ELFv2 ABI, we are not required to save all CR fields. 790 // If only one CR field is clobbered, it is more efficient to use 791 // mfocrf to selectively save just that field, because mfocrf has short 792 // latency compares to mfcr. 793 assert(isPPC64 && "V2 ABI is 64-bit only."); 794 MachineInstrBuilder MIB = 795 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 796 MIB.addReg(MustSaveCRs[0], RegState::Kill); 797 } else { 798 MachineInstrBuilder MIB = 799 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 800 for (unsigned CRfield : MustSaveCRs) 801 MIB.addReg(CRfield, RegState::ImplicitKill); 802 } 803 }; 804 805 // If we need to spill the CR and the LR but we don't have two separate 806 // registers available, we must spill them one at a time 807 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 808 BuildMoveFromCR(); 809 BuildMI(MBB, MBBI, dl, StoreWordInst) 810 .addReg(TempReg, getKillRegState(true)) 811 .addImm(CRSaveOffset) 812 .addReg(SPReg); 813 } 814 815 if (MustSaveLR) 816 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 817 818 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 819 BuildMoveFromCR(); 820 821 if (HasRedZone) { 822 if (HasFP) 823 BuildMI(MBB, MBBI, dl, StoreInst) 824 .addReg(FPReg) 825 .addImm(FPOffset) 826 .addReg(SPReg); 827 if (FI->usesPICBase()) 828 BuildMI(MBB, MBBI, dl, StoreInst) 829 .addReg(PPC::R30) 830 .addImm(PBPOffset) 831 .addReg(SPReg); 832 if (HasBP) 833 BuildMI(MBB, MBBI, dl, StoreInst) 834 .addReg(BPReg) 835 .addImm(BPOffset) 836 .addReg(SPReg); 837 } 838 839 // Generate the instruction to store the LR. In the case where ROP protection 840 // is required the register holding the LR should not be killed as it will be 841 // used by the hash store instruction. 
842 auto SaveLR = [&](int64_t Offset) { 843 assert(MustSaveLR && "LR is not required to be saved!"); 844 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 845 .addReg(ScratchReg, getKillRegState(!HasROPProtect)) 846 .addImm(Offset) 847 .addReg(SPReg); 848 849 // Add the ROP protection Hash Store instruction. 850 // NOTE: This is technically a violation of the ABI. The hash can be saved 851 // up to 512 bytes into the Protected Zone. This can be outside of the 852 // initial 288 byte volatile program storage region in the Protected Zone. 853 // However, this restriction will be removed in an upcoming revision of the 854 // ABI. 855 if (HasROPProtect) { 856 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 857 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 858 assert((ImmOffset <= -8 && ImmOffset >= -512) && 859 "ROP hash save offset out of range."); 860 assert(((ImmOffset & 0x7) == 0) && 861 "ROP hash save offset must be 8 byte aligned."); 862 BuildMI(MBB, StackUpdateLoc, dl, HashST) 863 .addReg(ScratchReg, getKillRegState(true)) 864 .addImm(ImmOffset) 865 .addReg(SPReg); 866 } 867 }; 868 869 if (MustSaveLR && HasFastMFLR) 870 SaveLR(LROffset); 871 872 if (MustSaveCR && 873 !(SingleScratchReg && MustSaveLR)) { 874 assert(HasRedZone && "A red zone is always available on PPC64"); 875 BuildMI(MBB, MBBI, dl, StoreWordInst) 876 .addReg(TempReg, getKillRegState(true)) 877 .addImm(CRSaveOffset) 878 .addReg(SPReg); 879 } 880 881 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 882 if (!FrameSize) { 883 if (MustSaveLR && !HasFastMFLR) 884 SaveLR(LROffset); 885 return; 886 } 887 888 // Adjust stack pointer: r1 += NegFrameSize. 889 // If there is a preferred stack alignment, align R1 now 890 891 if (HasBP && HasRedZone) { 892 // Save a copy of r1 as the base pointer. 893 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 894 .addReg(SPReg) 895 .addReg(SPReg); 896 } 897 898 // Have we generated a STUX instruction to claim stack frame? 
If so, 899 // the negated frame size will be placed in ScratchReg. 900 bool HasSTUX = 901 (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || 902 (HasBP && MaxAlign > 1) || isLargeFrame; 903 904 // If we use STUX to update the stack pointer, we need the two scratch 905 // registers TempReg and ScratchReg, we have to save LR here which is stored 906 // in ScratchReg. 907 // If the offset can not be encoded into the store instruction, we also have 908 // to save LR here. 909 if (MustSaveLR && !HasFastMFLR && 910 (HasSTUX || !isInt<16>(FrameSize + LROffset))) 911 SaveLR(LROffset); 912 913 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 914 // pointer is always stored at SP, we will get a free probe due to an essential 915 // STU(X) instruction. 916 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 917 // To be consistent with other targets, a pseudo instruction is emitted and 918 // will be later expanded in `inlineStackProbe`. 919 BuildMI(MBB, MBBI, dl, 920 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 921 : PPC::PROBED_STACKALLOC_32)) 922 .addDef(TempReg) 923 .addDef(ScratchReg) // ScratchReg stores the old sp. 924 .addImm(NegFrameSize); 925 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 926 // update the ScratchReg to meet the assumption that ScratchReg contains 927 // the NegFrameSize. This solution is rather tricky. 928 if (!HasRedZone) { 929 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 930 .addReg(ScratchReg) 931 .addReg(SPReg); 932 } 933 } else { 934 // This condition must be kept in sync with canUseAsPrologue. 935 if (HasBP && MaxAlign > 1) { 936 if (isPPC64) 937 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 938 .addReg(SPReg) 939 .addImm(0) 940 .addImm(64 - Log2(MaxAlign)); 941 else // PPC32... 
942 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 943 .addReg(SPReg) 944 .addImm(0) 945 .addImm(32 - Log2(MaxAlign)) 946 .addImm(31); 947 if (!isLargeFrame) { 948 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 949 .addReg(ScratchReg, RegState::Kill) 950 .addImm(NegFrameSize); 951 } else { 952 assert(!SingleScratchReg && "Only a single scratch reg available"); 953 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize); 954 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 955 .addReg(ScratchReg, RegState::Kill) 956 .addReg(TempReg, RegState::Kill); 957 } 958 959 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 960 .addReg(SPReg, RegState::Kill) 961 .addReg(SPReg) 962 .addReg(ScratchReg); 963 } else if (!isLargeFrame) { 964 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 965 .addReg(SPReg) 966 .addImm(NegFrameSize) 967 .addReg(SPReg); 968 } else { 969 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); 970 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 971 .addReg(SPReg, RegState::Kill) 972 .addReg(SPReg) 973 .addReg(ScratchReg); 974 } 975 } 976 977 // Save the TOC register after the stack pointer update if a prologue TOC 978 // save is required for the function. 979 if (MustSaveTOC) { 980 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 981 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 982 .addReg(TOCReg, getKillRegState(true)) 983 .addImm(TOCSaveOffset) 984 .addReg(SPReg); 985 } 986 987 if (!HasRedZone) { 988 assert(!isPPC64 && "A red zone is always available on PPC64"); 989 if (HasSTUX) { 990 // The negated frame size is in ScratchReg, and the SPReg has been 991 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 992 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 993 // the stack frame (i.e. the old SP), ideally, we would put the old 994 // SP into a register and use it as the base for the stores. 
The 995 // problem is that the only available register may be ScratchReg, 996 // which could be R0, and R0 cannot be used as a base address. 997 998 // First, set ScratchReg to the old SP. This may need to be modified 999 // later. 1000 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1001 .addReg(ScratchReg, RegState::Kill) 1002 .addReg(SPReg); 1003 1004 if (ScratchReg == PPC::R0) { 1005 // R0 cannot be used as a base register, but it can be used as an 1006 // index in a store-indexed. 1007 int LastOffset = 0; 1008 if (HasFP) { 1009 // R0 += (FPOffset-LastOffset). 1010 // Need addic, since addi treats R0 as 0. 1011 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1012 .addReg(ScratchReg) 1013 .addImm(FPOffset-LastOffset); 1014 LastOffset = FPOffset; 1015 // Store FP into *R0. 1016 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1017 .addReg(FPReg, RegState::Kill) // Save FP. 1018 .addReg(PPC::ZERO) 1019 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1020 } 1021 if (FI->usesPICBase()) { 1022 // R0 += (PBPOffset-LastOffset). 1023 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1024 .addReg(ScratchReg) 1025 .addImm(PBPOffset-LastOffset); 1026 LastOffset = PBPOffset; 1027 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1028 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1029 .addReg(PPC::ZERO) 1030 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1031 } 1032 if (HasBP) { 1033 // R0 += (BPOffset-LastOffset). 1034 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1035 .addReg(ScratchReg) 1036 .addImm(BPOffset-LastOffset); 1037 LastOffset = BPOffset; 1038 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1039 .addReg(BPReg, RegState::Kill) // Save BP. 1040 .addReg(PPC::ZERO) 1041 .addReg(ScratchReg); // This will be the index (R0 is ok here). 
1042 // BP = R0-LastOffset 1043 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1044 .addReg(ScratchReg, RegState::Kill) 1045 .addImm(-LastOffset); 1046 } 1047 } else { 1048 // ScratchReg is not R0, so use it as the base register. It is 1049 // already set to the old SP, so we can use the offsets directly. 1050 1051 // Now that the stack frame has been allocated, save all the necessary 1052 // registers using ScratchReg as the base address. 1053 if (HasFP) 1054 BuildMI(MBB, MBBI, dl, StoreInst) 1055 .addReg(FPReg) 1056 .addImm(FPOffset) 1057 .addReg(ScratchReg); 1058 if (FI->usesPICBase()) 1059 BuildMI(MBB, MBBI, dl, StoreInst) 1060 .addReg(PPC::R30) 1061 .addImm(PBPOffset) 1062 .addReg(ScratchReg); 1063 if (HasBP) { 1064 BuildMI(MBB, MBBI, dl, StoreInst) 1065 .addReg(BPReg) 1066 .addImm(BPOffset) 1067 .addReg(ScratchReg); 1068 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1069 .addReg(ScratchReg, RegState::Kill) 1070 .addReg(ScratchReg); 1071 } 1072 } 1073 } else { 1074 // The frame size is a known 16-bit constant (fitting in the immediate 1075 // field of STWU). To be here we have to be compiling for PPC32. 1076 // Since the SPReg has been decreased by FrameSize, add it back to each 1077 // offset. 1078 if (HasFP) 1079 BuildMI(MBB, MBBI, dl, StoreInst) 1080 .addReg(FPReg) 1081 .addImm(FrameSize + FPOffset) 1082 .addReg(SPReg); 1083 if (FI->usesPICBase()) 1084 BuildMI(MBB, MBBI, dl, StoreInst) 1085 .addReg(PPC::R30) 1086 .addImm(FrameSize + PBPOffset) 1087 .addReg(SPReg); 1088 if (HasBP) { 1089 BuildMI(MBB, MBBI, dl, StoreInst) 1090 .addReg(BPReg) 1091 .addImm(FrameSize + BPOffset) 1092 .addReg(SPReg); 1093 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1094 .addReg(SPReg) 1095 .addImm(FrameSize); 1096 } 1097 } 1098 } 1099 1100 // Save the LR now. 1101 if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset)) 1102 SaveLR(LROffset + FrameSize); 1103 1104 // Add Call Frame Information for the instructions we generated above. 
1105 if (needsCFI) { 1106 unsigned CFIIndex; 1107 1108 if (HasBP) { 1109 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1110 // because if the stack needed aligning then CFA won't be at a fixed 1111 // offset from FP/SP. 1112 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1113 CFIIndex = MF.addFrameInst( 1114 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1115 } else { 1116 // Adjust the definition of CFA to account for the change in SP. 1117 assert(NegFrameSize); 1118 CFIIndex = MF.addFrameInst( 1119 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1120 } 1121 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1122 .addCFIIndex(CFIIndex); 1123 1124 if (HasFP) { 1125 // Describe where FP was saved, at a fixed offset from CFA. 1126 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1127 CFIIndex = MF.addFrameInst( 1128 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1129 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1130 .addCFIIndex(CFIIndex); 1131 } 1132 1133 if (FI->usesPICBase()) { 1134 // Describe where FP was saved, at a fixed offset from CFA. 1135 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1136 CFIIndex = MF.addFrameInst( 1137 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1138 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1139 .addCFIIndex(CFIIndex); 1140 } 1141 1142 if (HasBP) { 1143 // Describe where BP was saved, at a fixed offset from CFA. 1144 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1145 CFIIndex = MF.addFrameInst( 1146 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1147 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1148 .addCFIIndex(CFIIndex); 1149 } 1150 1151 if (MustSaveLR) { 1152 // Describe where LR was saved, at a fixed offset from CFA. 
1153 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1154 CFIIndex = MF.addFrameInst( 1155 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1156 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1157 .addCFIIndex(CFIIndex); 1158 } 1159 } 1160 1161 // If there is a frame pointer, copy R1 into R31 1162 if (HasFP) { 1163 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1164 .addReg(SPReg) 1165 .addReg(SPReg); 1166 1167 if (!HasBP && needsCFI) { 1168 // Change the definition of CFA from SP+offset to FP+offset, because SP 1169 // will change at every alloca. 1170 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1171 unsigned CFIIndex = MF.addFrameInst( 1172 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1173 1174 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1175 .addCFIIndex(CFIIndex); 1176 } 1177 } 1178 1179 if (needsCFI) { 1180 // Describe where callee saved registers were saved, at fixed offsets from 1181 // CFA. 1182 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1183 for (const CalleeSavedInfo &I : CSI) { 1184 Register Reg = I.getReg(); 1185 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1186 1187 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1188 // subregisters of CR2. We just need to emit a move of CR2. 1189 if (PPC::CRBITRCRegClass.contains(Reg)) 1190 continue; 1191 1192 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1193 continue; 1194 1195 // For SVR4, don't emit a move for the CR spill slot if we haven't 1196 // spilled CRs. 1197 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1198 && !MustSaveCR) 1199 continue; 1200 1201 // For 64-bit SVR4 when we have spilled CRs, the spill location 1202 // is SP+8, not a frame-relative slot. 1203 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1204 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1205 // the whole CR word. 
In the ELFv2 ABI, every CR that was 1206 // actually saved gets its own CFI record. 1207 Register CRReg = isELFv2ABI? Reg : PPC::CR2; 1208 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1209 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1210 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1211 .addCFIIndex(CFIIndex); 1212 continue; 1213 } 1214 1215 if (I.isSpilledToReg()) { 1216 unsigned SpilledReg = I.getDstReg(); 1217 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1218 nullptr, MRI->getDwarfRegNum(Reg, true), 1219 MRI->getDwarfRegNum(SpilledReg, true))); 1220 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1221 .addCFIIndex(CFIRegister); 1222 } else { 1223 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); 1224 // We have changed the object offset above but we do not want to change 1225 // the actual offsets in the CFI instruction so we have to undo the 1226 // offset change here. 1227 if (MovingStackUpdateDown) 1228 Offset -= NegFrameSize; 1229 1230 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1231 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1232 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1233 .addCFIIndex(CFIIndex); 1234 } 1235 } 1236 } 1237 } 1238

// Expand the PROBED_STACKALLOC_64 / PROBED_STACKALLOC_32 pseudo instruction
// found in PrologMBB into store-with-update instructions that allocate the
// stack frame in probe-sized steps. If PrologMBB contains no such pseudo this
// function returns without changing anything.
//
// The pseudo's operands are read as follows:
//   operand 0 - register used as scratch (ScratchReg below),
//   operand 1 - register that ends up holding the old stack pointer and is
//               stored as the back chain (FPReg below),
//   operand 2 - the negated frame size (immediate).
//
// Two strategies are used:
//   * HasBP && MaxAlign > 1: the final SP depends on the run-time value of SP,
//     so a compare-and-branch loop is emitted by probeRealignedStack.
//   * otherwise: the residual (frame size modulo probe size) is allocated
//     first, followed by either a short unrolled sequence (fewer than three
//     blocks) or a CTR-based loop.
//
// New basic blocks may be inserted after PrologMBB; the pseudo itself is
// erased at the end and the NumPrologProbed statistic is incremented.
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  // Locate the stack-allocation pseudo; only the first match is expanded.
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  // Number of whole probe-sized blocks in the frame, and the negated size of
  // whatever is left over.
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  // Register-to-register copy is emitted as "or rD, rS, rS".
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  // buildDefCFAReg: change only the register the CFA is computed from.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // buildDefCFA: define the CFA as Reg + Offset.
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form: it has to
  // fit in a signed 16-bit field and be a multiple of 4.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg. A value that fits in 16
  // bits is loaded with a single LI; otherwise the upper part is loaded with
  // LIS and the low 16 bits are OR-ed in.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  // With UseDForm the displacement is the immediate NegSize and NegSizeReg is
  // ignored; otherwise the indexed form is used, the displacement is taken
  // from NegSizeReg and NegSize is ignored.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe stack when realignment is required.
  // Note that, according to the ABI's requirement, *sp must always equal the
  // value of the back-chain pointer, so only st(w|d)u(x) can be used to update
  // sp.
  // Following is pseudo code:
  // final_sp = (sp & align) + negframesize;
  // neg_gap = final_sp - sp;
  // while (neg_gap < negprobesize) {
  //   stdu fp, negprobesize(sp);
  //   neg_gap -= negprobesize;
  // }
  // stdux fp, sp, neg_gap
  //
  // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
  // before the probe code, so we don't need to save it and get one additional
  // reg that can be used to materialize the probe size if we need to use
  // xform.
  // Otherwise, we can NOT materialize the probe size, so we can only use Dform
  // for now.
  //
  // The allocations are:
  // if (HasBP && HasRedZone) {
  //   r0: materialize the probe size if needed so that we can use xform.
  //   r12: `neg_gap`
  // } else {
  //   r0: back-chain pointer
  //   r12: `neg_gap`.
  // }
  //
  // On entry TempReg holds the final stack pointer. The code following MBBI in
  // MBB is moved into a newly created exit block, which is returned.
  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register ScratchReg, Register TempReg) {
    assert(HasBP && "The function is supposed to have base pointer when its "
                    "stack is realigned.");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");

    // FIXME: We can eliminate this limitation if we get more information about
    // which parts of the red zone are already used. Used red zone can be
    // treated as probed. But there might be `holes' in the probed red zone,
    // which could complicate the implementation.
    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
           "Probe size should be larger or equal to the size of red-zone so "
           "that red-zone is not clobbered by probing.");

    Register &FinalStackPtr = TempReg;
    // FIXME: We only support NegProbeSize materializable by DForm currently.
    // When HasBP && HasRedZone, we can use xform if we have an additional idle
    // register.
    // Note: NegProbeSize is captured by reference, so this clamp also changes
    // the variable of the enclosing function.
    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
    assert(isInt<16>(NegProbeSize) &&
           "NegProbeSize should be materializable by DForm");
    Register CRReg = PPC::CR0;
    // Layout of the output assembly is roughly:
    // bb.0:
    //   ...
    //   sub $scratchreg, $finalsp, r1
    //   cmpdi $scratchreg, <negprobesize>
    //   bge bb.2
    // bb.1:
    //   stdu <backchain>, <negprobesize>(r1)
    //   sub $scratchreg, $scratchreg, negprobesize
    //   cmpdi $scratchreg, <negprobesize>
    //   blt bb.1
    // bb.2:
    //   stdux <backchain>, r1, $scratchreg
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.2
    // Built first: the remainder of MBB (from MBBI on) is spliced out here, so
    // that the instructions appended to MBB for bb.0 below land at its end.
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
                       BackChainPointer);
      if (HasRedZone)
        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
        // to TempReg to satisfy it.
        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
            .addReg(BPReg)
            .addReg(BPReg);
      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    }
    // bb.0
    {
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
          .addReg(SPReg)
          .addReg(FinalStackPtr);
      // Without a red zone TempReg doubles as the back-chain pointer, so it is
      // overwritten with the current SP once the gap has been computed.
      if (!HasRedZone)
        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(&MBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_GE)
          .addReg(CRReg)
          .addMBB(ProbeExitMBB);
      MBB.addSuccessor(ProbeLoopBodyMBB);
      MBB.addSuccessor(ProbeExitMBB);
    }
    // bb.1
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
                       0, true /*UseDForm*/, BackChainPointer);
      // NOTE(review): NegProbeSize may have been clamped to -32768 above, in
      // which case -NegProbeSize is 32768 -- confirm that this value is
      // acceptable as the ADDI immediate.
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
              ScratchReg)
          .addReg(ScratchReg)
          .addImm(-NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
              CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_LT)
          .addReg(CRReg)
          .addMBB(ProbeLoopBodyMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    }
    // Update liveins.
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
  // the offset subtracted from SP is determined by SP's runtime value.
  if (HasBP && MaxAlign > 1) {
    // Calculate final stack pointer.
    // First extract the low Log2(MaxAlign) bits of SP into ScratchReg ...
    if (isPPC64)
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    // ... then remove them from SP to obtain the realigned SP in FPReg ...
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
            FPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
    // ... and finally add the (negative) frame size.
    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
            FPReg)
        .addReg(ScratchReg)
        .addReg(FPReg);
    // The pseudo (and everything after it) now lives in the returned block.
    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
    if (needsCFI)
      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
    // Probe residual part.
    if (NegResidualSize) {
      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
      if (!ResidualUseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                       ResidualUseDForm, FPReg);
    }
    bool UseDForm = CanUseDForm(NegProbeSize);
    // If number of blocks is small, just probe them directly.
    if (NumBlocks < 3) {
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      for (int i = 0; i < NumBlocks; ++i)
        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                         FPReg);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
      }
    } else {
      // Since CTR is a volatile register and current shrinkwrap implementation
      // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
      // CTR loop to probe.
      // Calculate trip count and stores it in CTRReg.
      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
          .addReg(ScratchReg, RegState::Kill);
      // ScratchReg is free again once CTR is set; reuse it for the probe size
      // when the indexed store form is needed.
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      // Create MBBs of the loop.
      MachineFunction::iterator MBBInsertPoint =
          std::next(CurrentMBB->getIterator());
      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, LoopMBB);
      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, ExitMBB);
      // Synthesize the loop body.
      allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                       UseDForm, FPReg);
      BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
          .addMBB(LoopMBB);
      LoopMBB->addSuccessor(ExitMBB);
      LoopMBB->addSuccessor(LoopMBB);
      // Synthesize the exit MBB.
      // Everything after the pseudo moves to ExitMBB; the pseudo itself stays
      // in CurrentMBB until it is erased below.
      ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                      std::next(MachineBasicBlock::iterator(MI)),
                      CurrentMBB->end());
      ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
      CurrentMBB->addSuccessor(LoopMBB);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
      }
      // Update liveins.
      recomputeLiveIns(*LoopMBB);
      recomputeLiveIns(*ExitMBB);
    }
  }
  ++NumPrologProbed;
  // The pseudo has been fully expanded; remove it.
  MI.eraseFromParent();
}

1546 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1547 MachineBasicBlock &MBB) const { 1548 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1549 DebugLoc dl; 1550 1551 if (MBBI != MBB.end()) 1552 dl = MBBI->getDebugLoc(); 1553 1554 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1555 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1556 1557 // Get alignment info so we know how to restore the SP. 1558 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1559 1560 // Get the number of bytes allocated from the FrameInfo. 1561 int64_t FrameSize = MFI.getStackSize(); 1562 1563 // Get processor type. 1564 bool isPPC64 = Subtarget.isPPC64(); 1565 1566 // Check if the link register (LR) has been saved. 1567 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1568 bool MustSaveLR = FI->mustSaveLR(); 1569 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1570 bool MustSaveCR = !MustSaveCRs.empty(); 1571 // Do we have a frame pointer and/or base pointer for this function? 1572 bool HasFP = hasFP(MF); 1573 bool HasBP = RegInfo->hasBasePointer(MF); 1574 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1575 bool HasROPProtect = Subtarget.hasROPProtect(); 1576 bool HasPrivileged = Subtarget.hasPrivileged(); 1577 1578 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1579 Register BPReg = RegInfo->getBaseRegister(MF); 1580 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1581 Register ScratchReg; 1582 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1583 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1584 : PPC::MTLR ); 1585 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1586 : PPC::LWZ ); 1587 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ?
PPC::LIS8 1588 : PPC::LIS ); 1589 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1590 : PPC::OR ); 1591 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1592 : PPC::ORI ); 1593 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1594 : PPC::ADDI ); 1595 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1596 : PPC::ADD4 ); 1597 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1598 : PPC::LWZ); 1599 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1600 : PPC::MTOCRF); 1601 const MCInstrDesc &HashChk = 1602 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8) 1603 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK)); 1604 int64_t LROffset = getReturnSaveOffset(); 1605 1606 int64_t FPOffset = 0; 1607 1608 // Using the same bool variable as below to suppress compiler warnings. 1609 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1610 &TempReg); 1611 assert(SingleScratchReg && 1612 "Could not find an available scratch register"); 1613 1614 SingleScratchReg = ScratchReg == TempReg; 1615 1616 if (HasFP) { 1617 int FPIndex = FI->getFramePointerSaveIndex(); 1618 assert(FPIndex && "No Frame Pointer Save Slot!"); 1619 FPOffset = MFI.getObjectOffset(FPIndex); 1620 } 1621 1622 int64_t BPOffset = 0; 1623 if (HasBP) { 1624 int BPIndex = FI->getBasePointerSaveIndex(); 1625 assert(BPIndex && "No Base Pointer Save Slot!"); 1626 BPOffset = MFI.getObjectOffset(BPIndex); 1627 } 1628 1629 int64_t PBPOffset = 0; 1630 if (FI->usesPICBase()) { 1631 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1632 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1633 PBPOffset = MFI.getObjectOffset(PBPIndex); 1634 } 1635 1636 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1637 1638 if (IsReturnBlock) { 1639 unsigned RetOpcode = MBBI->getOpcode(); 1640 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1641 RetOpcode == PPC::TCRETURNdi || 1642 RetOpcode == PPC::TCRETURNai || 1643 
RetOpcode == PPC::TCRETURNri8 || 1644 RetOpcode == PPC::TCRETURNdi8 || 1645 RetOpcode == PPC::TCRETURNai8; 1646 1647 if (UsesTCRet) { 1648 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1649 MachineOperand &StackAdjust = MBBI->getOperand(1); 1650 assert(StackAdjust.isImm() && "Expecting immediate value."); 1651 // Adjust stack pointer. 1652 int StackAdj = StackAdjust.getImm(); 1653 int Delta = StackAdj - MaxTCRetDelta; 1654 assert((Delta >= 0) && "Delta must be positive"); 1655 if (MaxTCRetDelta>0) 1656 FrameSize += (StackAdj +Delta); 1657 else 1658 FrameSize += StackAdj; 1659 } 1660 } 1661 1662 // Frames of 32KB & larger require special handling because they cannot be 1663 // indexed into with a simple LD/LWZ immediate offset operand. 1664 bool isLargeFrame = !isInt<16>(FrameSize); 1665 1666 // On targets without red zone, the SP needs to be restored last, so that 1667 // all live contents of the stack frame are upwards of the SP. This means 1668 // that we cannot restore SP just now, since there may be more registers 1669 // to restore from the stack frame (e.g. R31). If the frame size is not 1670 // a simple immediate value, we will need a spare register to hold the 1671 // restored SP. If the frame size is known and small, we can simply adjust 1672 // the offsets of the registers to be restored, and still use SP to restore 1673 // them. In such case, the final update of SP will be to add the frame 1674 // size to it. 1675 // To simplify the code, set RBReg to the base register used to restore 1676 // values from the stack, and set SPAdd to the value that needs to be added 1677 // to the SP at the end. The default values are as if red zone was present. 1678 unsigned RBReg = SPReg; 1679 uint64_t SPAdd = 0; 1680 1681 // Check if we can move the stack update instruction up the epilogue 1682 // past the callee saves. 
This will allow the move to LR instruction 1683 // to be executed before the restores of the callee saves which means 1684 // that the callee saves can hide the latency from the MTLR instrcution. 1685 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1686 if (stackUpdateCanBeMoved(MF)) { 1687 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1688 for (CalleeSavedInfo CSI : Info) { 1689 // If the callee saved register is spilled to another register abort the 1690 // stack update movement. 1691 if (CSI.isSpilledToReg()) { 1692 StackUpdateLoc = MBBI; 1693 break; 1694 } 1695 int FrIdx = CSI.getFrameIdx(); 1696 // If the frame index is not negative the callee saved info belongs to a 1697 // stack object that is not a fixed stack object. We ignore non-fixed 1698 // stack objects because we won't move the update of the stack pointer 1699 // past them. 1700 if (FrIdx >= 0) 1701 continue; 1702 1703 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1704 StackUpdateLoc--; 1705 else { 1706 // Abort the operation as we can't update all CSR restores. 1707 StackUpdateLoc = MBBI; 1708 break; 1709 } 1710 } 1711 } 1712 1713 if (FrameSize) { 1714 // In the prologue, the loaded (or persistent) stack pointer value is 1715 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1716 // zone add this offset back now. 1717 1718 // If the function has a base pointer, the stack pointer has been copied 1719 // to it so we can restore it by copying in the other direction. 1720 if (HasRedZone && HasBP) { 1721 BuildMI(MBB, MBBI, dl, OrInst, RBReg). 1722 addReg(BPReg). 1723 addReg(BPReg); 1724 } 1725 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1726 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1727 // call which invalidates the stack pointer value in SP(0). So we use the 1728 // value of R31 in this case. Similar situation exists with setjmp. 
1729 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { 1730 assert(HasFP && "Expecting a valid frame pointer."); 1731 if (!HasRedZone) 1732 RBReg = FPReg; 1733 if (!isLargeFrame) { 1734 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1735 .addReg(FPReg).addImm(FrameSize); 1736 } else { 1737 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); 1738 BuildMI(MBB, MBBI, dl, AddInst) 1739 .addReg(RBReg) 1740 .addReg(FPReg) 1741 .addReg(ScratchReg); 1742 } 1743 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1744 if (HasRedZone) { 1745 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1746 .addReg(SPReg) 1747 .addImm(FrameSize); 1748 } else { 1749 // Make sure that adding FrameSize will not overflow the max offset 1750 // size. 1751 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1752 "Local offsets should be negative"); 1753 SPAdd = FrameSize; 1754 FPOffset += FrameSize; 1755 BPOffset += FrameSize; 1756 PBPOffset += FrameSize; 1757 } 1758 } else { 1759 // We don't want to use ScratchReg as a base register, because it 1760 // could happen to be R0. Use FP instead, but make sure to preserve it. 1761 if (!HasRedZone) { 1762 // If FP is not saved, copy it to ScratchReg. 1763 if (!HasFP) 1764 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1765 .addReg(FPReg) 1766 .addReg(FPReg); 1767 RBReg = FPReg; 1768 } 1769 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1770 .addImm(0) 1771 .addReg(SPReg); 1772 } 1773 } 1774 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1775 // If there is no red zone, ScratchReg may be needed for holding a useful 1776 // value (although not the base register). Make sure it is not overwritten 1777 // too early. 1778 1779 // If we need to restore both the LR and the CR and we only have one 1780 // available scratch register, we must do them one at a time. 
1781 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1782 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1783 // is live here. 1784 assert(HasRedZone && "Expecting red zone"); 1785 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1786 .addImm(CRSaveOffset) 1787 .addReg(SPReg); 1788 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1789 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1790 .addReg(TempReg, getKillRegState(i == e-1)); 1791 } 1792 1793 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1794 // LR is stored in the caller's stack frame. ScratchReg will be needed 1795 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1796 // a base register anyway, because it may happen to be R0. 1797 bool LoadedLR = false; 1798 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1799 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1800 .addImm(LROffset+SPAdd) 1801 .addReg(RBReg); 1802 LoadedLR = true; 1803 } 1804 1805 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1806 assert(RBReg == SPReg && "Should be using SP as a base register"); 1807 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1808 .addImm(CRSaveOffset) 1809 .addReg(RBReg); 1810 } 1811 1812 if (HasFP) { 1813 // If there is red zone, restore FP directly, since SP has already been 1814 // restored. Otherwise, restore the value of FP into ScratchReg. 
1815 if (HasRedZone || RBReg == SPReg) 1816 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1817 .addImm(FPOffset) 1818 .addReg(SPReg); 1819 else 1820 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1821 .addImm(FPOffset) 1822 .addReg(RBReg); 1823 } 1824 1825 if (FI->usesPICBase()) 1826 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1827 .addImm(PBPOffset) 1828 .addReg(RBReg); 1829 1830 if (HasBP) 1831 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1832 .addImm(BPOffset) 1833 .addReg(RBReg); 1834 1835 // There is nothing more to be loaded from the stack, so now we can 1836 // restore SP: SP = RBReg + SPAdd. 1837 if (RBReg != SPReg || SPAdd != 0) { 1838 assert(!HasRedZone && "This should not happen with red zone"); 1839 // If SPAdd is 0, generate a copy. 1840 if (SPAdd == 0) 1841 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1842 .addReg(RBReg) 1843 .addReg(RBReg); 1844 else 1845 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1846 .addReg(RBReg) 1847 .addImm(SPAdd); 1848 1849 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1850 if (RBReg == FPReg) 1851 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1852 .addReg(ScratchReg) 1853 .addReg(ScratchReg); 1854 1855 // Now load the LR from the caller's stack frame. 1856 if (MustSaveLR && !LoadedLR) 1857 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1858 .addImm(LROffset) 1859 .addReg(SPReg); 1860 } 1861 1862 if (MustSaveCR && 1863 !(SingleScratchReg && MustSaveLR)) 1864 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1865 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1866 .addReg(TempReg, getKillRegState(i == e-1)); 1867 1868 if (MustSaveLR) { 1869 // If ROP protection is required, an extra instruction is added to compute a 1870 // hash and then compare it to the hash stored in the prologue. 
1871 if (HasROPProtect) { 1872 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 1873 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 1874 assert((ImmOffset <= -8 && ImmOffset >= -512) && 1875 "ROP hash check location offset out of range."); 1876 assert(((ImmOffset & 0x7) == 0) && 1877 "ROP hash check location offset must be 8 byte aligned."); 1878 BuildMI(MBB, StackUpdateLoc, dl, HashChk) 1879 .addReg(ScratchReg) 1880 .addImm(ImmOffset) 1881 .addReg(SPReg); 1882 } 1883 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1884 } 1885 1886 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1887 // call optimization 1888 if (IsReturnBlock) { 1889 unsigned RetOpcode = MBBI->getOpcode(); 1890 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1891 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1892 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1893 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1894 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1895 1896 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1897 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1898 .addReg(SPReg).addImm(CallerAllocatedAmt); 1899 } else { 1900 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1901 .addImm(CallerAllocatedAmt >> 16); 1902 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1903 .addReg(ScratchReg, RegState::Kill) 1904 .addImm(CallerAllocatedAmt & 0xFFFF); 1905 BuildMI(MBB, MBBI, dl, AddInst) 1906 .addReg(SPReg) 1907 .addReg(FPReg) 1908 .addReg(ScratchReg); 1909 } 1910 } else { 1911 createTailCallBranchInstr(MBB); 1912 } 1913 } 1914 } 1915 1916 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1917 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1918 1919 // If we got this far a first terminator should exist. 
1920 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1921 1922 DebugLoc dl = MBBI->getDebugLoc(); 1923 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1924 1925 // Create branch instruction for pseudo tail call return instruction. 1926 // The TCRETURNdi variants are direct calls. Valid targets for those are 1927 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1928 // since we can tail call external functions with PC-Rel (i.e. we don't need 1929 // to worry about different TOC pointers). Some of the external functions will 1930 // be MO_GlobalAddress while others like memcpy for example, are going to 1931 // be MO_ExternalSymbol. 1932 unsigned RetOpcode = MBBI->getOpcode(); 1933 if (RetOpcode == PPC::TCRETURNdi) { 1934 MBBI = MBB.getLastNonDebugInstr(); 1935 MachineOperand &JumpTarget = MBBI->getOperand(0); 1936 if (JumpTarget.isGlobal()) 1937 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1938 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1939 else if (JumpTarget.isSymbol()) 1940 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1941 addExternalSymbol(JumpTarget.getSymbolName()); 1942 else 1943 llvm_unreachable("Expecting Global or External Symbol"); 1944 } else if (RetOpcode == PPC::TCRETURNri) { 1945 MBBI = MBB.getLastNonDebugInstr(); 1946 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1947 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1948 } else if (RetOpcode == PPC::TCRETURNai) { 1949 MBBI = MBB.getLastNonDebugInstr(); 1950 MachineOperand &JumpTarget = MBBI->getOperand(0); 1951 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1952 } else if (RetOpcode == PPC::TCRETURNdi8) { 1953 MBBI = MBB.getLastNonDebugInstr(); 1954 MachineOperand &JumpTarget = MBBI->getOperand(0); 1955 if (JumpTarget.isGlobal()) 1956 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
1957 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1958 else if (JumpTarget.isSymbol()) 1959 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1960 addExternalSymbol(JumpTarget.getSymbolName()); 1961 else 1962 llvm_unreachable("Expecting Global or External Symbol"); 1963 } else if (RetOpcode == PPC::TCRETURNri8) { 1964 MBBI = MBB.getLastNonDebugInstr(); 1965 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1966 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1967 } else if (RetOpcode == PPC::TCRETURNai8) { 1968 MBBI = MBB.getLastNonDebugInstr(); 1969 MachineOperand &JumpTarget = MBBI->getOperand(0); 1970 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1971 } 1972 } 1973 1974 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1975 BitVector &SavedRegs, 1976 RegScavenger *RS) const { 1977 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1978 1979 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1980 1981 // Do not explicitly save the callee saved VSRp registers. 1982 // The individual VSR subregisters will be saved instead. 1983 SavedRegs.reset(PPC::VSRp26); 1984 SavedRegs.reset(PPC::VSRp27); 1985 SavedRegs.reset(PPC::VSRp28); 1986 SavedRegs.reset(PPC::VSRp29); 1987 SavedRegs.reset(PPC::VSRp30); 1988 SavedRegs.reset(PPC::VSRp31); 1989 1990 // Save and clear the LR state. 1991 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1992 unsigned LR = RegInfo->getRARegister(); 1993 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1994 SavedRegs.reset(LR); 1995 1996 // Save R31 if necessary 1997 int FPSI = FI->getFramePointerSaveIndex(); 1998 const bool isPPC64 = Subtarget.isPPC64(); 1999 MachineFrameInfo &MFI = MF.getFrameInfo(); 2000 2001 // If the frame pointer save index hasn't been defined yet. 2002 if (!FPSI && needsFP(MF)) { 2003 // Find out what the fix offset of the frame pointer save area. 
2004 int FPOffset = getFramePointerSaveOffset(); 2005 // Allocate the frame index for frame pointer save area. 2006 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 2007 // Save the result. 2008 FI->setFramePointerSaveIndex(FPSI); 2009 } 2010 2011 int BPSI = FI->getBasePointerSaveIndex(); 2012 if (!BPSI && RegInfo->hasBasePointer(MF)) { 2013 int BPOffset = getBasePointerSaveOffset(); 2014 // Allocate the frame index for the base pointer save area. 2015 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 2016 // Save the result. 2017 FI->setBasePointerSaveIndex(BPSI); 2018 } 2019 2020 // Reserve stack space for the PIC Base register (R30). 2021 // Only used in SVR4 32-bit. 2022 if (FI->usesPICBase()) { 2023 int PBPSI = MFI.CreateFixedObject(4, -8, true); 2024 FI->setPICBasePointerSaveIndex(PBPSI); 2025 } 2026 2027 // Make sure we don't explicitly spill r31, because, for example, we have 2028 // some inline asm which explicitly clobbers it, when we otherwise have a 2029 // frame pointer and are using r31's spill slot for the prologue/epilogue 2030 // code. Same goes for the base pointer and the PIC base register. 2031 if (needsFP(MF)) 2032 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 2033 if (RegInfo->hasBasePointer(MF)) 2034 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 2035 if (FI->usesPICBase()) 2036 SavedRegs.reset(PPC::R30); 2037 2038 // Reserve stack space to move the linkage area to in case of a tail call. 2039 int TCSPDelta = 0; 2040 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2041 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 2042 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 2043 } 2044 2045 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 2046 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 2047 // object at the offset of the CR-save slot in the linkage area. 
The actual 2048 // save and restore of the condition register will be created as part of the 2049 // prologue and epilogue insertion, but the FixedStack object is needed to 2050 // keep the CalleSavedInfo valid. 2051 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 2052 SavedRegs.test(PPC::CR4))) { 2053 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 2054 const int64_t SpillOffset = 2055 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; 2056 int FrameIdx = 2057 MFI.CreateFixedObject(SpillSize, SpillOffset, 2058 /* IsImmutable */ true, /* IsAliased */ false); 2059 FI->setCRSpillFrameIndex(FrameIdx); 2060 } 2061 } 2062 2063 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 2064 RegScavenger *RS) const { 2065 // Get callee saved register information. 2066 MachineFrameInfo &MFI = MF.getFrameInfo(); 2067 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 2068 2069 // If the function is shrink-wrapped, and if the function has a tail call, the 2070 // tail call might not be in the new RestoreBlock, so real branch instruction 2071 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 2072 // RestoreBlock. So we handle this case here. 2073 if (MFI.getSavePoint() && MFI.hasTailCall()) { 2074 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 2075 for (MachineBasicBlock &MBB : MF) { 2076 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 2077 createTailCallBranchInstr(MBB); 2078 } 2079 } 2080 2081 // Early exit if no callee saved registers are modified! 2082 if (CSI.empty() && !needsFP(MF)) { 2083 addScavengingSpillSlot(MF, RS); 2084 return; 2085 } 2086 2087 unsigned MinGPR = PPC::R31; 2088 unsigned MinG8R = PPC::X31; 2089 unsigned MinFPR = PPC::F31; 2090 unsigned MinVR = Subtarget.hasSPE() ? 
PPC::S31 : PPC::V31; 2091 2092 bool HasGPSaveArea = false; 2093 bool HasG8SaveArea = false; 2094 bool HasFPSaveArea = false; 2095 bool HasVRSaveArea = false; 2096 2097 SmallVector<CalleeSavedInfo, 18> GPRegs; 2098 SmallVector<CalleeSavedInfo, 18> G8Regs; 2099 SmallVector<CalleeSavedInfo, 18> FPRegs; 2100 SmallVector<CalleeSavedInfo, 18> VRegs; 2101 2102 for (const CalleeSavedInfo &I : CSI) { 2103 Register Reg = I.getReg(); 2104 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 2105 (Reg != PPC::X2 && Reg != PPC::R2)) && 2106 "Not expecting to try to spill R2 in a function that must save TOC"); 2107 if (PPC::GPRCRegClass.contains(Reg)) { 2108 HasGPSaveArea = true; 2109 2110 GPRegs.push_back(I); 2111 2112 if (Reg < MinGPR) { 2113 MinGPR = Reg; 2114 } 2115 } else if (PPC::G8RCRegClass.contains(Reg)) { 2116 HasG8SaveArea = true; 2117 2118 G8Regs.push_back(I); 2119 2120 if (Reg < MinG8R) { 2121 MinG8R = Reg; 2122 } 2123 } else if (PPC::F8RCRegClass.contains(Reg)) { 2124 HasFPSaveArea = true; 2125 2126 FPRegs.push_back(I); 2127 2128 if (Reg < MinFPR) { 2129 MinFPR = Reg; 2130 } 2131 } else if (PPC::CRBITRCRegClass.contains(Reg) || 2132 PPC::CRRCRegClass.contains(Reg)) { 2133 ; // do nothing, as we already know whether CRs are spilled 2134 } else if (PPC::VRRCRegClass.contains(Reg) || 2135 PPC::SPERCRegClass.contains(Reg)) { 2136 // Altivec and SPE are mutually exclusive, but have the same stack 2137 // alignment requirements, so overload the save area for both cases. 2138 HasVRSaveArea = true; 2139 2140 VRegs.push_back(I); 2141 2142 if (Reg < MinVR) { 2143 MinVR = Reg; 2144 } 2145 } else { 2146 llvm_unreachable("Unknown RegisterClass!"); 2147 } 2148 } 2149 2150 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 2151 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2152 2153 int64_t LowerBound = 0; 2154 2155 // Take into account stack space reserved for tail calls. 
2156 int TCSPDelta = 0; 2157 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2158 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 2159 LowerBound = TCSPDelta; 2160 } 2161 2162 // The Floating-point register save area is right below the back chain word 2163 // of the previous stack frame. 2164 if (HasFPSaveArea) { 2165 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 2166 int FI = FPRegs[i].getFrameIdx(); 2167 2168 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2169 } 2170 2171 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 2172 } 2173 2174 // Check whether the frame pointer register is allocated. If so, make sure it 2175 // is spilled to the correct offset. 2176 if (needsFP(MF)) { 2177 int FI = PFI->getFramePointerSaveIndex(); 2178 assert(FI && "No Frame Pointer Save Slot!"); 2179 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2180 // FP is R31/X31, so no need to update MinGPR/MinG8R. 2181 HasGPSaveArea = true; 2182 } 2183 2184 if (PFI->usesPICBase()) { 2185 int FI = PFI->getPICBasePointerSaveIndex(); 2186 assert(FI && "No PIC Base Pointer Save Slot!"); 2187 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2188 2189 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 2190 HasGPSaveArea = true; 2191 } 2192 2193 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2194 if (RegInfo->hasBasePointer(MF)) { 2195 int FI = PFI->getBasePointerSaveIndex(); 2196 assert(FI && "No Base Pointer Save Slot!"); 2197 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2198 2199 Register BP = RegInfo->getBaseRegister(MF); 2200 if (PPC::G8RCRegClass.contains(BP)) { 2201 MinG8R = std::min<unsigned>(MinG8R, BP); 2202 HasG8SaveArea = true; 2203 } else if (PPC::GPRCRegClass.contains(BP)) { 2204 MinGPR = std::min<unsigned>(MinGPR, BP); 2205 HasGPSaveArea = true; 2206 } 2207 } 2208 2209 // General register save area starts right below the Floating-point 2210 // register save area. 
2211 if (HasGPSaveArea || HasG8SaveArea) { 2212 // Move general register save area spill slots down, taking into account 2213 // the size of the Floating-point register save area. 2214 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2215 if (!GPRegs[i].isSpilledToReg()) { 2216 int FI = GPRegs[i].getFrameIdx(); 2217 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2218 } 2219 } 2220 2221 // Move general register save area spill slots down, taking into account 2222 // the size of the Floating-point register save area. 2223 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2224 if (!G8Regs[i].isSpilledToReg()) { 2225 int FI = G8Regs[i].getFrameIdx(); 2226 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2227 } 2228 } 2229 2230 unsigned MinReg = 2231 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2232 TRI->getEncodingValue(MinG8R)); 2233 2234 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2235 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2236 } 2237 2238 // For 32-bit only, the CR save area is below the general register 2239 // save area. For 64-bit SVR4, the CR save area is addressed relative 2240 // to the stack pointer and hence does not need an adjustment here. 2241 // Only CR2 (the first nonvolatile spilled) has an associated frame 2242 // index so that we have a single uniform save area. 2243 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2244 // Adjust the frame index of the CR spill slot. 2245 for (const auto &CSInfo : CSI) { 2246 if (CSInfo.getReg() == PPC::CR2) { 2247 int FI = CSInfo.getFrameIdx(); 2248 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2249 break; 2250 } 2251 } 2252 2253 LowerBound -= 4; // The CR save area is always 4 bytes long. 2254 } 2255 2256 // Both Altivec and SPE have the same alignment and padding requirements 2257 // within the stack frame. 2258 if (HasVRSaveArea) { 2259 // Insert alignment padding, we need 16-byte alignment. 
Note: for positive 2260 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2261 // we are using negative number here (the stack grows downward). We should 2262 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2263 // is the alignment size ( n = 16 here) and y is the size after aligning. 2264 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2265 LowerBound &= ~(15); 2266 2267 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2268 int FI = VRegs[i].getFrameIdx(); 2269 2270 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2271 } 2272 } 2273 2274 addScavengingSpillSlot(MF, RS); 2275 } 2276 2277 void 2278 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2279 RegScavenger *RS) const { 2280 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2281 // a large stack, which will require scavenging a register to materialize a 2282 // large offset. 2283 2284 // We need to have a scavenger spill slot for spills if the frame size is 2285 // large. In case there is no free register for large-offset addressing, 2286 // this slot is used for the necessary emergency spill. Also, we need the 2287 // slot for dynamic stack allocations. 2288 2289 // The scavenger might be invoked if the frame offset does not fit into 2290 // the 16-bit immediate. We don't know the complete frame size here 2291 // because we've not yet computed callee-saved register spills or the 2292 // needed alignment padding. 2293 unsigned StackSize = determineFrameLayout(MF, true); 2294 MachineFrameInfo &MFI = MF.getFrameInfo(); 2295 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || 2296 (hasSpills(MF) && !isInt<16>(StackSize))) { 2297 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2298 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2299 const TargetRegisterClass &RC = Subtarget.isPPC64() ? 
G8RC : GPRC; 2300 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2301 unsigned Size = TRI.getSpillSize(RC); 2302 Align Alignment = TRI.getSpillAlign(RC); 2303 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2304 2305 // Might we have over-aligned allocas? 2306 bool HasAlVars = 2307 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2308 2309 // These kinds of spills might need two registers. 2310 if (spillsCR(MF) || HasAlVars) 2311 RS->addScavengingFrameIndex( 2312 MFI.CreateStackObject(Size, Alignment, false)); 2313 } 2314 } 2315 2316 // This function checks if a callee saved gpr can be spilled to a volatile 2317 // vector register. This occurs for leaf functions when the option 2318 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2319 // which were not spilled to vectors, return false so the target independent 2320 // code can handle them by assigning a FrameIdx to a stack slot. 2321 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2322 MachineFunction &MF, const TargetRegisterInfo *TRI, 2323 std::vector<CalleeSavedInfo> &CSI) const { 2324 2325 if (CSI.empty()) 2326 return true; // Early exit if no callee saved registers are modified! 2327 2328 // Early exit if cannot spill gprs to volatile vector registers. 2329 MachineFrameInfo &MFI = MF.getFrameInfo(); 2330 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2331 return false; 2332 2333 // Build a BitVector of VSRs that can be used for spilling GPRs. 
2334 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2335 BitVector BVCalleeSaved(TRI->getNumRegs()); 2336 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2337 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2338 for (unsigned i = 0; CSRegs[i]; ++i) 2339 BVCalleeSaved.set(CSRegs[i]); 2340 2341 for (unsigned Reg : BVAllocatable.set_bits()) { 2342 // Set to 0 if the register is not a volatile VSX register, or if it is 2343 // used in the function. 2344 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || 2345 MF.getRegInfo().isPhysRegUsed(Reg)) 2346 BVAllocatable.reset(Reg); 2347 } 2348 2349 bool AllSpilledToReg = true; 2350 unsigned LastVSRUsedForSpill = 0; 2351 for (auto &CS : CSI) { 2352 if (BVAllocatable.none()) 2353 return false; 2354 2355 Register Reg = CS.getReg(); 2356 2357 if (!PPC::G8RCRegClass.contains(Reg)) { 2358 AllSpilledToReg = false; 2359 continue; 2360 } 2361 2362 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs 2363 // into one VSR using the mtvsrdd instruction. 
2364 if (LastVSRUsedForSpill != 0) { 2365 CS.setDstReg(LastVSRUsedForSpill); 2366 BVAllocatable.reset(LastVSRUsedForSpill); 2367 LastVSRUsedForSpill = 0; 2368 continue; 2369 } 2370 2371 unsigned VolatileVFReg = BVAllocatable.find_first(); 2372 if (VolatileVFReg < BVAllocatable.size()) { 2373 CS.setDstReg(VolatileVFReg); 2374 LastVSRUsedForSpill = VolatileVFReg; 2375 } else { 2376 AllSpilledToReg = false; 2377 } 2378 } 2379 return AllSpilledToReg; 2380 } 2381 2382 bool PPCFrameLowering::spillCalleeSavedRegisters( 2383 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2384 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2385 2386 MachineFunction *MF = MBB.getParent(); 2387 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2388 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2389 bool MustSaveTOC = FI->mustSaveTOC(); 2390 DebugLoc DL; 2391 bool CRSpilled = false; 2392 MachineInstrBuilder CRMIB; 2393 BitVector Spilled(TRI->getNumRegs()); 2394 2395 VSRContainingGPRs.clear(); 2396 2397 // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one 2398 // or two GPRs, so we need table to record information for later save/restore. 2399 for (const CalleeSavedInfo &Info : CSI) { 2400 if (Info.isSpilledToReg()) { 2401 auto &SpilledVSR = 2402 VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second; 2403 assert(SpilledVSR.second == 0 && 2404 "Can't spill more than two GPRs into VSR!"); 2405 if (SpilledVSR.first == 0) 2406 SpilledVSR.first = Info.getReg(); 2407 else 2408 SpilledVSR.second = Info.getReg(); 2409 } 2410 } 2411 2412 for (const CalleeSavedInfo &I : CSI) { 2413 Register Reg = I.getReg(); 2414 2415 // CR2 through CR4 are the nonvolatile CR fields. 2416 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2417 2418 // Add the callee-saved register as live-in; it's killed at the spill. 
2419 // Do not do this for callee-saved registers that are live-in to the 2420 // function because they will already be marked live-in and this will be 2421 // adding it for a second time. It is an error to add the same register 2422 // to the set more than once. 2423 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2424 bool IsLiveIn = MRI.isLiveIn(Reg); 2425 if (!IsLiveIn) 2426 MBB.addLiveIn(Reg); 2427 2428 if (CRSpilled && IsCRField) { 2429 CRMIB.addReg(Reg, RegState::ImplicitKill); 2430 continue; 2431 } 2432 2433 // The actual spill will happen in the prologue. 2434 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2435 continue; 2436 2437 // Insert the spill to the stack frame. 2438 if (IsCRField) { 2439 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2440 if (!Subtarget.is32BitELFABI()) { 2441 // The actual spill will happen at the start of the prologue. 2442 FuncInfo->addMustSaveCR(Reg); 2443 } else { 2444 CRSpilled = true; 2445 FuncInfo->setSpillsCR(); 2446 2447 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2448 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
2449 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2450 .addReg(Reg, RegState::ImplicitKill); 2451 2452 MBB.insert(MI, CRMIB); 2453 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2454 .addReg(PPC::R12, 2455 getKillRegState(true)), 2456 I.getFrameIdx())); 2457 } 2458 } else { 2459 if (I.isSpilledToReg()) { 2460 unsigned Dst = I.getDstReg(); 2461 2462 if (Spilled[Dst]) 2463 continue; 2464 2465 if (VSRContainingGPRs[Dst].second != 0) { 2466 assert(Subtarget.hasP9Vector() && 2467 "mtvsrdd is unavailable on pre-P9 targets."); 2468 2469 NumPESpillVSR += 2; 2470 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst) 2471 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)) 2472 .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true)); 2473 } else if (VSRContainingGPRs[Dst].second == 0) { 2474 assert(Subtarget.hasP8Vector() && 2475 "Can't move GPR to VSR on pre-P8 targets."); 2476 2477 ++NumPESpillVSR; 2478 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), 2479 TRI->getSubReg(Dst, PPC::sub_64)) 2480 .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true)); 2481 } else { 2482 llvm_unreachable("More than two GPRs spilled to a VSR!"); 2483 } 2484 Spilled.set(Dst); 2485 } else { 2486 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2487 // Use !IsLiveIn for the kill flag. 2488 // We do not want to kill registers that are live in this function 2489 // before their use because they will become undefined registers. 2490 // Functions without NoUnwind need to preserve the order of elements in 2491 // saved vector registers. 
2492 if (Subtarget.needsSwapsForVSXMemOps() && 2493 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2494 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2495 I.getFrameIdx(), RC, TRI); 2496 else 2497 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC, 2498 TRI, Register()); 2499 } 2500 } 2501 } 2502 return true; 2503 } 2504 2505 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2506 bool CR4Spilled, MachineBasicBlock &MBB, 2507 MachineBasicBlock::iterator MI, 2508 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2509 2510 MachineFunction *MF = MBB.getParent(); 2511 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2512 DebugLoc DL; 2513 unsigned MoveReg = PPC::R12; 2514 2515 // 32-bit: FP-relative 2516 MBB.insert(MI, 2517 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2518 CSI[CSIIndex].getFrameIdx())); 2519 2520 unsigned RestoreOp = PPC::MTOCRF; 2521 if (CR2Spilled) 2522 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2523 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2524 2525 if (CR3Spilled) 2526 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2527 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2528 2529 if (CR4Spilled) 2530 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2531 .addReg(MoveReg, getKillRegState(true))); 2532 } 2533 2534 MachineBasicBlock::iterator PPCFrameLowering:: 2535 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2536 MachineBasicBlock::iterator I) const { 2537 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2538 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2539 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2540 // Add (actually subtract) back the amount the callee popped on return. 2541 if (int CalleeAmt = I->getOperand(1).getImm()) { 2542 bool is64Bit = Subtarget.isPPC64(); 2543 CalleeAmt *= -1; 2544 unsigned StackReg = is64Bit ? 
PPC::X1 : PPC::R1; 2545 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2546 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2547 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2548 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2549 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2550 const DebugLoc &dl = I->getDebugLoc(); 2551 2552 if (isInt<16>(CalleeAmt)) { 2553 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2554 .addReg(StackReg, RegState::Kill) 2555 .addImm(CalleeAmt); 2556 } else { 2557 MachineBasicBlock::iterator MBBI = I; 2558 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2559 .addImm(CalleeAmt >> 16); 2560 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2561 .addReg(TmpReg, RegState::Kill) 2562 .addImm(CalleeAmt & 0xFFFF); 2563 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2564 .addReg(StackReg, RegState::Kill) 2565 .addReg(TmpReg); 2566 } 2567 } 2568 } 2569 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2570 return MBB.erase(I); 2571 } 2572 2573 static bool isCalleeSavedCR(unsigned Reg) { 2574 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2575 } 2576 2577 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2578 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2579 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2580 MachineFunction *MF = MBB.getParent(); 2581 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2582 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2583 bool MustSaveTOC = FI->mustSaveTOC(); 2584 bool CR2Spilled = false; 2585 bool CR3Spilled = false; 2586 bool CR4Spilled = false; 2587 unsigned CSIIndex = 0; 2588 BitVector Restored(TRI->getNumRegs()); 2589 2590 // Initialize insertion-point logic; we will be restoring in reverse 2591 // order of spill. 
2592 MachineBasicBlock::iterator I = MI, BeforeI = I; 2593 bool AtStart = I == MBB.begin(); 2594 2595 if (!AtStart) 2596 --BeforeI; 2597 2598 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2599 Register Reg = CSI[i].getReg(); 2600 2601 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2602 continue; 2603 2604 // Restore of callee saved condition register field is handled during 2605 // epilogue insertion. 2606 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2607 continue; 2608 2609 if (Reg == PPC::CR2) { 2610 CR2Spilled = true; 2611 // The spill slot is associated only with CR2, which is the 2612 // first nonvolatile spilled. Save it here. 2613 CSIIndex = i; 2614 continue; 2615 } else if (Reg == PPC::CR3) { 2616 CR3Spilled = true; 2617 continue; 2618 } else if (Reg == PPC::CR4) { 2619 CR4Spilled = true; 2620 continue; 2621 } else { 2622 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2623 // least one CR register, restore all spilled CRs together. 
2624 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2625 bool is31 = needsFP(*MF); 2626 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2627 CSIIndex); 2628 CR2Spilled = CR3Spilled = CR4Spilled = false; 2629 } 2630 2631 if (CSI[i].isSpilledToReg()) { 2632 DebugLoc DL; 2633 unsigned Dst = CSI[i].getDstReg(); 2634 2635 if (Restored[Dst]) 2636 continue; 2637 2638 if (VSRContainingGPRs[Dst].second != 0) { 2639 assert(Subtarget.hasP9Vector()); 2640 NumPEReloadVSR += 2; 2641 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), 2642 VSRContainingGPRs[Dst].second) 2643 .addReg(Dst); 2644 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2645 VSRContainingGPRs[Dst].first) 2646 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2647 } else if (VSRContainingGPRs[Dst].second == 0) { 2648 assert(Subtarget.hasP8Vector()); 2649 ++NumPEReloadVSR; 2650 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2651 VSRContainingGPRs[Dst].first) 2652 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2653 } else { 2654 llvm_unreachable("More than two GPRs spilled to a VSR!"); 2655 } 2656 2657 Restored.set(Dst); 2658 2659 } else { 2660 // Default behavior for non-CR saves. 2661 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2662 2663 // Functions without NoUnwind need to preserve the order of elements in 2664 // saved vector registers. 2665 if (Subtarget.needsSwapsForVSXMemOps() && 2666 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2667 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2668 TRI); 2669 else 2670 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI, 2671 Register()); 2672 2673 assert(I != MBB.begin() && 2674 "loadRegFromStackSlot didn't insert any code!"); 2675 } 2676 } 2677 2678 // Insert in reverse order. 2679 if (AtStart) 2680 I = MBB.begin(); 2681 else { 2682 I = BeforeI; 2683 ++I; 2684 } 2685 } 2686 2687 // If we haven't yet spilled the CRs, do so now. 
2688 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2689 assert(Subtarget.is32BitELFABI() && 2690 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2691 bool is31 = needsFP(*MF); 2692 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2693 } 2694 2695 return true; 2696 } 2697 2698 uint64_t PPCFrameLowering::getTOCSaveOffset() const { 2699 return TOCSaveOffset; 2700 } 2701 2702 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const { 2703 return FramePointerSaveOffset; 2704 } 2705 2706 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const { 2707 return BasePointerSaveOffset; 2708 } 2709 2710 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2711 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2712 return false; 2713 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); 2714 } 2715