//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
    EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                         cl::desc("Enable spills in prologue to vector registers."),
                         cl::init(false), cl::Hidden);

static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
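// For illustration, the offsets the helpers above produce for two common
// configurations (derived directly from the code above, not quoted from the
// ABI documents):
//   64-bit ELFv2:  ReturnSaveOffset = 16, TOCSaveOffset = 24, LinkageSize = 32,
//                  FramePointerSaveOffset = -8, BasePointerSaveOffset = -16,
//                  CRSaveOffset = 8.
//   32-bit SVR4:   ReturnSaveOffset = 4, LinkageSize = 8,
//                  FramePointerSaveOffset = -4, BasePointerSaveOffset = -8
//                  (-12 when compiling position-independent code),
//                  CRSaveOffset = 8.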
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
  {PPC::F31, -8}, \
  {PPC::F30, -16}, \
  {PPC::F29, -24}, \
  {PPC::F28, -32}, \
  {PPC::F27, -40}, \
  {PPC::F26, -48}, \
  {PPC::F25, -56}, \
  {PPC::F24, -64}, \
  {PPC::F23, -72}, \
  {PPC::F22, -80}, \
  {PPC::F21, -88}, \
  {PPC::F20, -96}, \
  {PPC::F19, -104}, \
  {PPC::F18, -112}, \
  {PPC::F17, -120}, \
  {PPC::F16, -128}, \
  {PPC::F15, -136}, \
  {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
  {PPC::R31, -4}, \
  {PPC::R30, -8}, \
  {PPC::R29, -12}, \
  {PPC::R28, -16}, \
  {PPC::R27, -20}, \
  {PPC::R26, -24}, \
  {PPC::R25, -28}, \
  {PPC::R24, -32}, \
  {PPC::R23, -36}, \
  {PPC::R22, -40}, \
  {PPC::R21, -44}, \
  {PPC::R20, -48}, \
  {PPC::R19, -52}, \
  {PPC::R18, -56}, \
  {PPC::R17, -60}, \
  {PPC::R16, -64}, \
  {PPC::R15, -68}, \
  {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
  {PPC::X31, -8}, \
  {PPC::X30, -16}, \
  {PPC::X29, -24}, \
  {PPC::X28, -32}, \
  {PPC::X27, -40}, \
  {PPC::X26, -48}, \
  {PPC::X25, -56}, \
  {PPC::X24, -64}, \
  {PPC::X23, -72}, \
  {PPC::X22, -80}, \
  {PPC::X21, -88}, \
  {PPC::X20, -96}, \
  {PPC::X19, -104}, \
  {PPC::X18, -112}, \
  {PPC::X17, -120}, \
  {PPC::X16, -128}, \
  {PPC::X15, -136}, \
  {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
  {PPC::V31, -16}, \
  {PPC::V30, -32}, \
  {PPC::V29, -48}, \
  {PPC::V28, -64}, \
  {PPC::V27, -80}, \
  {PPC::V26, -96}, \
  {PPC::V25, -112}, \
  {PPC::V24, -128}, \
  {PPC::V23, -144}, \
  {PPC::V22, -160}, \
  {PPC::V21, -176}, \
  {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = array_lengthof(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = array_lengthof(AIXOffsets32);
  return AIXOffsets32;
}

static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
unsigned
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
                                                bool UseEstimate) const {
  unsigned NewMaxCallFrameSize = 0;
  unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
                                            &NewMaxCallFrameSize);
  MF.getFrameInfo().setStackSize(FrameSize);
  MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
  return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
                                       bool UseEstimate,
                                       unsigned *NewMaxCallFrameSize) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Get the number of bytes to allocate from the FrameInfo.
  unsigned FrameSize =
      UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();

  // Get stack alignments. The frame must be aligned to the greatest of these:
  Align TargetAlign = getStackAlign(); // alignment required per the ABI
  Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in the frame
  Align Alignment = std::max(TargetAlign, MaxAlign);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  unsigned LR = RegInfo->getRARegister();
  bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
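  // Illustrative note (follows from the checks below): the frame can be elided
  // entirely only when every byte of it fits in the ABI red zone below SP and
  // nothing forces an SP update - e.g. a PPC64 ELF leaf function with a few
  // dozen bytes of locals, no alloca, and no LR/TOC/base-pointer save.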
  bool CanUseRedZone = !MFI.hasVarSizedObjects() &&  // No dynamic alloca.
                       !MFI.adjustsStack() &&        // No calls.
                       !MustSaveLR(MF, LR) &&        // No need to save LR.
                       !FI->mustSaveTOC() &&         // No need to save TOC.
                       !RegInfo->hasBasePointer(MF); // No special alignment.

  // Note: for PPC32 SVR4ABI, we can still generate stackless
  // code if all local vars are reg-allocated.
  bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();

  // Check whether we can skip adjusting the stack pointer (by using red zone).
  if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
    // No need for frame
    return 0;
  }

  // Get the maximum call frame size of all the calls.
  unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();

  // Maximum call frame needs to be at least big enough for linkage area.
  unsigned minCallFrameSize = getLinkageSize();
  maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);

  // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
  // that allocations will be aligned.
  if (MFI.hasVarSizedObjects())
    maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);

  // Update the new max call frame size if the caller passes in a valid pointer.
  if (NewMaxCallFrameSize)
    *NewMaxCallFrameSize = maxCallFrameSize;

  // Include call frame size in total.
  FrameSize += maxCallFrameSize;

  // Make sure the frame is aligned.
  FrameSize = alignTo(FrameSize, Alignment);

  return FrameSize;
}

// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // FIXME: This is pretty much broken by design: hasFP() might be called really
  // early, before the stack layout was calculated and thus hasFP() might return
  // true or false here depending on the time of call.
  return (MFI.getStackSize()) && needsFP(MF);
}

// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.exposesReturnsTwice() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ?
                          (unsigned) PPC::X30 : FP8Reg;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;
        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so we must
    // consider all registers used within the block.

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ?
                                         &PPC::G8RCRegClass :
                                         &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
// A stack probe also requires two scratch registers: one to hold the old SP,
// and one for the case of a large frame with a large probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = isPPC64 || !isSVR4ABI;

  Register SPReg = isPPC64 ?
                            PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
  Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
  const MCInstrDesc &MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 : PPC::MFLR);
  const MCInstrDesc &StoreInst = TII.get(isPPC64 ? PPC::STD : PPC::STW);
  const MCInstrDesc &StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU : PPC::STWU);
  const MCInstrDesc &StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC);
  const MCInstrDesc &SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 : PPC::SUBFIC);
  const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 : PPC::MFCR);
  const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);

  // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
  // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
  // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
  // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
  assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
         "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(
      &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
  assert(SingleScratchReg &&
         "Required number of registers not available in this block");

  SingleScratchReg = ScratchReg == TempReg;

  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;
  if (HasFP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    MachineFrameInfo &MFI = MF.getFrameInfo();
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  // Get stack alignments.
  Align MaxAlign = MFI.getMaxAlign();
  if (HasBP && MaxAlign > 1)
    assert(Log2(MaxAlign) < 16 && "Invalid alignment!");

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
  bool isLargeFrame = !isInt<16>(NegFrameSize);

  // Check if we can move the stack update instruction (stdu) down the prologue
  // past the callee saves.
  // Hopefully this avoids a situation where the callee-saved stores end up
  // stalled waiting for the store-with-update to complete.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  bool MovingStackUpdateDown = false;

  // Check if we can move the stack update.
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the stack update pointer past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
        StackUpdateLoc++;
        MovingStackUpdateDown = true;
      } else {
        // We need all of the Frame Indices to meet these conditions.
        // If they do not, abort the whole operation.
        StackUpdateLoc = MBBI;
        MovingStackUpdateDown = false;
        break;
      }
    }

    // If the operation was not aborted then update the object offsets.
    if (MovingStackUpdateDown) {
      for (CalleeSavedInfo CSI : Info) {
        int FrIdx = CSI.getFrameIdx();
        if (FrIdx < 0)
          MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
      }
    }
  }

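  // Note (a sketch of why the red-zone restriction exists): once the object
  // offsets above are rebased by NegFrameSize, the callee-saved stores are in
  // effect resolved against the not-yet-decremented SP, so until the delayed
  // stack update executes they write below the current SP, i.e. into the red
  // zone. That is why stackUpdateCanBeMoved() only allows this transformation
  // when the whole frame fits in the red zone.
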
  // Where in the prologue we move the CR fields depends on how many scratch
  // registers we have, and if we need to save the link register or not. This
  // lambda is to avoid duplicating the logic in 2 places.
  auto BuildMoveFromCR = [&]() {
    if (isELFv2ABI && MustSaveCRs.size() == 1) {
      // In the ELFv2 ABI, we are not required to save all CR fields.
      // If only one CR field is clobbered, it is more efficient to use
      // mfocrf to selectively save just that field, because mfocrf has
      // shorter latency than mfcr.
      assert(isPPC64 && "V2 ABI is 64-bit only.");
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
      MIB.addReg(MustSaveCRs[0], RegState::Kill);
    } else {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
      for (unsigned CRfield : MustSaveCRs)
        MIB.addReg(CRfield, RegState::ImplicitKill);
    }
  };

  // If we need to spill the CR and the LR but we don't have two separate
  // registers available, we must spill them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    BuildMoveFromCR();
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    BuildMoveFromCR();

  if (HasRedZone) {
    if (HasFP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    if (FI->usesPICBase())
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(PPC::R30)
          .addImm(PBPOffset)
          .addReg(SPReg);
    if (HasBP)
      BuildMI(MBB, MBBI, dl, StoreInst)
          .addReg(BPReg)
          .addImm(BPOffset)
          .addReg(SPReg);
  }

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
        .addReg(ScratchReg, getKillRegState(true))
        .addImm(LROffset)
        .addReg(SPReg);

  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR)) {
    assert(HasRedZone && "A red zone is always available on PPC64");
    BuildMI(MBB, MBBI, dl, StoreWordInst)
        .addReg(TempReg, getKillRegState(true))
        .addImm(CRSaveOffset)
        .addReg(SPReg);
  }

  // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
  if (!FrameSize)
    return;

  // Adjust stack pointer: r1 += NegFrameSize.
  // If there is a preferred stack alignment, align R1 now.

  if (HasBP && HasRedZone) {
    // Save a copy of r1 as the base pointer.
    BuildMI(MBB, MBBI, dl, OrInst, BPReg)
        .addReg(SPReg)
        .addReg(SPReg);
  }

  // Have we generated a STUX instruction to claim stack frame? If so,
  // the negated frame size will be placed in ScratchReg.
  bool HasSTUX = false;

  // Since the POWER ABI requires that the back chain pointer always be stored
  // at SP, a frame with FrameSize <= TLI.getStackProbeSize(MF) gets a probe for
  // free from the mandatory STU(X) instruction, so no explicit probe is needed.
  if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
    // To be consistent with other targets, a pseudo instruction is emitted and
    // will be later expanded in `inlineStackProbe`.
    BuildMI(MBB, MBBI, dl,
            TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
                            : PPC::PROBED_STACKALLOC_32))
        .addDef(ScratchReg)
        .addDef(TempReg) // TempReg stores the old sp.
        .addImm(NegFrameSize);
    // FIXME: HasSTUX is only read if HasRedZone is not set; in that case we
    // update ScratchReg to meet the assumption that ScratchReg contains
    // the NegFrameSize. This solution is rather tricky.
    if (!HasRedZone) {
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(TempReg)
          .addReg(SPReg);
      HasSTUX = true;
    }
  } else {
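    // Worked example for the realignment path below (illustrative values):
    // with MaxAlign = 64 on PPC64, RLDICL(SP, 0, 58) leaves SP % 64 in
    // ScratchReg; SUBFIC then forms -(FrameSize + SP % 64), so the STDUX that
    // follows lands SP on a 64-byte boundary (FrameSize was already rounded up
    // to MaxAlign in determineFrameLayout).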
    // This condition must be kept in sync with canUseAsPrologue.
    if (HasBP && MaxAlign > 1) {
      if (isPPC64)
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(64 - Log2(MaxAlign));
      else // PPC32...
        BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
            .addReg(SPReg)
            .addImm(0)
            .addImm(32 - Log2(MaxAlign))
            .addImm(31);
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(NegFrameSize);
      } else {
        assert(!SingleScratchReg && "Only a single scratch reg available");
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
            .addImm(NegFrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
            .addReg(TempReg, RegState::Kill)
            .addImm(NegFrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addReg(TempReg, RegState::Kill);
      }

      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;

    } else if (!isLargeFrame) {
      BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
          .addReg(SPReg)
          .addImm(NegFrameSize)
          .addReg(SPReg);

    } else {
      BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(NegFrameSize >> 16);
      BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(NegFrameSize & 0xFFFF);
      BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
          .addReg(SPReg, RegState::Kill)
          .addReg(SPReg)
          .addReg(ScratchReg);
      HasSTUX = true;
    }
  }

  // Save the TOC register after the stack pointer update if a prologue TOC
  // save is required for the function.
  if (MustSaveTOC) {
    assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
    BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
        .addReg(TOCReg, getKillRegState(true))
        .addImm(TOCSaveOffset)
        .addReg(SPReg);
  }

  if (!HasRedZone) {
    assert(!isPPC64 && "A red zone is always available on PPC64");
    if (HasSTUX) {
      // The negated frame size is in ScratchReg, and the SPReg has been
      // decremented by the frame size: SPReg = old SPReg + ScratchReg.
      // Since FPOffset, PBPOffset, etc. are relative to the beginning of
      // the stack frame (i.e. the old SP), ideally, we would put the old
      // SP into a register and use it as the base for the stores. The
      // problem is that the only available register may be ScratchReg,
      // which could be R0, and R0 cannot be used as a base address.

      // First, set ScratchReg to the old SP. This may need to be modified
      // later.
      BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addReg(SPReg);

      if (ScratchReg == PPC::R0) {
        // R0 cannot be used as a base register, but it can be used as an
        // index in a store-indexed.
        int LastOffset = 0;
        if (HasFP) {
          // R0 += (FPOffset-LastOffset).
          // Need addic, since addi treats R0 as 0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(FPOffset - LastOffset);
          LastOffset = FPOffset;
          // Store FP into *R0.
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(FPReg, RegState::Kill) // Save FP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (FI->usesPICBase()) {
          // R0 += (PBPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(PBPOffset - LastOffset);
          LastOffset = PBPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
        }
        if (HasBP) {
          // R0 += (BPOffset-LastOffset).
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
              .addReg(ScratchReg)
              .addImm(BPOffset - LastOffset);
          LastOffset = BPOffset;
          BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
              .addReg(BPReg, RegState::Kill) // Save BP.
              .addReg(PPC::ZERO)
              .addReg(ScratchReg); // This will be the index (R0 is ok here).
          // BP = R0-LastOffset
          BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addImm(-LastOffset);
        }
      } else {
        // ScratchReg is not R0, so use it as the base register. It is
        // already set to the old SP, so we can use the offsets directly.

        // Now that the stack frame has been allocated, save all the necessary
        // registers using ScratchReg as the base address.
        if (HasFP)
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(FPReg)
              .addImm(FPOffset)
              .addReg(ScratchReg);
        if (FI->usesPICBase())
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(PPC::R30)
              .addImm(PBPOffset)
              .addReg(ScratchReg);
        if (HasBP) {
          BuildMI(MBB, MBBI, dl, StoreInst)
              .addReg(BPReg)
              .addImm(BPOffset)
              .addReg(ScratchReg);
          BuildMI(MBB, MBBI, dl, OrInst, BPReg)
              .addReg(ScratchReg, RegState::Kill)
              .addReg(ScratchReg);
        }
      }
    } else {
      // The frame size is a known 16-bit constant (fitting in the immediate
      // field of STWU). To be here we have to be compiling for PPC32.
      // Since the SPReg has been decreased by FrameSize, add it back to each
      // offset.
      if (HasFP)
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(FPReg)
            .addImm(FrameSize + FPOffset)
            .addReg(SPReg);
      if (FI->usesPICBase())
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(PPC::R30)
            .addImm(FrameSize + PBPOffset)
            .addReg(SPReg);
      if (HasBP) {
        BuildMI(MBB, MBBI, dl, StoreInst)
            .addReg(BPReg)
            .addImm(FrameSize + BPOffset)
            .addReg(SPReg);
        BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      }
    }
  }

  // Add Call Frame Information for the instructions we generated above.
  if (needsCFI) {
    unsigned CFIIndex;

    if (HasBP) {
      // Define CFA in terms of BP. Do this in preference to using FP/SP,
      // because if the stack needed aligning then CFA won't be at a fixed
      // offset from FP/SP.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
    } else {
      // Adjust the definition of CFA to account for the change in SP.
      assert(NegFrameSize);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
    }
    BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    if (HasFP) {
      // Describe where FP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (FI->usesPICBase()) {
      // Describe where the PIC base pointer (R30) was saved, at a fixed offset
      // from CFA.
      unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (HasBP) {
      // Describe where BP was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (MustSaveLR) {
      // Describe where LR was saved, at a fixed offset from CFA.
      unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
      CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  // If there is a frame pointer, copy R1 into R31.
  if (HasFP) {
    BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(SPReg)
        .addReg(SPReg);

    if (!HasBP && needsCFI) {
      // Change the definition of CFA from SP+offset to FP+offset, because SP
      // will change at every alloca.
      unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, Reg));

      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }
  }

  if (needsCFI) {
    // Describe where callee saved registers were saved, at fixed offsets from
    // CFA.
    const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
    for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
      unsigned Reg = CSI[I].getReg();
      if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;

      // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
      // subregisters of CR2. We just need to emit a move of CR2.
      if (PPC::CRBITRCRegClass.contains(Reg))
        continue;

      if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
        continue;

      // For SVR4, don't emit a move for the CR spill slot if we haven't
      // spilled CRs.
      if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
          && !MustSaveCR)
        continue;

      // For 64-bit SVR4 when we have spilled CRs, the spill location
      // is SP+8, not a frame-relative slot.
      if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
        // the whole CR word. In the ELFv2 ABI, every CR that was
        // actually saved gets its own CFI record.
        unsigned CRReg = isELFv2ABI ?
                                      Reg : (unsigned) PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (CSI[I].isSpilledToReg()) {
        unsigned SpilledReg = CSI[I].getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIRegister);
      } else {
        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  // TODO: Generate CFI instructions.
  bool isPPC64 = Subtarget.isPPC64();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
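  // (For reference: createDefCfaRegister corresponds to the assembler's
  // .cfi_def_cfa_register <reg> directive and cfiDefCfa to
  // .cfi_def_cfa <reg>, <offset>; they only change how the CFA is computed and
  // emit no machine instructions of their own.)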
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe the realignment gap [stackptr - (stackptr % align), stackptr)
  // when HasBP && isPPC64. In that scenario we normally have r0, r1, r12 and
  // r30 available, and r1 has already been copied to r30, which is BPReg. So
  // BPReg holds the value of stackptr.
  // First we have to probe the tail interval whose size is less than probesize,
  // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
  // ScratchReg holds the value of ((stackptr % align) % probesize). Then we
  // probe each block of size probesize until stackptr meets
  // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
  // as negprobesize. At both stages, TempReg holds the value of
  // (stackptr - (stackptr % align)).
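  // Worked example (illustrative numbers): with MaxAlign = 32768, ProbeSize =
  // 4096 and stackptr % align = 13000, bb.1 below first probes the
  // 13000 % 4096 = 712 byte tail, then the loop in bb.3 probes three full
  // 4096-byte blocks until stackptr reaches the aligned value kept in TempReg.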
  auto dynamicProbe = [&](MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, Register ScratchReg,
                          Register TempReg) {
    assert(HasBP && isPPC64 && "Probe alignment part not available");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
    // ScratchReg = stackptr % align
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(BPReg)
        .addImm(0)
        .addImm(64 - Log2(MaxAlign));
    // TempReg = stackptr - (stackptr % align)
    BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
        .addReg(ScratchReg)
        .addReg(BPReg);
    // ScratchReg = (stackptr % align) % probesize
    BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
        .addReg(ScratchReg)
        .addImm(0)
        .addImm(64 - Log2(ProbeSize));
    Register CRReg = PPC::CR0;
    // If (stackptr % align) % probesize == 0, we should not generate probe
    // code. The layout of the emitted assembly looks roughly like this:
    // bb.0:
    //   ...
    //   cmpldi $scratchreg, 0
    //   beq bb.2
    // bb.1: # Probe tail interval
    //   neg $scratchreg, $scratchreg
    //   stdux $bpreg, r1, $scratchreg
    // bb.2:
    //   <materialize negprobesize into $scratchreg>
    //   cmpd r1, $tempreg
    //   beq bb.4
    // bb.3: # Loop to probe each block
    //   stdux $bpreg, r1, $scratchreg
    //   cmpd r1, $tempreg
    //   bne bb.3
    // bb.4:
    //   ...
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeResidualMBB);
    MachineBasicBlock *ProbeLoopPreHeaderMBB =
        MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.4
    ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
    ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    // bb.0
    BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
    BuildMI(&MBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeLoopPreHeaderMBB);
    MBB.addSuccessor(ProbeResidualMBB);
    MBB.addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.1
    BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
        .addReg(ScratchReg);
    allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
                     false, BPReg);
    ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
    // bb.2
    MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
                   NegProbeSize, ScratchReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_EQ)
        .addReg(CRReg)
        .addMBB(ProbeExitMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
    ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
    // bb.3
    allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
                     false, BPReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
        .addReg(SPReg)
        .addReg(TempReg);
    BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CRReg)
        .addMBB(ProbeLoopBodyMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
    ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    // Update liveins.
    recomputeLiveIns(*ProbeResidualMBB);
    recomputeLiveIns(*ProbeLoopPreHeaderMBB);
    recomputeLiveIns(*ProbeLoopBodyMBB);
    recomputeLiveIns(*ProbeExitMBB);
    return ProbeExitMBB;
  };
  // For the case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign.
  if (HasBP && MaxAlign > 1) {
    // FIXME: Currently we only probe the gap [stackptr & alignmask, stackptr)
    // in 64-bit mode.
    if (isPPC64) {
      // Use BPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
      // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
      // TempReg.
      Register TempReg = FPReg;
      CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
      // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(BPReg)
          .addReg(BPReg);
    } else {
      // Initialize current frame pointer.
      BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
          .addReg(SPReg)
          .addReg(SPReg);
      // Use FPReg to calculate CFA.
      if (needsCFI)
        buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(FPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
          .addReg(ScratchReg)
          .addReg(SPReg);
    }
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
  }
  // Probe the residual part.
  if (NegResidualSize) {
    bool ResidualUseDForm = CanUseDForm(NegResidualSize);
    if (!ResidualUseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
    allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                     ResidualUseDForm, FPReg);
  }
  bool UseDForm = CanUseDForm(NegProbeSize);
  // If the number of blocks is small, just probe them directly.
  if (NumBlocks < 3) {
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
    for (int i = 0; i < NumBlocks; ++i)
      allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                       FPReg);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
    }
  } else {
    // Since CTR is a volatile register and the current shrink-wrapping
    // implementation won't choose an MBB inside a loop as the PrologMBB, it is
    // safe to synthesize a CTR loop to probe.
    // Calculate the trip count and store it in the CTR register.
    MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
        .addReg(ScratchReg, RegState::Kill);
    if (!UseDForm)
      MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
    // Create the MBBs of the loop.
    MachineFunction::iterator MBBInsertPoint =
        std::next(CurrentMBB->getIterator());
    MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, LoopMBB);
    MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ExitMBB);
    // Synthesize the loop body.
    allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                     UseDForm, FPReg);
    BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
        .addMBB(LoopMBB);
    LoopMBB->addSuccessor(ExitMBB);
    LoopMBB->addSuccessor(LoopMBB);
    // Synthesize the exit MBB.
    ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                    std::next(MachineBasicBlock::iterator(MI)),
                    CurrentMBB->end());
    ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
    CurrentMBB->addSuccessor(LoopMBB);
    if (needsCFI) {
      // Restore using SPReg to calculate CFA.
      buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
    }
    // Update liveins.
    recomputeLiveIns(*LoopMBB);
    recomputeLiveIns(*ExitMBB);
  }
  ++NumPrologProbed;
  MI.eraseFromParent();
}

void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  Register FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  Register ScratchReg;
  Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc &MTLRInst = TII.get(isPPC64 ? PPC::MTLR8 : PPC::MTLR);
  const MCInstrDesc &LoadInst = TII.get(isPPC64 ? PPC::LD : PPC::LWZ);
  const MCInstrDesc &LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS);
  const MCInstrDesc &OrInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  const MCInstrDesc &OrImmInst = TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI);
  const MCInstrDesc &AddImmInst = TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI);
  const MCInstrDesc &AddInst = TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4);
  const MCInstrDesc &LoadWordInst = TII.get(isPPC64 ? PPC::LWZ8 : PPC::LWZ);
  const MCInstrDesc &MoveToCRInst = TII.get(isPPC64 ? PPC::MTOCRF8 : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  SingleScratchReg = ScratchReg == TempReg;

  if (HasFP) {
    int FPIndex = FI->getFramePointerSaveIndex();
    assert(FPIndex && "No Frame Pointer Save Slot!");
    FPOffset = MFI.getObjectOffset(FPIndex);
  }

  int BPOffset = 0;
  if (HasBP) {
    int BPIndex = FI->getBasePointerSaveIndex();
    assert(BPIndex && "No Base Pointer Save Slot!");
    BPOffset = MFI.getObjectOffset(BPIndex);
  }

  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
                     RetOpcode == PPC::TCRETURNdi ||
                     RetOpcode == PPC::TCRETURNai ||
                     RetOpcode == PPC::TCRETURNri8 ||
                     RetOpcode == PPC::TCRETURNdi8 ||
                     RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust the stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be non-negative");
      if (MaxTCRetDelta > 0)
        FrameSize += (StackAdj + Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without a red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such a case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if the red zone were
  // present.
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves, which means
  // that the callee saves can hide the latency of the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
    // zone, add this offset back now.

    // If the function has a base pointer, the stack pointer has been copied
    // to it so we can restore it by copying in the other direction.
    if (HasRedZone && HasBP) {
      BuildMI(MBB, MBBI, dl, OrInst, RBReg)
          .addReg(BPReg)
          .addReg(BPReg);
    }
    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case. A similar situation exists with setjmp.
    else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
            .addReg(FPReg)
            .addImm(FrameSize);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
            .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(FrameSize & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
            .addReg(RBReg)
            .addReg(FPReg)
            .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
            .addReg(SPReg)
            .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
              .addReg(FPReg)
              .addReg(FPReg);
        RBReg = FPReg;
      }
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
          .addImm(0)
          .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of a red zone ScratchReg
    // is live here.
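    // The sequence emitted here is roughly (64-bit, CRSaveOffset = 8; the
    // exact offset and field mask depend on the target and the saved fields):
    //   lwz r12, 8(r1)      ; reload the saved CR word
    //   mtocrf 0x20, r12    ; one mtocrf per nonvolatile CR field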
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
        .addImm(CRSaveOffset)
        .addReg(SPReg);
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
          .addReg(TempReg, getKillRegState(i == e - 1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset + SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
        .addImm(LROffset + SPAdd)
        .addReg(RBReg);
    LoadedLR = true;
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
        .addImm(CRSaveOffset)
        .addReg(RBReg);
  }

  if (HasFP) {
    // If there is a red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
          .addImm(FPOffset)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
          .addImm(FPOffset)
          .addReg(RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
        .addImm(PBPOffset)
        .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
        .addImm(BPOffset)
        .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
          .addReg(RBReg)
          .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(RBReg)
          .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
          .addReg(ScratchReg)
          .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
          .addImm(LROffset)
          .addReg(SPReg);
  }

  if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
          .addReg(TempReg, getKillRegState(i == e - 1));

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization.
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
            .addReg(SPReg)
            .addImm(CallerAllocatedAmt);
      } else {
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
            .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
            .addReg(ScratchReg, RegState::Kill)
            .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
            .addReg(SPReg)
            .addReg(FPReg)
            .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far, a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create a branch instruction for the pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel, since
  // we can tail call external functions with PC-Rel (i.e. we don't need to
  // worry about different TOC pointers). Some of the external functions will
  // be MO_GlobalAddress, while others, like memcpy for example, are going to
  // be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary.
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet, do so now.
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64 ? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or
  // 4. For 64-bit SVR4 and all flavors of AIX we create a FixedStack object
  // at the offset of the CR-save slot in the linkage area. The actual save
  // and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
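  // The fixed-object offsets below are relative to the stack pointer at
  // function entry: a positive offset puts the slot in the caller's linkage
  // area (the 64-bit and 32-bit AIX cases), while the negative offset used
  // for 32-bit ELF puts it in this function's own save area.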
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                           RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped and has a tail call, the tail call might
  // not be inside the RestoreBlock chosen by shrink-wrapping, in which case
  // emitEpilogue() will not emit the real branch instruction for it. Handle
  // that case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // The general register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move the 32-bit general register save area spill slots down, taking
    // into account the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Likewise for the 64-bit general register save area spill slots.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    unsigned MinReg =
        std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                           TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding; we need 16-byte alignment. For a positive
    // offset the usual rounding formula is y = (x + (n - 1)) & ~(n - 1), but
    // because we are working with negative offsets here (the stack grows
    // downward), we must instead round toward the more negative value, which
    // is simply y = x & ~(n - 1), where x is the offset before aligning, n is
    // the alignment (n = 16 here) and y is the offset after aligning
    // (e.g. -280 & ~15 == -288).
    assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate. We don't know the complete frame size here
  // because we've not yet computed callee-saved register spills or the
  // needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
      (hasSpills(MF) && !isInt<16>(StackSize))) {
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    Align Alignment = TRI.getSpillAlign(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));

    // Might we have over-aligned allocas?
    bool HasAlVars =
        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || HasAlVars)
      RS->addScavengingFrameIndex(
          MFI.CreateStackObject(Size, Alignment, false));
  }
}

// This function checks if a callee-saved GPR can be spilled to a volatile
// vector register. This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target-independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Early exit if we cannot spill GPRs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] ||
        (!PPC::F8RCRegClass.contains(Reg) &&
         !PPC::VFRCRegClass.contains(Reg)) ||
        (MF.getRegInfo().isPhysRegUsed(Reg)))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  for (auto &CS : CSI) {
    if (BVAllocatable.none())
      return false;

    unsigned Reg = CS.getReg();
    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      BVAllocatable.reset(VolatileVFReg);
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}

bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
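    // (When the TOC must be saved, the store of X2/R2 is emitted by the
    // prologue itself, as noted above, so it is intentionally skipped here.)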
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                    .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                             .addReg(PPC::R12,
                                                     getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
            .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}

static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit: FP-relative
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  unsigned RestoreOp = PPC::MTOCRF;
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
                       .addReg(MoveReg,
                               getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
                       .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
                       .addReg(MoveReg, getKillRegState(true)));
}

MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addImm(CalleeAmt);
      } else {
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
            .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
            .addReg(TmpReg, RegState::Kill)
            .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

static bool isCalleeSavedCR(unsigned Reg) {
  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
}

bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of the callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF, when we first encounter a non-CR register after seeing
      // at least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
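        // (Presumably the *NoUpd variants keep the reload out of the VSX swap
        // optimization so the in-memory element order matches the unwind
        // info; this mirrors the corresponding spill code above.)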
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}

unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}

unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}

unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}

bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
    return false;
  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
          MF.getSubtarget<PPCSubtarget>().isPPC64());
}