//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCFrameLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

// Offset (from the caller's SP) of the slot where the return address (LR)
// is saved. The slot is in the caller's frame on all supported PPC ABIs.
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

// Offset of the TOC-pointer save slot within the linkage area.
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

// Offset (negative, i.e. below the frame) where the frame pointer is saved.
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

// Size in bytes of the fixed linkage area the caller must allocate:
// 4 doublewords on ELFv2, 6 words/doublewords on AIX and ELFv1,
// 8 bytes on 32-bit SVR4.
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

// Offset (negative) of the base-pointer save slot. With 32-bit ELF PIC the
// second slot holds the PIC base (R30), so the base pointer moves down one.
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

// Offset of the CR save word within the linkage area.
static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
// Returns the ABI-appropriate table of (register, offset) pairs and sets
// NumEntries to the table's length.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets shared by ELF and
// AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets (16 bytes per vector register).
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  if (Subtarget.is64BitELFABI()) {
    NumEntries = std::size(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = std::size(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = std::size(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = std::size(AIXOffsets32);
  return AIXOffsets32;
}

// True if this function spills any CR field (recorded by earlier passes on
// the PPCFunctionInfo).
static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

// True if this function contains any spill of any kind.
static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

// True if this function has spills that are not reg+imm addressable.
static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size.
Update the MachineFunction object with the stack size. 283 uint64_t 284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 285 bool UseEstimate) const { 286 unsigned NewMaxCallFrameSize = 0; 287 uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, 288 &NewMaxCallFrameSize); 289 MF.getFrameInfo().setStackSize(FrameSize); 290 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 291 return FrameSize; 292 } 293 294 /// determineFrameLayout - Determine the size of the frame and maximum call 295 /// frame size. 296 uint64_t 297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 298 bool UseEstimate, 299 unsigned *NewMaxCallFrameSize) const { 300 const MachineFrameInfo &MFI = MF.getFrameInfo(); 301 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 302 303 // Get the number of bytes to allocate from the FrameInfo 304 uint64_t FrameSize = 305 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 306 307 // Get stack alignments. The frame must be aligned to the greatest of these: 308 Align TargetAlign = getStackAlign(); // alignment required per the ABI 309 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 310 Align Alignment = std::max(TargetAlign, MaxAlign); 311 312 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 313 314 unsigned LR = RegInfo->getRARegister(); 315 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 316 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 317 !MFI.adjustsStack() && // No calls. 318 !MustSaveLR(MF, LR) && // No need to save LR. 319 !FI->mustSaveTOC() && // No need to save TOC. 320 !RegInfo->hasBasePointer(MF) && // No special alignment. 321 !MFI.isFrameAddressTaken(); 322 323 // Note: for PPC32 SVR4ABI, we can still generate stackless 324 // code if all local vars are reg-allocated. 
325 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 326 327 // Check whether we can skip adjusting the stack pointer (by using red zone) 328 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 329 // No need for frame 330 return 0; 331 } 332 333 // Get the maximum call frame size of all the calls. 334 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 335 336 // Maximum call frame needs to be at least big enough for linkage area. 337 unsigned minCallFrameSize = getLinkageSize(); 338 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 339 340 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 341 // that allocations will be aligned. 342 if (MFI.hasVarSizedObjects()) 343 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 344 345 // Update the new max call frame size if the caller passes in a valid pointer. 346 if (NewMaxCallFrameSize) 347 *NewMaxCallFrameSize = maxCallFrameSize; 348 349 // Include call frame size in total. 350 FrameSize += maxCallFrameSize; 351 352 // Make sure the frame is aligned. 353 FrameSize = alignTo(FrameSize, Alignment); 354 355 return FrameSize; 356 } 357 358 // hasFP - Return true if the specified function actually has a dedicated frame 359 // pointer register. 360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 361 const MachineFrameInfo &MFI = MF.getFrameInfo(); 362 // FIXME: This is pretty much broken by design: hasFP() might be called really 363 // early, before the stack layout was calculated and thus hasFP() might return 364 // true or false here depending on the time of call. 365 return (MFI.getStackSize()) && needsFP(MF); 366 } 367 368 // needsFP - Return true if the specified function should have a dedicated frame 369 // pointer register. This is true if the function has variable sized allocas or 370 // if frame pointer elimination is disabled. 
371 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 372 const MachineFrameInfo &MFI = MF.getFrameInfo(); 373 374 // Naked functions have no stack frame pushed, so we don't have a frame 375 // pointer. 376 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 377 return false; 378 379 return MF.getTarget().Options.DisableFramePointerElim(MF) || 380 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 381 MF.exposesReturnsTwice() || 382 (MF.getTarget().Options.GuaranteedTailCallOpt && 383 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 384 } 385 386 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 387 // When there is dynamic alloca in this function, we can not use the frame 388 // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1 389 // always points to the backchain. 390 bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects(); 391 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 392 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 393 394 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 395 bool HasBP = RegInfo->hasBasePointer(MF); 396 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 397 unsigned BP8Reg = HasBP ? 
(unsigned) PPC::X30 : FP8Reg; 398 399 for (MachineBasicBlock &MBB : MF) 400 for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) { 401 --MBBI; 402 for (MachineOperand &MO : MBBI->operands()) { 403 if (!MO.isReg()) 404 continue; 405 406 switch (MO.getReg()) { 407 case PPC::FP: 408 MO.setReg(FPReg); 409 break; 410 case PPC::FP8: 411 MO.setReg(FP8Reg); 412 break; 413 case PPC::BP: 414 MO.setReg(BPReg); 415 break; 416 case PPC::BP8: 417 MO.setReg(BP8Reg); 418 break; 419 420 } 421 } 422 } 423 } 424 425 /* This function will do the following: 426 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 427 respectively (defaults recommended by the ABI) and return true 428 - If MBB is not an entry block, initialize the register scavenger and look 429 for available registers. 430 - If the defaults (R0/R12) are available, return true 431 - If TwoUniqueRegsRequired is set to true, it looks for two unique 432 registers. Otherwise, look for a single available register. 433 - If the required registers are found, set SR1 and SR2 and return true. 434 - If the required registers are not found, set SR2 or both SR1 and SR2 to 435 PPC::NoRegister and return false. 436 437 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 438 is not set, this function will attempt to find two different registers, but 439 still return true if only one register is available (and set SR1 == SR2). 440 */ 441 bool 442 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 443 bool UseAtEnd, 444 bool TwoUniqueRegsRequired, 445 Register *SR1, 446 Register *SR2) const { 447 RegScavenger RS; 448 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 449 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 450 451 // Set the defaults for the two scratch registers. 
452 if (SR1) 453 *SR1 = R0; 454 455 if (SR2) { 456 assert (SR1 && "Asking for the second scratch register but not the first?"); 457 *SR2 = R12; 458 } 459 460 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 461 if ((UseAtEnd && MBB->isReturnBlock()) || 462 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 463 return true; 464 465 if (UseAtEnd) { 466 // The scratch register will be used before the first terminator (or at the 467 // end of the block if there are no terminators). 468 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 469 if (MBBI == MBB->begin()) { 470 RS.enterBasicBlock(*MBB); 471 } else { 472 RS.enterBasicBlockEnd(*MBB); 473 RS.backward(MBBI); 474 } 475 } else { 476 // The scratch register will be used at the start of the block. 477 RS.enterBasicBlock(*MBB); 478 } 479 480 // If the two registers are available, we're all good. 481 // Note that we only return here if both R0 and R12 are available because 482 // although the function may not require two unique registers, it may benefit 483 // from having two so we should try to provide them. 484 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 485 return true; 486 487 // Get the list of callee-saved registers for the target. 488 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 489 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 490 491 // Get all the available registers in the block. 492 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 493 &PPC::GPRCRegClass); 494 495 // We shouldn't use callee-saved registers as scratch registers as they may be 496 // available when looking for a candidate block for shrink wrapping but not 497 // available when the actual prologue/epilogue is being emitted because they 498 // were added as live-in to the prologue block by PrologueEpilogueInserter. 
499 for (int i = 0; CSRegs[i]; ++i) 500 BV.reset(CSRegs[i]); 501 502 // Set the first scratch register to the first available one. 503 if (SR1) { 504 int FirstScratchReg = BV.find_first(); 505 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 506 } 507 508 // If there is another one available, set the second scratch register to that. 509 // Otherwise, set it to either PPC::NoRegister if this function requires two 510 // or to whatever SR1 is set to if this function doesn't require two. 511 if (SR2) { 512 int SecondScratchReg = BV.find_next(*SR1); 513 if (SecondScratchReg != -1) 514 *SR2 = SecondScratchReg; 515 else 516 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 517 } 518 519 // Now that we've done our best to provide both registers, double check 520 // whether we were unable to provide enough. 521 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 522 return false; 523 524 return true; 525 } 526 527 // We need a scratch register for spilling LR and for spilling CR. By default, 528 // we use two scratch registers to hide latency. However, if only one scratch 529 // register is available, we can adjust for that by not overlapping the spill 530 // code. However, if we need to realign the stack (i.e. have a base pointer) 531 // and the stack frame is large, we need two scratch registers. 532 // Also, stack probe requires two scratch registers, one for old sp, one for 533 // large frame and large probe size. 
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // A frame whose negation does not fit in a signed 16-bit immediate cannot
  // be claimed with a single STDU/STWU and needs extra materialization.
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  // Two unique registers are needed when realigning a large (or red-zone-less)
  // frame through a base pointer, or when an inline stack probe is emitted.
  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

// A block can host the prologue only if it can supply the scratch registers
// the prologue will need (possibly two unique ones, see above).
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// A block can host the epilogue only if a scratch register is available at
// its end.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated.
If we get an interrupt 581 // inside the prologue but before the stack update we now have a number of 582 // stores to the red zone and those stores must all fit. 583 MachineFrameInfo &MFI = MF.getFrameInfo(); 584 unsigned FrameSize = MFI.getStackSize(); 585 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 586 return false; 587 588 // Frame pointers and base pointers complicate matters so don't do anything 589 // if we have them. For example having a frame pointer will sometimes require 590 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 591 // difficult. Similar situation exists with setjmp. 592 if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice()) 593 return false; 594 595 // Calls to fast_cc functions use different rules for passing parameters on 596 // the stack from the ABI and using PIC base in the function imposes 597 // similar restrictions to using the base pointer. It is not generally safe 598 // to move the stack pointer update in these situations. 599 if (FI->hasFastCall() || FI->usesPICBase()) 600 return false; 601 602 // Finally we can move the stack update if we do not require register 603 // scavenging. Register scavenging can introduce more spills and so 604 // may make the frame size larger than we have computed. 605 return !RegInfo->requiresFrameIndexScavenging(MF); 606 } 607 608 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 609 MachineBasicBlock &MBB) const { 610 MachineBasicBlock::iterator MBBI = MBB.begin(); 611 MachineFrameInfo &MFI = MF.getFrameInfo(); 612 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 613 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 614 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 615 616 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); 617 DebugLoc dl; 618 // AIX assembler does not support cfi directives. 
619 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 620 621 const bool HasFastMFLR = Subtarget.hasFastMFLR(); 622 623 // Get processor type. 624 bool isPPC64 = Subtarget.isPPC64(); 625 // Get the ABI. 626 bool isSVR4ABI = Subtarget.isSVR4ABI(); 627 bool isELFv2ABI = Subtarget.isELFv2ABI(); 628 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); 629 630 // Work out frame sizes. 631 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); 632 int64_t NegFrameSize = -FrameSize; 633 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))) 634 llvm_unreachable("Unhandled stack size!"); 635 636 if (MFI.isFrameAddressTaken()) 637 replaceFPWithRealFP(MF); 638 639 // Check if the link register (LR) must be saved. 640 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 641 bool MustSaveLR = FI->mustSaveLR(); 642 bool MustSaveTOC = FI->mustSaveTOC(); 643 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 644 bool MustSaveCR = !MustSaveCRs.empty(); 645 // Do we have a frame pointer and/or base pointer for this function? 646 bool HasFP = hasFP(MF); 647 bool HasBP = RegInfo->hasBasePointer(MF); 648 bool HasRedZone = isPPC64 || !isSVR4ABI; 649 bool HasROPProtect = Subtarget.hasROPProtect(); 650 bool HasPrivileged = Subtarget.hasPrivileged(); 651 652 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 653 Register BPReg = RegInfo->getBaseRegister(MF); 654 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 655 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 656 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 657 Register ScratchReg; 658 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 659 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 660 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 661 : PPC::MFLR ); 662 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 663 : PPC::STW ); 664 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? 
PPC::STDU 665 : PPC::STWU ); 666 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 667 : PPC::STWUX); 668 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 669 : PPC::OR ); 670 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 671 : PPC::SUBFC); 672 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 673 : PPC::SUBFIC); 674 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 675 : PPC::MFCR); 676 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 677 const MCInstrDesc &HashST = 678 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) 679 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); 680 681 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 682 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 683 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 684 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 685 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 686 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 687 688 // Using the same bool variable as below to suppress compiler warnings. 
689 bool SingleScratchReg = findScratchRegister( 690 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); 691 assert(SingleScratchReg && 692 "Required number of registers not available in this block"); 693 694 SingleScratchReg = ScratchReg == TempReg; 695 696 int64_t LROffset = getReturnSaveOffset(); 697 698 int64_t FPOffset = 0; 699 if (HasFP) { 700 MachineFrameInfo &MFI = MF.getFrameInfo(); 701 int FPIndex = FI->getFramePointerSaveIndex(); 702 assert(FPIndex && "No Frame Pointer Save Slot!"); 703 FPOffset = MFI.getObjectOffset(FPIndex); 704 } 705 706 int64_t BPOffset = 0; 707 if (HasBP) { 708 MachineFrameInfo &MFI = MF.getFrameInfo(); 709 int BPIndex = FI->getBasePointerSaveIndex(); 710 assert(BPIndex && "No Base Pointer Save Slot!"); 711 BPOffset = MFI.getObjectOffset(BPIndex); 712 } 713 714 int64_t PBPOffset = 0; 715 if (FI->usesPICBase()) { 716 MachineFrameInfo &MFI = MF.getFrameInfo(); 717 int PBPIndex = FI->getPICBasePointerSaveIndex(); 718 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 719 PBPOffset = MFI.getObjectOffset(PBPIndex); 720 } 721 722 // Get stack alignments. 723 Align MaxAlign = MFI.getMaxAlign(); 724 if (HasBP && MaxAlign > 1) 725 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 726 727 // Frames of 32KB & larger require special handling because they cannot be 728 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 729 bool isLargeFrame = !isInt<16>(NegFrameSize); 730 731 // Check if we can move the stack update instruction (stdu) down the prologue 732 // past the callee saves. Hopefully this will avoid the situation where the 733 // saves are waiting for the update on the store with update to complete. 734 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 735 bool MovingStackUpdateDown = false; 736 737 // Check if we can move the stack update. 
738 if (stackUpdateCanBeMoved(MF)) { 739 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 740 for (CalleeSavedInfo CSI : Info) { 741 // If the callee saved register is spilled to a register instead of the 742 // stack then the spill no longer uses the stack pointer. 743 // This can lead to two consequences: 744 // 1) We no longer need to update the stack because the function does not 745 // spill any callee saved registers to stack. 746 // 2) We have a situation where we still have to update the stack pointer 747 // even though some registers are spilled to other registers. In 748 // this case the current code moves the stack update to an incorrect 749 // position. 750 // In either case we should abort moving the stack update operation. 751 if (CSI.isSpilledToReg()) { 752 StackUpdateLoc = MBBI; 753 MovingStackUpdateDown = false; 754 break; 755 } 756 757 int FrIdx = CSI.getFrameIdx(); 758 // If the frame index is not negative the callee saved info belongs to a 759 // stack object that is not a fixed stack object. We ignore non-fixed 760 // stack objects because we won't move the stack update pointer past them. 761 if (FrIdx >= 0) 762 continue; 763 764 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 765 StackUpdateLoc++; 766 MovingStackUpdateDown = true; 767 } else { 768 // We need all of the Frame Indices to meet these conditions. 769 // If they do not, abort the whole operation. 770 StackUpdateLoc = MBBI; 771 MovingStackUpdateDown = false; 772 break; 773 } 774 } 775 776 // If the operation was not aborted then update the object offset. 777 if (MovingStackUpdateDown) { 778 for (CalleeSavedInfo CSI : Info) { 779 int FrIdx = CSI.getFrameIdx(); 780 if (FrIdx < 0) 781 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 782 } 783 } 784 } 785 786 // Where in the prologue we move the CR fields depends on how many scratch 787 // registers we have, and if we need to save the link register or not. 
This 788 // lambda is to avoid duplicating the logic in 2 places. 789 auto BuildMoveFromCR = [&]() { 790 if (isELFv2ABI && MustSaveCRs.size() == 1) { 791 // In the ELFv2 ABI, we are not required to save all CR fields. 792 // If only one CR field is clobbered, it is more efficient to use 793 // mfocrf to selectively save just that field, because mfocrf has short 794 // latency compares to mfcr. 795 assert(isPPC64 && "V2 ABI is 64-bit only."); 796 MachineInstrBuilder MIB = 797 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 798 MIB.addReg(MustSaveCRs[0], RegState::Kill); 799 } else { 800 MachineInstrBuilder MIB = 801 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 802 for (unsigned CRfield : MustSaveCRs) 803 MIB.addReg(CRfield, RegState::ImplicitKill); 804 } 805 }; 806 807 // If we need to spill the CR and the LR but we don't have two separate 808 // registers available, we must spill them one at a time 809 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 810 BuildMoveFromCR(); 811 BuildMI(MBB, MBBI, dl, StoreWordInst) 812 .addReg(TempReg, getKillRegState(true)) 813 .addImm(CRSaveOffset) 814 .addReg(SPReg); 815 } 816 817 if (MustSaveLR) 818 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 819 820 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 821 BuildMoveFromCR(); 822 823 if (HasRedZone) { 824 if (HasFP) 825 BuildMI(MBB, MBBI, dl, StoreInst) 826 .addReg(FPReg) 827 .addImm(FPOffset) 828 .addReg(SPReg); 829 if (FI->usesPICBase()) 830 BuildMI(MBB, MBBI, dl, StoreInst) 831 .addReg(PPC::R30) 832 .addImm(PBPOffset) 833 .addReg(SPReg); 834 if (HasBP) 835 BuildMI(MBB, MBBI, dl, StoreInst) 836 .addReg(BPReg) 837 .addImm(BPOffset) 838 .addReg(SPReg); 839 } 840 841 // Generate the instruction to store the LR. In the case where ROP protection 842 // is required the register holding the LR should not be killed as it will be 843 // used by the hash store instruction. 
844 auto SaveLR = [&](int64_t Offset) { 845 assert(MustSaveLR && "LR is not required to be saved!"); 846 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 847 .addReg(ScratchReg, getKillRegState(!HasROPProtect)) 848 .addImm(Offset) 849 .addReg(SPReg); 850 851 // Add the ROP protection Hash Store instruction. 852 // NOTE: This is technically a violation of the ABI. The hash can be saved 853 // up to 512 bytes into the Protected Zone. This can be outside of the 854 // initial 288 byte volatile program storage region in the Protected Zone. 855 // However, this restriction will be removed in an upcoming revision of the 856 // ABI. 857 if (HasROPProtect) { 858 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 859 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 860 assert((ImmOffset <= -8 && ImmOffset >= -512) && 861 "ROP hash save offset out of range."); 862 assert(((ImmOffset & 0x7) == 0) && 863 "ROP hash save offset must be 8 byte aligned."); 864 BuildMI(MBB, StackUpdateLoc, dl, HashST) 865 .addReg(ScratchReg, getKillRegState(true)) 866 .addImm(ImmOffset) 867 .addReg(SPReg); 868 } 869 }; 870 871 if (MustSaveLR && HasFastMFLR) 872 SaveLR(LROffset); 873 874 if (MustSaveCR && 875 !(SingleScratchReg && MustSaveLR)) { 876 assert(HasRedZone && "A red zone is always available on PPC64"); 877 BuildMI(MBB, MBBI, dl, StoreWordInst) 878 .addReg(TempReg, getKillRegState(true)) 879 .addImm(CRSaveOffset) 880 .addReg(SPReg); 881 } 882 883 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 884 if (!FrameSize) { 885 if (MustSaveLR && !HasFastMFLR) 886 SaveLR(LROffset); 887 return; 888 } 889 890 // Adjust stack pointer: r1 += NegFrameSize. 891 // If there is a preferred stack alignment, align R1 now 892 893 if (HasBP && HasRedZone) { 894 // Save a copy of r1 as the base pointer. 895 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 896 .addReg(SPReg) 897 .addReg(SPReg); 898 } 899 900 // Have we generated a STUX instruction to claim stack frame? 
If so, 901 // the negated frame size will be placed in ScratchReg. 902 bool HasSTUX = 903 (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || 904 (HasBP && MaxAlign > 1) || isLargeFrame; 905 906 // If we use STUX to update the stack pointer, we need the two scratch 907 // registers TempReg and ScratchReg, we have to save LR here which is stored 908 // in ScratchReg. 909 // If the offset can not be encoded into the store instruction, we also have 910 // to save LR here. 911 if (MustSaveLR && !HasFastMFLR && 912 (HasSTUX || !isInt<16>(FrameSize + LROffset))) 913 SaveLR(LROffset); 914 915 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 916 // pointer is always stored at SP, we will get a free probe due to an essential 917 // STU(X) instruction. 918 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 919 // To be consistent with other targets, a pseudo instruction is emitted and 920 // will be later expanded in `inlineStackProbe`. 921 BuildMI(MBB, MBBI, dl, 922 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 923 : PPC::PROBED_STACKALLOC_32)) 924 .addDef(TempReg) 925 .addDef(ScratchReg) // ScratchReg stores the old sp. 926 .addImm(NegFrameSize); 927 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 928 // update the ScratchReg to meet the assumption that ScratchReg contains 929 // the NegFrameSize. This solution is rather tricky. 930 if (!HasRedZone) { 931 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 932 .addReg(ScratchReg) 933 .addReg(SPReg); 934 } 935 } else { 936 // This condition must be kept in sync with canUseAsPrologue. 937 if (HasBP && MaxAlign > 1) { 938 if (isPPC64) 939 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 940 .addReg(SPReg) 941 .addImm(0) 942 .addImm(64 - Log2(MaxAlign)); 943 else // PPC32... 
944 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 945 .addReg(SPReg) 946 .addImm(0) 947 .addImm(32 - Log2(MaxAlign)) 948 .addImm(31); 949 if (!isLargeFrame) { 950 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 951 .addReg(ScratchReg, RegState::Kill) 952 .addImm(NegFrameSize); 953 } else { 954 assert(!SingleScratchReg && "Only a single scratch reg available"); 955 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize); 956 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 957 .addReg(ScratchReg, RegState::Kill) 958 .addReg(TempReg, RegState::Kill); 959 } 960 961 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 962 .addReg(SPReg, RegState::Kill) 963 .addReg(SPReg) 964 .addReg(ScratchReg); 965 } else if (!isLargeFrame) { 966 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 967 .addReg(SPReg) 968 .addImm(NegFrameSize) 969 .addReg(SPReg); 970 } else { 971 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); 972 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 973 .addReg(SPReg, RegState::Kill) 974 .addReg(SPReg) 975 .addReg(ScratchReg); 976 } 977 } 978 979 // Save the TOC register after the stack pointer update if a prologue TOC 980 // save is required for the function. 981 if (MustSaveTOC) { 982 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 983 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 984 .addReg(TOCReg, getKillRegState(true)) 985 .addImm(TOCSaveOffset) 986 .addReg(SPReg); 987 } 988 989 if (!HasRedZone) { 990 assert(!isPPC64 && "A red zone is always available on PPC64"); 991 if (HasSTUX) { 992 // The negated frame size is in ScratchReg, and the SPReg has been 993 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 994 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 995 // the stack frame (i.e. the old SP), ideally, we would put the old 996 // SP into a register and use it as the base for the stores. 
The 997 // problem is that the only available register may be ScratchReg, 998 // which could be R0, and R0 cannot be used as a base address. 999 1000 // First, set ScratchReg to the old SP. This may need to be modified 1001 // later. 1002 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1003 .addReg(ScratchReg, RegState::Kill) 1004 .addReg(SPReg); 1005 1006 if (ScratchReg == PPC::R0) { 1007 // R0 cannot be used as a base register, but it can be used as an 1008 // index in a store-indexed. 1009 int LastOffset = 0; 1010 if (HasFP) { 1011 // R0 += (FPOffset-LastOffset). 1012 // Need addic, since addi treats R0 as 0. 1013 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1014 .addReg(ScratchReg) 1015 .addImm(FPOffset-LastOffset); 1016 LastOffset = FPOffset; 1017 // Store FP into *R0. 1018 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1019 .addReg(FPReg, RegState::Kill) // Save FP. 1020 .addReg(PPC::ZERO) 1021 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1022 } 1023 if (FI->usesPICBase()) { 1024 // R0 += (PBPOffset-LastOffset). 1025 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1026 .addReg(ScratchReg) 1027 .addImm(PBPOffset-LastOffset); 1028 LastOffset = PBPOffset; 1029 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1030 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1031 .addReg(PPC::ZERO) 1032 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1033 } 1034 if (HasBP) { 1035 // R0 += (BPOffset-LastOffset). 1036 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1037 .addReg(ScratchReg) 1038 .addImm(BPOffset-LastOffset); 1039 LastOffset = BPOffset; 1040 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1041 .addReg(BPReg, RegState::Kill) // Save BP. 1042 .addReg(PPC::ZERO) 1043 .addReg(ScratchReg); // This will be the index (R0 is ok here). 
1044 // BP = R0-LastOffset 1045 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1046 .addReg(ScratchReg, RegState::Kill) 1047 .addImm(-LastOffset); 1048 } 1049 } else { 1050 // ScratchReg is not R0, so use it as the base register. It is 1051 // already set to the old SP, so we can use the offsets directly. 1052 1053 // Now that the stack frame has been allocated, save all the necessary 1054 // registers using ScratchReg as the base address. 1055 if (HasFP) 1056 BuildMI(MBB, MBBI, dl, StoreInst) 1057 .addReg(FPReg) 1058 .addImm(FPOffset) 1059 .addReg(ScratchReg); 1060 if (FI->usesPICBase()) 1061 BuildMI(MBB, MBBI, dl, StoreInst) 1062 .addReg(PPC::R30) 1063 .addImm(PBPOffset) 1064 .addReg(ScratchReg); 1065 if (HasBP) { 1066 BuildMI(MBB, MBBI, dl, StoreInst) 1067 .addReg(BPReg) 1068 .addImm(BPOffset) 1069 .addReg(ScratchReg); 1070 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1071 .addReg(ScratchReg, RegState::Kill) 1072 .addReg(ScratchReg); 1073 } 1074 } 1075 } else { 1076 // The frame size is a known 16-bit constant (fitting in the immediate 1077 // field of STWU). To be here we have to be compiling for PPC32. 1078 // Since the SPReg has been decreased by FrameSize, add it back to each 1079 // offset. 1080 if (HasFP) 1081 BuildMI(MBB, MBBI, dl, StoreInst) 1082 .addReg(FPReg) 1083 .addImm(FrameSize + FPOffset) 1084 .addReg(SPReg); 1085 if (FI->usesPICBase()) 1086 BuildMI(MBB, MBBI, dl, StoreInst) 1087 .addReg(PPC::R30) 1088 .addImm(FrameSize + PBPOffset) 1089 .addReg(SPReg); 1090 if (HasBP) { 1091 BuildMI(MBB, MBBI, dl, StoreInst) 1092 .addReg(BPReg) 1093 .addImm(FrameSize + BPOffset) 1094 .addReg(SPReg); 1095 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1096 .addReg(SPReg) 1097 .addImm(FrameSize); 1098 } 1099 } 1100 } 1101 1102 // Save the LR now. 1103 if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset)) 1104 SaveLR(LROffset + FrameSize); 1105 1106 // Add Call Frame Information for the instructions we generated above. 
1107 if (needsCFI) { 1108 unsigned CFIIndex; 1109 1110 if (HasBP) { 1111 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1112 // because if the stack needed aligning then CFA won't be at a fixed 1113 // offset from FP/SP. 1114 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1115 CFIIndex = MF.addFrameInst( 1116 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1117 } else { 1118 // Adjust the definition of CFA to account for the change in SP. 1119 assert(NegFrameSize); 1120 CFIIndex = MF.addFrameInst( 1121 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1122 } 1123 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1124 .addCFIIndex(CFIIndex); 1125 1126 if (HasFP) { 1127 // Describe where FP was saved, at a fixed offset from CFA. 1128 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1129 CFIIndex = MF.addFrameInst( 1130 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1131 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1132 .addCFIIndex(CFIIndex); 1133 } 1134 1135 if (FI->usesPICBase()) { 1136 // Describe where FP was saved, at a fixed offset from CFA. 1137 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1138 CFIIndex = MF.addFrameInst( 1139 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1140 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1141 .addCFIIndex(CFIIndex); 1142 } 1143 1144 if (HasBP) { 1145 // Describe where BP was saved, at a fixed offset from CFA. 1146 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1147 CFIIndex = MF.addFrameInst( 1148 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1149 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1150 .addCFIIndex(CFIIndex); 1151 } 1152 1153 if (MustSaveLR) { 1154 // Describe where LR was saved, at a fixed offset from CFA. 
1155 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1156 CFIIndex = MF.addFrameInst( 1157 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1158 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1159 .addCFIIndex(CFIIndex); 1160 } 1161 } 1162 1163 // If there is a frame pointer, copy R1 into R31 1164 if (HasFP) { 1165 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1166 .addReg(SPReg) 1167 .addReg(SPReg); 1168 1169 if (!HasBP && needsCFI) { 1170 // Change the definition of CFA from SP+offset to FP+offset, because SP 1171 // will change at every alloca. 1172 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1173 unsigned CFIIndex = MF.addFrameInst( 1174 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1175 1176 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1177 .addCFIIndex(CFIIndex); 1178 } 1179 } 1180 1181 if (needsCFI) { 1182 // Describe where callee saved registers were saved, at fixed offsets from 1183 // CFA. 1184 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1185 for (const CalleeSavedInfo &I : CSI) { 1186 Register Reg = I.getReg(); 1187 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1188 1189 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1190 // subregisters of CR2. We just need to emit a move of CR2. 1191 if (PPC::CRBITRCRegClass.contains(Reg)) 1192 continue; 1193 1194 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1195 continue; 1196 1197 // For 64-bit SVR4 when we have spilled CRs, the spill location 1198 // is SP+8, not a frame-relative slot. 1199 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1200 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1201 // the whole CR word. In the ELFv2 ABI, every CR that was 1202 // actually saved gets its own CFI record. 1203 Register CRReg = isELFv2ABI? 
Reg : PPC::CR2; 1204 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1205 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1206 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1207 .addCFIIndex(CFIIndex); 1208 continue; 1209 } 1210 1211 if (I.isSpilledToReg()) { 1212 unsigned SpilledReg = I.getDstReg(); 1213 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1214 nullptr, MRI->getDwarfRegNum(Reg, true), 1215 MRI->getDwarfRegNum(SpilledReg, true))); 1216 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1217 .addCFIIndex(CFIRegister); 1218 } else { 1219 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); 1220 // We have changed the object offset above but we do not want to change 1221 // the actual offsets in the CFI instruction so we have to undo the 1222 // offset change here. 1223 if (MovingStackUpdateDown) 1224 Offset -= NegFrameSize; 1225 1226 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1227 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1228 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1229 .addCFIIndex(CFIIndex); 1230 } 1231 } 1232 } 1233 } 1234 1235 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, 1236 MachineBasicBlock &PrologMBB) const { 1237 bool isPPC64 = Subtarget.isPPC64(); 1238 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 1239 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1240 MachineFrameInfo &MFI = MF.getFrameInfo(); 1241 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); 1242 // AIX assembler does not support cfi directives. 
1243 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 1244 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { 1245 int Opc = MI.getOpcode(); 1246 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; 1247 }); 1248 if (StackAllocMIPos == PrologMBB.end()) 1249 return; 1250 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); 1251 MachineBasicBlock *CurrentMBB = &PrologMBB; 1252 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); 1253 MachineInstr &MI = *StackAllocMIPos; 1254 int64_t NegFrameSize = MI.getOperand(2).getImm(); 1255 unsigned ProbeSize = TLI.getStackProbeSize(MF); 1256 int64_t NegProbeSize = -(int64_t)ProbeSize; 1257 assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); 1258 int64_t NumBlocks = NegFrameSize / NegProbeSize; 1259 int64_t NegResidualSize = NegFrameSize % NegProbeSize; 1260 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1261 Register ScratchReg = MI.getOperand(0).getReg(); 1262 Register FPReg = MI.getOperand(1).getReg(); 1263 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1264 bool HasBP = RegInfo->hasBasePointer(MF); 1265 Register BPReg = RegInfo->getBaseRegister(MF); 1266 Align MaxAlign = MFI.getMaxAlign(); 1267 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1268 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); 1269 // Subroutines to generate .cfi_* directives. 
1270 auto buildDefCFAReg = [&](MachineBasicBlock &MBB, 1271 MachineBasicBlock::iterator MBBI, Register Reg) { 1272 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1273 unsigned CFIIndex = MF.addFrameInst( 1274 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); 1275 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1276 .addCFIIndex(CFIIndex); 1277 }; 1278 auto buildDefCFA = [&](MachineBasicBlock &MBB, 1279 MachineBasicBlock::iterator MBBI, Register Reg, 1280 int Offset) { 1281 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1282 unsigned CFIIndex = MBB.getParent()->addFrameInst( 1283 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); 1284 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1285 .addCFIIndex(CFIIndex); 1286 }; 1287 // Subroutine to determine if we can use the Imm as part of d-form. 1288 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; 1289 // Subroutine to materialize the Imm into TempReg. 1290 auto MaterializeImm = [&](MachineBasicBlock &MBB, 1291 MachineBasicBlock::iterator MBBI, int64_t Imm, 1292 Register &TempReg) { 1293 assert(isInt<32>(Imm) && "Unhandled imm"); 1294 if (isInt<16>(Imm)) 1295 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg) 1296 .addImm(Imm); 1297 else { 1298 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) 1299 .addImm(Imm >> 16); 1300 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg) 1301 .addReg(TempReg) 1302 .addImm(Imm & 0xFFFF); 1303 } 1304 }; 1305 // Subroutine to store frame pointer and decrease stack pointer by probe size. 1306 auto allocateAndProbe = [&](MachineBasicBlock &MBB, 1307 MachineBasicBlock::iterator MBBI, int64_t NegSize, 1308 Register NegSizeReg, bool UseDForm, 1309 Register StoreReg) { 1310 if (UseDForm) 1311 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? 
PPC::STDU : PPC::STWU), SPReg) 1312 .addReg(StoreReg) 1313 .addImm(NegSize) 1314 .addReg(SPReg); 1315 else 1316 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) 1317 .addReg(StoreReg) 1318 .addReg(SPReg) 1319 .addReg(NegSizeReg); 1320 }; 1321 // Used to probe stack when realignment is required. 1322 // Note that, according to ABI's requirement, *sp must always equals the 1323 // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. 1324 // Following is pseudo code: 1325 // final_sp = (sp & align) + negframesize; 1326 // neg_gap = final_sp - sp; 1327 // while (neg_gap < negprobesize) { 1328 // stdu fp, negprobesize(sp); 1329 // neg_gap -= negprobesize; 1330 // } 1331 // stdux fp, sp, neg_gap 1332 // 1333 // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg 1334 // before probe code, we don't need to save it, so we get one additional reg 1335 // that can be used to materialize the probeside if needed to use xform. 1336 // Otherwise, we can NOT materialize probeside, so we can only use Dform for 1337 // now. 1338 // 1339 // The allocations are: 1340 // if (HasBP && HasRedzone) { 1341 // r0: materialize the probesize if needed so that we can use xform. 1342 // r12: `neg_gap` 1343 // } else { 1344 // r0: back-chain pointer 1345 // r12: `neg_gap`. 1346 // } 1347 auto probeRealignedStack = [&](MachineBasicBlock &MBB, 1348 MachineBasicBlock::iterator MBBI, 1349 Register ScratchReg, Register TempReg) { 1350 assert(HasBP && "The function is supposed to have base pointer when its " 1351 "stack is realigned."); 1352 assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); 1353 1354 // FIXME: We can eliminate this limitation if we get more infomation about 1355 // which part of redzone are already used. Used redzone can be treated 1356 // probed. But there might be `holes' in redzone probed, this could 1357 // complicate the implementation. 
1358 assert(ProbeSize >= Subtarget.getRedZoneSize() && 1359 "Probe size should be larger or equal to the size of red-zone so " 1360 "that red-zone is not clobbered by probing."); 1361 1362 Register &FinalStackPtr = TempReg; 1363 // FIXME: We only support NegProbeSize materializable by DForm currently. 1364 // When HasBP && HasRedzone, we can use xform if we have an additional idle 1365 // register. 1366 NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); 1367 assert(isInt<16>(NegProbeSize) && 1368 "NegProbeSize should be materializable by DForm"); 1369 Register CRReg = PPC::CR0; 1370 // Layout of output assembly kinda like: 1371 // bb.0: 1372 // ... 1373 // sub $scratchreg, $finalsp, r1 1374 // cmpdi $scratchreg, <negprobesize> 1375 // bge bb.2 1376 // bb.1: 1377 // stdu <backchain>, <negprobesize>(r1) 1378 // sub $scratchreg, $scratchreg, negprobesize 1379 // cmpdi $scratchreg, <negprobesize> 1380 // blt bb.1 1381 // bb.2: 1382 // stdux <backchain>, r1, $scratchreg 1383 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); 1384 MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); 1385 MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); 1386 MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); 1387 MF.insert(MBBInsertPoint, ProbeExitMBB); 1388 // bb.2 1389 { 1390 Register BackChainPointer = HasRedZone ? BPReg : TempReg; 1391 allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, 1392 BackChainPointer); 1393 if (HasRedZone) 1394 // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg 1395 // to TempReg to satisfy it. 1396 BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) 1397 .addReg(BPReg) 1398 .addReg(BPReg); 1399 ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); 1400 ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 1401 } 1402 // bb.0 1403 { 1404 BuildMI(&MBB, DL, TII.get(isPPC64 ? 
PPC::SUBF8 : PPC::SUBF), ScratchReg) 1405 .addReg(SPReg) 1406 .addReg(FinalStackPtr); 1407 if (!HasRedZone) 1408 BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); 1409 BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) 1410 .addReg(ScratchReg) 1411 .addImm(NegProbeSize); 1412 BuildMI(&MBB, DL, TII.get(PPC::BCC)) 1413 .addImm(PPC::PRED_GE) 1414 .addReg(CRReg) 1415 .addMBB(ProbeExitMBB); 1416 MBB.addSuccessor(ProbeLoopBodyMBB); 1417 MBB.addSuccessor(ProbeExitMBB); 1418 } 1419 // bb.1 1420 { 1421 Register BackChainPointer = HasRedZone ? BPReg : TempReg; 1422 allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, 1423 0, true /*UseDForm*/, BackChainPointer); 1424 BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), 1425 ScratchReg) 1426 .addReg(ScratchReg) 1427 .addImm(-NegProbeSize); 1428 BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), 1429 CRReg) 1430 .addReg(ScratchReg) 1431 .addImm(NegProbeSize); 1432 BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) 1433 .addImm(PPC::PRED_LT) 1434 .addReg(CRReg) 1435 .addMBB(ProbeLoopBodyMBB); 1436 ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); 1437 ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); 1438 } 1439 // Update liveins. 1440 fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB}); 1441 return ProbeExitMBB; 1442 }; 1443 // For case HasBP && MaxAlign > 1, we have to realign the SP by performing 1444 // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since 1445 // the offset subtracted from SP is determined by SP's runtime value. 1446 if (HasBP && MaxAlign > 1) { 1447 // Calculate final stack pointer. 
1448 if (isPPC64) 1449 BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) 1450 .addReg(SPReg) 1451 .addImm(0) 1452 .addImm(64 - Log2(MaxAlign)); 1453 else 1454 BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) 1455 .addReg(SPReg) 1456 .addImm(0) 1457 .addImm(32 - Log2(MaxAlign)) 1458 .addImm(31); 1459 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), 1460 FPReg) 1461 .addReg(ScratchReg) 1462 .addReg(SPReg); 1463 MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); 1464 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), 1465 FPReg) 1466 .addReg(ScratchReg) 1467 .addReg(FPReg); 1468 CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); 1469 if (needsCFI) 1470 buildDefCFAReg(*CurrentMBB, {MI}, FPReg); 1471 } else { 1472 // Initialize current frame pointer. 1473 BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); 1474 // Use FPReg to calculate CFA. 1475 if (needsCFI) 1476 buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); 1477 // Probe residual part. 1478 if (NegResidualSize) { 1479 bool ResidualUseDForm = CanUseDForm(NegResidualSize); 1480 if (!ResidualUseDForm) 1481 MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); 1482 allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, 1483 ResidualUseDForm, FPReg); 1484 } 1485 bool UseDForm = CanUseDForm(NegProbeSize); 1486 // If number of blocks is small, just probe them directly. 1487 if (NumBlocks < 3) { 1488 if (!UseDForm) 1489 MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); 1490 for (int i = 0; i < NumBlocks; ++i) 1491 allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, 1492 FPReg); 1493 if (needsCFI) { 1494 // Restore using SPReg to calculate CFA. 
1495 buildDefCFAReg(*CurrentMBB, {MI}, SPReg); 1496 } 1497 } else { 1498 // Since CTR is a volatile register and current shrinkwrap implementation 1499 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a 1500 // CTR loop to probe. 1501 // Calculate trip count and stores it in CTRReg. 1502 MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); 1503 BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) 1504 .addReg(ScratchReg, RegState::Kill); 1505 if (!UseDForm) 1506 MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); 1507 // Create MBBs of the loop. 1508 MachineFunction::iterator MBBInsertPoint = 1509 std::next(CurrentMBB->getIterator()); 1510 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); 1511 MF.insert(MBBInsertPoint, LoopMBB); 1512 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); 1513 MF.insert(MBBInsertPoint, ExitMBB); 1514 // Synthesize the loop body. 1515 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, 1516 UseDForm, FPReg); 1517 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) 1518 .addMBB(LoopMBB); 1519 LoopMBB->addSuccessor(ExitMBB); 1520 LoopMBB->addSuccessor(LoopMBB); 1521 // Synthesize the exit MBB. 1522 ExitMBB->splice(ExitMBB->end(), CurrentMBB, 1523 std::next(MachineBasicBlock::iterator(MI)), 1524 CurrentMBB->end()); 1525 ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); 1526 CurrentMBB->addSuccessor(LoopMBB); 1527 if (needsCFI) { 1528 // Restore using SPReg to calculate CFA. 1529 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); 1530 } 1531 // Update liveins. 
1532 fullyRecomputeLiveIns({ExitMBB, LoopMBB}); 1533 } 1534 } 1535 ++NumPrologProbed; 1536 MI.eraseFromParent(); 1537 } 1538 1539 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1540 MachineBasicBlock &MBB) const { 1541 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1542 DebugLoc dl; 1543 1544 if (MBBI != MBB.end()) 1545 dl = MBBI->getDebugLoc(); 1546 1547 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1548 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1549 1550 // Get alignment info so we know how to restore the SP. 1551 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1552 1553 // Get the number of bytes allocated from the FrameInfo. 1554 int64_t FrameSize = MFI.getStackSize(); 1555 1556 // Get processor type. 1557 bool isPPC64 = Subtarget.isPPC64(); 1558 1559 // Check if the link register (LR) has been saved. 1560 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1561 bool MustSaveLR = FI->mustSaveLR(); 1562 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1563 bool MustSaveCR = !MustSaveCRs.empty(); 1564 // Do we have a frame pointer and/or base pointer for this function? 1565 bool HasFP = hasFP(MF); 1566 bool HasBP = RegInfo->hasBasePointer(MF); 1567 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1568 bool HasROPProtect = Subtarget.hasROPProtect(); 1569 bool HasPrivileged = Subtarget.hasPrivileged(); 1570 1571 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1572 Register BPReg = RegInfo->getBaseRegister(MF); 1573 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1574 Register ScratchReg; 1575 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1576 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1577 : PPC::MTLR ); 1578 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1579 : PPC::LWZ ); 1580 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1581 : PPC::LIS ); 1582 const MCInstrDesc& OrInst = TII.get(isPPC64 ? 
PPC::OR8 1583 : PPC::OR ); 1584 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1585 : PPC::ORI ); 1586 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1587 : PPC::ADDI ); 1588 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1589 : PPC::ADD4 ); 1590 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1591 : PPC::LWZ); 1592 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1593 : PPC::MTOCRF); 1594 const MCInstrDesc &HashChk = 1595 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8) 1596 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK)); 1597 int64_t LROffset = getReturnSaveOffset(); 1598 1599 int64_t FPOffset = 0; 1600 1601 // Using the same bool variable as below to suppress compiler warnings. 1602 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1603 &TempReg); 1604 assert(SingleScratchReg && 1605 "Could not find an available scratch register"); 1606 1607 SingleScratchReg = ScratchReg == TempReg; 1608 1609 if (HasFP) { 1610 int FPIndex = FI->getFramePointerSaveIndex(); 1611 assert(FPIndex && "No Frame Pointer Save Slot!"); 1612 FPOffset = MFI.getObjectOffset(FPIndex); 1613 } 1614 1615 int64_t BPOffset = 0; 1616 if (HasBP) { 1617 int BPIndex = FI->getBasePointerSaveIndex(); 1618 assert(BPIndex && "No Base Pointer Save Slot!"); 1619 BPOffset = MFI.getObjectOffset(BPIndex); 1620 } 1621 1622 int64_t PBPOffset = 0; 1623 if (FI->usesPICBase()) { 1624 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1625 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1626 PBPOffset = MFI.getObjectOffset(PBPIndex); 1627 } 1628 1629 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1630 1631 if (IsReturnBlock) { 1632 unsigned RetOpcode = MBBI->getOpcode(); 1633 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1634 RetOpcode == PPC::TCRETURNdi || 1635 RetOpcode == PPC::TCRETURNai || 1636 RetOpcode == PPC::TCRETURNri8 || 1637 RetOpcode == PPC::TCRETURNdi8 || 1638 
RetOpcode == PPC::TCRETURNai8; 1639 1640 if (UsesTCRet) { 1641 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1642 MachineOperand &StackAdjust = MBBI->getOperand(1); 1643 assert(StackAdjust.isImm() && "Expecting immediate value."); 1644 // Adjust stack pointer. 1645 int StackAdj = StackAdjust.getImm(); 1646 int Delta = StackAdj - MaxTCRetDelta; 1647 assert((Delta >= 0) && "Delta must be positive"); 1648 if (MaxTCRetDelta>0) 1649 FrameSize += (StackAdj +Delta); 1650 else 1651 FrameSize += StackAdj; 1652 } 1653 } 1654 1655 // Frames of 32KB & larger require special handling because they cannot be 1656 // indexed into with a simple LD/LWZ immediate offset operand. 1657 bool isLargeFrame = !isInt<16>(FrameSize); 1658 1659 // On targets without red zone, the SP needs to be restored last, so that 1660 // all live contents of the stack frame are upwards of the SP. This means 1661 // that we cannot restore SP just now, since there may be more registers 1662 // to restore from the stack frame (e.g. R31). If the frame size is not 1663 // a simple immediate value, we will need a spare register to hold the 1664 // restored SP. If the frame size is known and small, we can simply adjust 1665 // the offsets of the registers to be restored, and still use SP to restore 1666 // them. In such case, the final update of SP will be to add the frame 1667 // size to it. 1668 // To simplify the code, set RBReg to the base register used to restore 1669 // values from the stack, and set SPAdd to the value that needs to be added 1670 // to the SP at the end. The default values are as if red zone was present. 1671 unsigned RBReg = SPReg; 1672 uint64_t SPAdd = 0; 1673 1674 // Check if we can move the stack update instruction up the epilogue 1675 // past the callee saves. This will allow the move to LR instruction 1676 // to be executed before the restores of the callee saves which means 1677 // that the callee saves can hide the latency from the MTLR instrcution. 
1678 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1679 if (stackUpdateCanBeMoved(MF)) { 1680 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1681 for (CalleeSavedInfo CSI : Info) { 1682 // If the callee saved register is spilled to another register abort the 1683 // stack update movement. 1684 if (CSI.isSpilledToReg()) { 1685 StackUpdateLoc = MBBI; 1686 break; 1687 } 1688 int FrIdx = CSI.getFrameIdx(); 1689 // If the frame index is not negative the callee saved info belongs to a 1690 // stack object that is not a fixed stack object. We ignore non-fixed 1691 // stack objects because we won't move the update of the stack pointer 1692 // past them. 1693 if (FrIdx >= 0) 1694 continue; 1695 1696 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1697 StackUpdateLoc--; 1698 else { 1699 // Abort the operation as we can't update all CSR restores. 1700 StackUpdateLoc = MBBI; 1701 break; 1702 } 1703 } 1704 } 1705 1706 if (FrameSize) { 1707 // In the prologue, the loaded (or persistent) stack pointer value is 1708 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1709 // zone add this offset back now. 1710 1711 // If the function has a base pointer, the stack pointer has been copied 1712 // to it so we can restore it by copying in the other direction. 1713 if (HasRedZone && HasBP) { 1714 BuildMI(MBB, MBBI, dl, OrInst, RBReg). 1715 addReg(BPReg). 1716 addReg(BPReg); 1717 } 1718 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1719 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1720 // call which invalidates the stack pointer value in SP(0). So we use the 1721 // value of R31 in this case. Similar situation exists with setjmp. 
1722 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { 1723 assert(HasFP && "Expecting a valid frame pointer."); 1724 if (!HasRedZone) 1725 RBReg = FPReg; 1726 if (!isLargeFrame) { 1727 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1728 .addReg(FPReg).addImm(FrameSize); 1729 } else { 1730 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); 1731 BuildMI(MBB, MBBI, dl, AddInst) 1732 .addReg(RBReg) 1733 .addReg(FPReg) 1734 .addReg(ScratchReg); 1735 } 1736 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1737 if (HasRedZone) { 1738 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1739 .addReg(SPReg) 1740 .addImm(FrameSize); 1741 } else { 1742 // Make sure that adding FrameSize will not overflow the max offset 1743 // size. 1744 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1745 "Local offsets should be negative"); 1746 SPAdd = FrameSize; 1747 FPOffset += FrameSize; 1748 BPOffset += FrameSize; 1749 PBPOffset += FrameSize; 1750 } 1751 } else { 1752 // We don't want to use ScratchReg as a base register, because it 1753 // could happen to be R0. Use FP instead, but make sure to preserve it. 1754 if (!HasRedZone) { 1755 // If FP is not saved, copy it to ScratchReg. 1756 if (!HasFP) 1757 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1758 .addReg(FPReg) 1759 .addReg(FPReg); 1760 RBReg = FPReg; 1761 } 1762 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1763 .addImm(0) 1764 .addReg(SPReg); 1765 } 1766 } 1767 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1768 // If there is no red zone, ScratchReg may be needed for holding a useful 1769 // value (although not the base register). Make sure it is not overwritten 1770 // too early. 1771 1772 // If we need to restore both the LR and the CR and we only have one 1773 // available scratch register, we must do them one at a time. 
1774 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1775 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1776 // is live here. 1777 assert(HasRedZone && "Expecting red zone"); 1778 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1779 .addImm(CRSaveOffset) 1780 .addReg(SPReg); 1781 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1782 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1783 .addReg(TempReg, getKillRegState(i == e-1)); 1784 } 1785 1786 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1787 // LR is stored in the caller's stack frame. ScratchReg will be needed 1788 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1789 // a base register anyway, because it may happen to be R0. 1790 bool LoadedLR = false; 1791 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1792 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1793 .addImm(LROffset+SPAdd) 1794 .addReg(RBReg); 1795 LoadedLR = true; 1796 } 1797 1798 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1799 assert(RBReg == SPReg && "Should be using SP as a base register"); 1800 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1801 .addImm(CRSaveOffset) 1802 .addReg(RBReg); 1803 } 1804 1805 if (HasFP) { 1806 // If there is red zone, restore FP directly, since SP has already been 1807 // restored. Otherwise, restore the value of FP into ScratchReg. 
1808 if (HasRedZone || RBReg == SPReg) 1809 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1810 .addImm(FPOffset) 1811 .addReg(SPReg); 1812 else 1813 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1814 .addImm(FPOffset) 1815 .addReg(RBReg); 1816 } 1817 1818 if (FI->usesPICBase()) 1819 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1820 .addImm(PBPOffset) 1821 .addReg(RBReg); 1822 1823 if (HasBP) 1824 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1825 .addImm(BPOffset) 1826 .addReg(RBReg); 1827 1828 // There is nothing more to be loaded from the stack, so now we can 1829 // restore SP: SP = RBReg + SPAdd. 1830 if (RBReg != SPReg || SPAdd != 0) { 1831 assert(!HasRedZone && "This should not happen with red zone"); 1832 // If SPAdd is 0, generate a copy. 1833 if (SPAdd == 0) 1834 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1835 .addReg(RBReg) 1836 .addReg(RBReg); 1837 else 1838 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1839 .addReg(RBReg) 1840 .addImm(SPAdd); 1841 1842 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1843 if (RBReg == FPReg) 1844 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1845 .addReg(ScratchReg) 1846 .addReg(ScratchReg); 1847 1848 // Now load the LR from the caller's stack frame. 1849 if (MustSaveLR && !LoadedLR) 1850 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1851 .addImm(LROffset) 1852 .addReg(SPReg); 1853 } 1854 1855 if (MustSaveCR && 1856 !(SingleScratchReg && MustSaveLR)) 1857 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1858 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1859 .addReg(TempReg, getKillRegState(i == e-1)); 1860 1861 if (MustSaveLR) { 1862 // If ROP protection is required, an extra instruction is added to compute a 1863 // hash and then compare it to the hash stored in the prologue. 
1864 if (HasROPProtect) { 1865 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 1866 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 1867 assert((ImmOffset <= -8 && ImmOffset >= -512) && 1868 "ROP hash check location offset out of range."); 1869 assert(((ImmOffset & 0x7) == 0) && 1870 "ROP hash check location offset must be 8 byte aligned."); 1871 BuildMI(MBB, StackUpdateLoc, dl, HashChk) 1872 .addReg(ScratchReg) 1873 .addImm(ImmOffset) 1874 .addReg(SPReg); 1875 } 1876 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1877 } 1878 1879 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1880 // call optimization 1881 if (IsReturnBlock) { 1882 unsigned RetOpcode = MBBI->getOpcode(); 1883 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1884 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1885 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1886 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1887 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1888 1889 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1890 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1891 .addReg(SPReg).addImm(CallerAllocatedAmt); 1892 } else { 1893 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1894 .addImm(CallerAllocatedAmt >> 16); 1895 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1896 .addReg(ScratchReg, RegState::Kill) 1897 .addImm(CallerAllocatedAmt & 0xFFFF); 1898 BuildMI(MBB, MBBI, dl, AddInst) 1899 .addReg(SPReg) 1900 .addReg(FPReg) 1901 .addReg(ScratchReg); 1902 } 1903 } else { 1904 createTailCallBranchInstr(MBB); 1905 } 1906 } 1907 } 1908 1909 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1910 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1911 1912 // If we got this far a first terminator should exist. 
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions will
  // be MO_GlobalAddress while others like memcpy for example, are going to
  // be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    // Indirect tail call: target is already in CTR.
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    // Absolute-address tail call.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    // 64-bit variants of the three cases above.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

// Determine which callee-saved registers must be saved for this function and
// pre-allocate fixed stack objects for the special save slots (frame pointer,
// base pointer, PIC base, tail-call linkage area, and nonvolatile CR).
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  if (Subtarget.isAIXABI())
    updateCalleeSaves(MF, SavedRegs);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Do not explicitly save the callee saved VSRp registers.
  // The individual VSR subregisters will be saved instead.
  SavedRegs.reset(PPC::VSRp26);
  SavedRegs.reset(PPC::VSRp27);
  SavedRegs.reset(PPC::VSRp28);
  SavedRegs.reset(PPC::VSRp29);
  SavedRegs.reset(PPC::VSRp30);
  SavedRegs.reset(PPC::VSRp31);

  // Save and clear the LR state. LR is handled specially in the prologue /
  // epilogue rather than via the generic CSR spill mechanism.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fixed offset of the frame pointer save area is.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF)) {
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
    // On AIX, when BaseRegister(R30) is used, need to spill r31 too to match
    // the AIX traceback table requirement.
    if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) &&
        Subtarget.isAIXABI()) {
      assert(
          (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
          "Invalid base register on AIX!");
      SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
    }
  }
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

// Finalize the stack offsets of the callee-saved register save areas (FPRs,
// GPRs, CR, vector registers) now that the set of saved registers is known,
// and reserve register-scavenger spill slots if they may be needed.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                           RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Track the lowest-numbered saved register in each class; save areas are
  // laid out to cover from that register up to 31.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Partition the callee-saved registers by register class.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(I);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(I);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(I);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(I);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Running lower bound (negative, SP-relative) of the area laid out so far.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    // Each FPR slot is 8 bytes; the area spans MinFPR..F31.
    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    // The base pointer may be either a 32-bit or 64-bit GPR depending on the
    // target; update the corresponding save-area bookkeeping.
    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Likewise for the 64-bit general registers; slots spilled to vector
    // registers do not occupy stack space.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    unsigned MinReg =
        std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                           TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for positive
    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
    // we are using negative number here (the stack grows downward). We should
    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
    // is the alignment size ( n = 16 here) and y is the size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.
  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
  // We don't know the complete frame size here because we've not yet computed
  // callee-saved register spills or the needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);

  if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
      (hasSpills(MF) && NeedSpills)) {
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    Align Alignment = TRI.getSpillAlign(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));

    // Might we have over-aligned allocas?
    bool HasAlVars =
        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || HasAlVars)
      RS->addScavengingFrameIndex(
          MFI.CreateStackObject(Size, Alignment, false));
  }
}

// This function checks if a callee saved gpr can be spilled to a volatile
// vector register. This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Subtarget.hasSPE()) {
    // In case of SPE we only have SuperRegs and CRs
    // in our CalleeSavedInfo vector.

    for (auto &CalleeSaveReg : CSI) {
      MCPhysReg Reg = CalleeSaveReg.getReg();
      MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
      MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);

      if ( // Check only for SuperRegs.
          Lower &&
          // Replace Reg if only lower-32 bits modified
          !MRI.isPhysRegModified(Higher))
        CalleeSaveReg = CalleeSavedInfo(Lower);
    }
  }

  // Early exit if cannot spill gprs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VSX register, or if it is
    // used in the function.
2370 if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) || 2371 MRI.isPhysRegUsed(Reg)) 2372 BVAllocatable.reset(Reg); 2373 } 2374 2375 bool AllSpilledToReg = true; 2376 unsigned LastVSRUsedForSpill = 0; 2377 for (auto &CS : CSI) { 2378 if (BVAllocatable.none()) 2379 return false; 2380 2381 Register Reg = CS.getReg(); 2382 2383 if (!PPC::G8RCRegClass.contains(Reg)) { 2384 AllSpilledToReg = false; 2385 continue; 2386 } 2387 2388 // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs 2389 // into one VSR using the mtvsrdd instruction. 2390 if (LastVSRUsedForSpill != 0) { 2391 CS.setDstReg(LastVSRUsedForSpill); 2392 BVAllocatable.reset(LastVSRUsedForSpill); 2393 LastVSRUsedForSpill = 0; 2394 continue; 2395 } 2396 2397 unsigned VolatileVFReg = BVAllocatable.find_first(); 2398 if (VolatileVFReg < BVAllocatable.size()) { 2399 CS.setDstReg(VolatileVFReg); 2400 LastVSRUsedForSpill = VolatileVFReg; 2401 } else { 2402 AllSpilledToReg = false; 2403 } 2404 } 2405 return AllSpilledToReg; 2406 } 2407 2408 bool PPCFrameLowering::spillCalleeSavedRegisters( 2409 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2410 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2411 2412 MachineFunction *MF = MBB.getParent(); 2413 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2414 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2415 bool MustSaveTOC = FI->mustSaveTOC(); 2416 DebugLoc DL; 2417 bool CRSpilled = false; 2418 MachineInstrBuilder CRMIB; 2419 BitVector Spilled(TRI->getNumRegs()); 2420 2421 VSRContainingGPRs.clear(); 2422 2423 // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one 2424 // or two GPRs, so we need table to record information for later save/restore. 
  for (const CalleeSavedInfo &Info : CSI) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!");
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  }

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    // Subsequent nonvolatile CR fields piggyback on the MFCR already built
    // for the first one.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         I.getFrameIdx()));
      }
    } else {
      if (I.isSpilledToReg()) {
        unsigned Dst = I.getDstReg();

        // A VSR holding two GPRs is spilled only once.
        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets.");

          NumPESpillVSR += 2;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets.");

          ++NumPESpillVSR;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
                  TRI->getSubReg(Dst, PPC::sub_64))
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       I.getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
                                  TRI, Register());
      }
    }
  }
  return true;
}

// Reload the nonvolatile CR fields (CR2-CR4) from the single shared spill
// slot and move each spilled field back into its CR register.
// NOTE(review): the 'is31' parameter is not referenced in this body — confirm
// whether it is still needed by callers.
static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit: FP-relative
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  // R12 is killed by whichever MTOCRF is the last one emitted.
  unsigned RestoreOp = PPC::MTOCRF;
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
               .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
               .addReg(MoveReg, getKillRegState(true)));
}

MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        // Amount fits in a single addi immediate.
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addImm(CalleeAmt);
      } else {
        // Materialize the 32-bit amount with lis/ori, then add it to SP.
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
            .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
            .addReg(TmpReg, RegState::Kill)
            .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

// Returns true for the nonvolatile CR fields (CR2-CR4).
static bool isCalleeSavedCR(unsigned Reg) {
  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
}

bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    Register Reg = CSI[i].getReg();

    // The TOC register (X2/R2) is handled separately when it must be saved.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        DebugLoc DL;
        unsigned Dst = CSI[i].getDstReg();

        // A single VSR may hold up to two GPRs; only emit its reloads once.
        if (Restored[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          // Two GPRs were packed into this VSR; move both halves back out.
          assert(Subtarget.hasP9Vector());
          NumPEReloadVSR += 2;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
                  VSRContainingGPRs[Dst].second)
              .addReg(Dst);
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          // Only one GPR in this VSR.
          assert(Subtarget.hasP8Vector());
          ++NumPEReloadVSR;
          BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
                  VSRContainingGPRs[Dst].first)
              .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }

        Restored.set(Dst);

      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
                                   Register());

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs (i.e. every remaining CSI entry after
  // the last CR was itself a CR field), do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}

/// \return the frame offset of the TOC save slot.
uint64_t PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}

/// \return the frame offset of the frame-pointer save slot.
uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}

/// \return the frame offset of the base-pointer save slot.
uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
  return BasePointerSaveOffset;
}

/// Shrink wrapping is enabled unless the function info disabled it or the
/// target is 32-bit ELF.
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
    return false;
  return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
}

void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
                                         BitVector &SavedRegs) const {
  // The AIX ABI uses traceback tables for EH which require that if callee-saved
  // register N is used, all registers N-31 must be saved/restored.
  // NOTE: The check for AIX is not actually what is relevant. Traceback tables
  // on Linux have the same requirements. It is just that AIX is the only ABI
  // for which we actually use traceback tables. If another ABI needs to be
  // supported that also uses them, we can add a check such as
  // Subtarget.usesTraceBackTables().
  assert(Subtarget.isAIXABI() &&
         "Function updateCalleeSaves should only be called for AIX.");

  // If there are no callee saves then there is nothing to do.
  if (SavedRegs.none())
    return;

  const MCPhysReg *CSRegs =
      Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
  // Start each per-class "lowest" at register 31, the top of the
  // callee-saved range, so the second pass marks nothing unless a lower
  // register of that class is actually saved.
  // NOTE(review): the < / > comparisons below assume the PPC register enum
  // values ascend with register number within each class — confirm if the
  // register definitions ever change.
  MCPhysReg LowestGPR = PPC::R31;
  MCPhysReg LowestG8R = PPC::X31;
  MCPhysReg LowestFPR = PPC::F31;
  MCPhysReg LowestVR = PPC::V31;

  // Traverse the CSRs twice so as not to rely on ascending ordering of
  // registers in the array. The first pass finds the lowest numbered
  // register and the second pass marks all higher numbered registers
  // for spilling.
  for (int i = 0; CSRegs[i]; i++) {
    // Get the lowest numbered register for each class that actually needs
    // to be saved.
    MCPhysReg Cand = CSRegs[i];
    if (!SavedRegs.test(Cand))
      continue;
    if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
      LowestGPR = Cand;
    else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
      LowestG8R = Cand;
    else if ((PPC::F4RCRegClass.contains(Cand) ||
              PPC::F8RCRegClass.contains(Cand)) &&
             Cand < LowestFPR)
      LowestFPR = Cand;
    else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
      LowestVR = Cand;
  }

  // Second pass: every register above the lowest saved one in its class
  // must also be saved/restored.
  for (int i = 0; CSRegs[i]; i++) {
    MCPhysReg Cand = CSRegs[i];
    if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
        (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
        ((PPC::F4RCRegClass.contains(Cand) ||
          PPC::F8RCRegClass.contains(Cand)) &&
         Cand > LowestFPR) ||
        (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
      SavedRegs.set(Cand);
  }
}

/// \return the largest stack-frame size this lowering supports:
/// LONG_MAX on PPC64, otherwise the TargetFrameLowering default.
uint64_t PPCFrameLowering::getStackThreshold() const {
  // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack;
  // use `add r1, r1, <scratch_reg>` to release the stack frame.
  // Scratch register contains a signed 64-bit number, which is negative
  // when extending the stack and is positive when releasing the stack frame.
  // To make `stux` and `add` paired, the absolute value of the number contained
  // in the scratch register should be the same. Thus the maximum stack size
  // is (2^63)-1, i.e., LONG_MAX.
  if (Subtarget.isPPC64())
    return LONG_MAX;

  return TargetFrameLowering::getStackThreshold();
}