xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 79ac3c12a714bcd3f2354c52d948aed9575c46d6)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "framelowering"
33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 STATISTIC(NumPrologProbed, "Number of prologues probed");
36 
37 static cl::opt<bool>
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39                      cl::desc("Enable spills in prologue to vector registers."),
40                      cl::init(false), cl::Hidden);
41 
42 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
43   if (STI.isAIXABI())
44     return STI.isPPC64() ? 16 : 8;
45   // SVR4 ABI:
46   return STI.isPPC64() ? 16 : 4;
47 }
48 
49 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 40 : 20;
52   return STI.isELFv2ABI() ? 24 : 40;
53 }
54 
55 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
56   // First slot in the general register save area.
57   return STI.isPPC64() ? -8U : -4U;
58 }
59 
60 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
61   if (STI.isAIXABI() || STI.isPPC64())
62     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
63 
64   // 32-bit SVR4 ABI:
65   return 8;
66 }
67 
68 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
69   // Third slot in the general purpose register save area.
70   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
71     return -12U;
72 
73   // Second slot in the general purpose register save area.
74   return STI.isPPC64() ? -16U : -8U;
75 }
76 
77 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
78   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
79 }
80 
81 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
82     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
83                           STI.getPlatformStackAlignment(), 0),
84       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
85       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
86       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
87       LinkageSize(computeLinkageSize(Subtarget)),
88       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
89       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
90 
91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
93     unsigned &NumEntries) const {
94 
95 // Floating-point register save area offsets.
96 #define CALLEE_SAVED_FPRS \
97       {PPC::F31, -8},     \
98       {PPC::F30, -16},    \
99       {PPC::F29, -24},    \
100       {PPC::F28, -32},    \
101       {PPC::F27, -40},    \
102       {PPC::F26, -48},    \
103       {PPC::F25, -56},    \
104       {PPC::F24, -64},    \
105       {PPC::F23, -72},    \
106       {PPC::F22, -80},    \
107       {PPC::F21, -88},    \
108       {PPC::F20, -96},    \
109       {PPC::F19, -104},   \
110       {PPC::F18, -112},   \
111       {PPC::F17, -120},   \
112       {PPC::F16, -128},   \
113       {PPC::F15, -136},   \
114       {PPC::F14, -144}
115 
116 // 32-bit general purpose register save area offsets shared by ELF and
117 // AIX. AIX has an extra CSR with r13.
118 #define CALLEE_SAVED_GPRS32 \
119       {PPC::R31, -4},       \
120       {PPC::R30, -8},       \
121       {PPC::R29, -12},      \
122       {PPC::R28, -16},      \
123       {PPC::R27, -20},      \
124       {PPC::R26, -24},      \
125       {PPC::R25, -28},      \
126       {PPC::R24, -32},      \
127       {PPC::R23, -36},      \
128       {PPC::R22, -40},      \
129       {PPC::R21, -44},      \
130       {PPC::R20, -48},      \
131       {PPC::R19, -52},      \
132       {PPC::R18, -56},      \
133       {PPC::R17, -60},      \
134       {PPC::R16, -64},      \
135       {PPC::R15, -68},      \
136       {PPC::R14, -72}
137 
138 // 64-bit general purpose register save area offsets.
139 #define CALLEE_SAVED_GPRS64 \
140       {PPC::X31, -8},       \
141       {PPC::X30, -16},      \
142       {PPC::X29, -24},      \
143       {PPC::X28, -32},      \
144       {PPC::X27, -40},      \
145       {PPC::X26, -48},      \
146       {PPC::X25, -56},      \
147       {PPC::X24, -64},      \
148       {PPC::X23, -72},      \
149       {PPC::X22, -80},      \
150       {PPC::X21, -88},      \
151       {PPC::X20, -96},      \
152       {PPC::X19, -104},     \
153       {PPC::X18, -112},     \
154       {PPC::X17, -120},     \
155       {PPC::X16, -128},     \
156       {PPC::X15, -136},     \
157       {PPC::X14, -144}
158 
159 // Vector register save area offsets.
160 #define CALLEE_SAVED_VRS \
161       {PPC::V31, -16},   \
162       {PPC::V30, -32},   \
163       {PPC::V29, -48},   \
164       {PPC::V28, -64},   \
165       {PPC::V27, -80},   \
166       {PPC::V26, -96},   \
167       {PPC::V25, -112},  \
168       {PPC::V24, -128},  \
169       {PPC::V23, -144},  \
170       {PPC::V22, -160},  \
171       {PPC::V21, -176},  \
172       {PPC::V20, -192}
173 
174   // Note that the offsets here overlap, but this is fixed up in
175   // processFunctionBeforeFrameFinalized.
176 
177   static const SpillSlot ELFOffsets32[] = {
178       CALLEE_SAVED_FPRS,
179       CALLEE_SAVED_GPRS32,
180 
181       // CR save area offset.  We map each of the nonvolatile CR fields
182       // to the slot for CR2, which is the first of the nonvolatile CR
183       // fields to be assigned, so that we only allocate one save slot.
184       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
185       {PPC::CR2, -4},
186 
187       // VRSAVE save area offset.
188       {PPC::VRSAVE, -4},
189 
190       CALLEE_SAVED_VRS,
191 
192       // SPE register save area (overlaps Vector save area).
193       {PPC::S31, -8},
194       {PPC::S30, -16},
195       {PPC::S29, -24},
196       {PPC::S28, -32},
197       {PPC::S27, -40},
198       {PPC::S26, -48},
199       {PPC::S25, -56},
200       {PPC::S24, -64},
201       {PPC::S23, -72},
202       {PPC::S22, -80},
203       {PPC::S21, -88},
204       {PPC::S20, -96},
205       {PPC::S19, -104},
206       {PPC::S18, -112},
207       {PPC::S17, -120},
208       {PPC::S16, -128},
209       {PPC::S15, -136},
210       {PPC::S14, -144}};
211 
212   static const SpillSlot ELFOffsets64[] = {
213       CALLEE_SAVED_FPRS,
214       CALLEE_SAVED_GPRS64,
215 
216       // VRSAVE save area offset.
217       {PPC::VRSAVE, -4},
218       CALLEE_SAVED_VRS
219   };
220 
221   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
222                                            CALLEE_SAVED_GPRS32,
223                                            // Add AIX's extra CSR.
224                                            {PPC::R13, -76},
225                                            CALLEE_SAVED_VRS};
226 
227   static const SpillSlot AIXOffsets64[] = {
228       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
229 
230   if (Subtarget.is64BitELFABI()) {
231     NumEntries = array_lengthof(ELFOffsets64);
232     return ELFOffsets64;
233   }
234 
235   if (Subtarget.is32BitELFABI()) {
236     NumEntries = array_lengthof(ELFOffsets32);
237     return ELFOffsets32;
238   }
239 
240   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
241 
242   if (Subtarget.isPPC64()) {
243     NumEntries = array_lengthof(AIXOffsets64);
244     return AIXOffsets64;
245   }
246 
247   NumEntries = array_lengthof(AIXOffsets32);
248   return AIXOffsets32;
249 }
250 
251 static bool spillsCR(const MachineFunction &MF) {
252   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
253   return FuncInfo->isCRSpilled();
254 }
255 
256 static bool hasSpills(const MachineFunction &MF) {
257   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
258   return FuncInfo->hasSpills();
259 }
260 
261 static bool hasNonRISpills(const MachineFunction &MF) {
262   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
263   return FuncInfo->hasNonRISpills();
264 }
265 
266 /// MustSaveLR - Return true if this function requires that we save the LR
267 /// register onto the stack in the prolog and restore it in the epilog of the
268 /// function.
269 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
270   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
271 
272   // We need a save/restore of LR if there is any def of LR (which is
273   // defined by calls, including the PIC setup sequence), or if there is
274   // some use of the LR stack slot (e.g. for builtin_return_address).
275   // (LR comes in 32 and 64 bit versions.)
276   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
277   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
278 }
279 
280 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
281 /// call frame size. Update the MachineFunction object with the stack size.
282 unsigned
283 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
284                                                 bool UseEstimate) const {
285   unsigned NewMaxCallFrameSize = 0;
286   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
287                                             &NewMaxCallFrameSize);
288   MF.getFrameInfo().setStackSize(FrameSize);
289   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
290   return FrameSize;
291 }
292 
293 /// determineFrameLayout - Determine the size of the frame and maximum call
294 /// frame size.
295 unsigned
296 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
297                                        bool UseEstimate,
298                                        unsigned *NewMaxCallFrameSize) const {
299   const MachineFrameInfo &MFI = MF.getFrameInfo();
300   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
301 
302   // Get the number of bytes to allocate from the FrameInfo
303   unsigned FrameSize =
304     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
305 
306   // Get stack alignments. The frame must be aligned to the greatest of these:
307   Align TargetAlign = getStackAlign(); // alignment required per the ABI
308   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
309   Align Alignment = std::max(TargetAlign, MaxAlign);
310 
311   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
312 
313   unsigned LR = RegInfo->getRARegister();
314   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
315   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
316                        !MFI.adjustsStack() &&       // No calls.
317                        !MustSaveLR(MF, LR) &&       // No need to save LR.
318                        !FI->mustSaveTOC() &&        // No need to save TOC.
319                        !RegInfo->hasBasePointer(MF); // No special alignment.
320 
321   // Note: for PPC32 SVR4ABI, we can still generate stackless
322   // code if all local vars are reg-allocated.
323   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
324 
325   // Check whether we can skip adjusting the stack pointer (by using red zone)
326   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
327     // No need for frame
328     return 0;
329   }
330 
331   // Get the maximum call frame size of all the calls.
332   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
333 
334   // Maximum call frame needs to be at least big enough for linkage area.
335   unsigned minCallFrameSize = getLinkageSize();
336   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
337 
338   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
339   // that allocations will be aligned.
340   if (MFI.hasVarSizedObjects())
341     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
342 
343   // Update the new max call frame size if the caller passes in a valid pointer.
344   if (NewMaxCallFrameSize)
345     *NewMaxCallFrameSize = maxCallFrameSize;
346 
347   // Include call frame size in total.
348   FrameSize += maxCallFrameSize;
349 
350   // Make sure the frame is aligned.
351   FrameSize = alignTo(FrameSize, Alignment);
352 
353   return FrameSize;
354 }
355 
356 // hasFP - Return true if the specified function actually has a dedicated frame
357 // pointer register.
358 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
359   const MachineFrameInfo &MFI = MF.getFrameInfo();
360   // FIXME: This is pretty much broken by design: hasFP() might be called really
361   // early, before the stack layout was calculated and thus hasFP() might return
362   // true or false here depending on the time of call.
363   return (MFI.getStackSize()) && needsFP(MF);
364 }
365 
366 // needsFP - Return true if the specified function should have a dedicated frame
367 // pointer register.  This is true if the function has variable sized allocas or
368 // if frame pointer elimination is disabled.
369 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
370   const MachineFrameInfo &MFI = MF.getFrameInfo();
371 
372   // Naked functions have no stack frame pushed, so we don't have a frame
373   // pointer.
374   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
375     return false;
376 
377   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
378          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
379          MF.exposesReturnsTwice() ||
380          (MF.getTarget().Options.GuaranteedTailCallOpt &&
381           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
382 }
383 
384 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
385   bool is31 = needsFP(MF);
386   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
387   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
388 
389   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
390   bool HasBP = RegInfo->hasBasePointer(MF);
391   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
392   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
393 
394   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
395        BI != BE; ++BI)
396     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
397       --MBBI;
398       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
399         MachineOperand &MO = MBBI->getOperand(I);
400         if (!MO.isReg())
401           continue;
402 
403         switch (MO.getReg()) {
404         case PPC::FP:
405           MO.setReg(FPReg);
406           break;
407         case PPC::FP8:
408           MO.setReg(FP8Reg);
409           break;
410         case PPC::BP:
411           MO.setReg(BPReg);
412           break;
413         case PPC::BP8:
414           MO.setReg(BP8Reg);
415           break;
416 
417         }
418       }
419     }
420 }
421 
422 /*  This function will do the following:
423     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
424       respectively (defaults recommended by the ABI) and return true
425     - If MBB is not an entry block, initialize the register scavenger and look
426       for available registers.
427     - If the defaults (R0/R12) are available, return true
428     - If TwoUniqueRegsRequired is set to true, it looks for two unique
429       registers. Otherwise, look for a single available register.
430       - If the required registers are found, set SR1 and SR2 and return true.
431       - If the required registers are not found, set SR2 or both SR1 and SR2 to
432         PPC::NoRegister and return false.
433 
434     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
435     is not set, this function will attempt to find two different registers, but
436     still return true if only one register is available (and set SR1 == SR2).
437 */
438 bool
439 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
440                                       bool UseAtEnd,
441                                       bool TwoUniqueRegsRequired,
442                                       Register *SR1,
443                                       Register *SR2) const {
444   RegScavenger RS;
445   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
446   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
447 
448   // Set the defaults for the two scratch registers.
449   if (SR1)
450     *SR1 = R0;
451 
452   if (SR2) {
453     assert (SR1 && "Asking for the second scratch register but not the first?");
454     *SR2 = R12;
455   }
456 
457   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
458   if ((UseAtEnd && MBB->isReturnBlock()) ||
459       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
460     return true;
461 
462   RS.enterBasicBlock(*MBB);
463 
464   if (UseAtEnd && !MBB->empty()) {
465     // The scratch register will be used at the end of the block, so must
466     // consider all registers used within the block
467 
468     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469     // If no terminator, back iterator up to previous instruction.
470     if (MBBI == MBB->end())
471       MBBI = std::prev(MBBI);
472 
473     if (MBBI != MBB->begin())
474       RS.forward(MBBI);
475   }
476 
477   // If the two registers are available, we're all good.
478   // Note that we only return here if both R0 and R12 are available because
479   // although the function may not require two unique registers, it may benefit
480   // from having two so we should try to provide them.
481   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
482     return true;
483 
484   // Get the list of callee-saved registers for the target.
485   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
486   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
487 
488   // Get all the available registers in the block.
489   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
490                                      &PPC::GPRCRegClass);
491 
492   // We shouldn't use callee-saved registers as scratch registers as they may be
493   // available when looking for a candidate block for shrink wrapping but not
494   // available when the actual prologue/epilogue is being emitted because they
495   // were added as live-in to the prologue block by PrologueEpilogueInserter.
496   for (int i = 0; CSRegs[i]; ++i)
497     BV.reset(CSRegs[i]);
498 
499   // Set the first scratch register to the first available one.
500   if (SR1) {
501     int FirstScratchReg = BV.find_first();
502     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
503   }
504 
505   // If there is another one available, set the second scratch register to that.
506   // Otherwise, set it to either PPC::NoRegister if this function requires two
507   // or to whatever SR1 is set to if this function doesn't require two.
508   if (SR2) {
509     int SecondScratchReg = BV.find_next(*SR1);
510     if (SecondScratchReg != -1)
511       *SR2 = SecondScratchReg;
512     else
513       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
514   }
515 
516   // Now that we've done our best to provide both registers, double check
517   // whether we were unable to provide enough.
518   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
519     return false;
520 
521   return true;
522 }
523 
524 // We need a scratch register for spilling LR and for spilling CR. By default,
525 // we use two scratch registers to hide latency. However, if only one scratch
526 // register is available, we can adjust for that by not overlapping the spill
527 // code. However, if we need to realign the stack (i.e. have a base pointer)
528 // and the stack frame is large, we need two scratch registers.
529 // Also, stack probe requires two scratch registers, one for old sp, one for
530 // large frame and large probe size.
531 bool
532 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
533   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
534   MachineFunction &MF = *(MBB->getParent());
535   bool HasBP = RegInfo->hasBasePointer(MF);
536   unsigned FrameSize = determineFrameLayout(MF);
537   int NegFrameSize = -FrameSize;
538   bool IsLargeFrame = !isInt<16>(NegFrameSize);
539   MachineFrameInfo &MFI = MF.getFrameInfo();
540   Align MaxAlign = MFI.getMaxAlign();
541   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
542   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
543 
544   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
545          TLI.hasInlineStackProbe(MF);
546 }
547 
548 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
549   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
550 
551   return findScratchRegister(TmpMBB, false,
552                              twoUniqueScratchRegsRequired(TmpMBB));
553 }
554 
555 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
556   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
557 
558   return findScratchRegister(TmpMBB, true);
559 }
560 
561 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
562   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
563   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
564 
565   // Abort if there is no register info or function info.
566   if (!RegInfo || !FI)
567     return false;
568 
569   // Only move the stack update on ELFv2 ABI and PPC64.
570   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
571     return false;
572 
573   // Check the frame size first and return false if it does not fit the
574   // requirements.
575   // We need a non-zero frame size as well as a frame that will fit in the red
576   // zone. This is because by moving the stack pointer update we are now storing
577   // to the red zone until the stack pointer is updated. If we get an interrupt
578   // inside the prologue but before the stack update we now have a number of
579   // stores to the red zone and those stores must all fit.
580   MachineFrameInfo &MFI = MF.getFrameInfo();
581   unsigned FrameSize = MFI.getStackSize();
582   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
583     return false;
584 
585   // Frame pointers and base pointers complicate matters so don't do anything
586   // if we have them. For example having a frame pointer will sometimes require
587   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
588   // difficult. Similar situation exists with setjmp.
589   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
590     return false;
591 
592   // Calls to fast_cc functions use different rules for passing parameters on
593   // the stack from the ABI and using PIC base in the function imposes
594   // similar restrictions to using the base pointer. It is not generally safe
595   // to move the stack pointer update in these situations.
596   if (FI->hasFastCall() || FI->usesPICBase())
597     return false;
598 
599   // Finally we can move the stack update if we do not require register
600   // scavenging. Register scavenging can introduce more spills and so
601   // may make the frame size larger than we have computed.
602   return !RegInfo->requiresFrameIndexScavenging(MF);
603 }
604 
605 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
606                                     MachineBasicBlock &MBB) const {
607   MachineBasicBlock::iterator MBBI = MBB.begin();
608   MachineFrameInfo &MFI = MF.getFrameInfo();
609   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
610   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
611   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
612 
613   MachineModuleInfo &MMI = MF.getMMI();
614   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
615   DebugLoc dl;
616   // AIX assembler does not support cfi directives.
617   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
618 
619   // Get processor type.
620   bool isPPC64 = Subtarget.isPPC64();
621   // Get the ABI.
622   bool isSVR4ABI = Subtarget.isSVR4ABI();
623   bool isELFv2ABI = Subtarget.isELFv2ABI();
624   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
625 
626   // Work out frame sizes.
627   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
628   int NegFrameSize = -FrameSize;
629   if (!isInt<32>(NegFrameSize))
630     llvm_unreachable("Unhandled stack size!");
631 
632   if (MFI.isFrameAddressTaken())
633     replaceFPWithRealFP(MF);
634 
635   // Check if the link register (LR) must be saved.
636   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
637   bool MustSaveLR = FI->mustSaveLR();
638   bool MustSaveTOC = FI->mustSaveTOC();
639   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
640   bool MustSaveCR = !MustSaveCRs.empty();
641   // Do we have a frame pointer and/or base pointer for this function?
642   bool HasFP = hasFP(MF);
643   bool HasBP = RegInfo->hasBasePointer(MF);
644   bool HasRedZone = isPPC64 || !isSVR4ABI;
645 
646   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
647   Register BPReg = RegInfo->getBaseRegister(MF);
648   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
649   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
650   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
651   Register ScratchReg;
652   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
653   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
654   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
655                                                 : PPC::MFLR );
656   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
657                                                  : PPC::STW );
658   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
659                                                      : PPC::STWU );
660   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
661                                                         : PPC::STWUX);
662   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
663                                                           : PPC::LIS );
664   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
665                                                  : PPC::ORI );
666   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
667                                               : PPC::OR );
668   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
669                                                             : PPC::SUBFC);
670   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
671                                                                : PPC::SUBFIC);
672   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
673                                                            : PPC::MFCR);
674   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
675 
676   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
677   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
678   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
679   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
680   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
681          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
682 
683   // Using the same bool variable as below to suppress compiler warnings.
684   bool SingleScratchReg = findScratchRegister(
685       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
686   assert(SingleScratchReg &&
687          "Required number of registers not available in this block");
688 
689   SingleScratchReg = ScratchReg == TempReg;
690 
691   int LROffset = getReturnSaveOffset();
692 
693   int FPOffset = 0;
694   if (HasFP) {
695     MachineFrameInfo &MFI = MF.getFrameInfo();
696     int FPIndex = FI->getFramePointerSaveIndex();
697     assert(FPIndex && "No Frame Pointer Save Slot!");
698     FPOffset = MFI.getObjectOffset(FPIndex);
699   }
700 
701   int BPOffset = 0;
702   if (HasBP) {
703     MachineFrameInfo &MFI = MF.getFrameInfo();
704     int BPIndex = FI->getBasePointerSaveIndex();
705     assert(BPIndex && "No Base Pointer Save Slot!");
706     BPOffset = MFI.getObjectOffset(BPIndex);
707   }
708 
709   int PBPOffset = 0;
710   if (FI->usesPICBase()) {
711     MachineFrameInfo &MFI = MF.getFrameInfo();
712     int PBPIndex = FI->getPICBasePointerSaveIndex();
713     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
714     PBPOffset = MFI.getObjectOffset(PBPIndex);
715   }
716 
717   // Get stack alignments.
718   Align MaxAlign = MFI.getMaxAlign();
719   if (HasBP && MaxAlign > 1)
720     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
721 
722   // Frames of 32KB & larger require special handling because they cannot be
723   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
724   bool isLargeFrame = !isInt<16>(NegFrameSize);
725 
726   // Check if we can move the stack update instruction (stdu) down the prologue
727   // past the callee saves. Hopefully this will avoid the situation where the
728   // saves are waiting for the update on the store with update to complete.
729   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
730   bool MovingStackUpdateDown = false;
731 
732   // Check if we can move the stack update.
733   if (stackUpdateCanBeMoved(MF)) {
734     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
735     for (CalleeSavedInfo CSI : Info) {
736       int FrIdx = CSI.getFrameIdx();
737       // If the frame index is not negative the callee saved info belongs to a
738       // stack object that is not a fixed stack object. We ignore non-fixed
739       // stack objects because we won't move the stack update pointer past them.
740       if (FrIdx >= 0)
741         continue;
742 
743       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
744         StackUpdateLoc++;
745         MovingStackUpdateDown = true;
746       } else {
747         // We need all of the Frame Indices to meet these conditions.
748         // If they do not, abort the whole operation.
749         StackUpdateLoc = MBBI;
750         MovingStackUpdateDown = false;
751         break;
752       }
753     }
754 
755     // If the operation was not aborted then update the object offset.
756     if (MovingStackUpdateDown) {
757       for (CalleeSavedInfo CSI : Info) {
758         int FrIdx = CSI.getFrameIdx();
759         if (FrIdx < 0)
760           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
761       }
762     }
763   }
764 
765   // Where in the prologue we move the CR fields depends on how many scratch
766   // registers we have, and if we need to save the link register or not. This
767   // lambda is to avoid duplicating the logic in 2 places.
768   auto BuildMoveFromCR = [&]() {
769     if (isELFv2ABI && MustSaveCRs.size() == 1) {
770     // In the ELFv2 ABI, we are not required to save all CR fields.
771     // If only one CR field is clobbered, it is more efficient to use
772     // mfocrf to selectively save just that field, because mfocrf has short
773     // latency compared to mfcr.
774       assert(isPPC64 && "V2 ABI is 64-bit only.");
775       MachineInstrBuilder MIB =
776           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
777       MIB.addReg(MustSaveCRs[0], RegState::Kill);
778     } else {
779       MachineInstrBuilder MIB =
780           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
781       for (unsigned CRfield : MustSaveCRs)
782         MIB.addReg(CRfield, RegState::ImplicitKill);
783     }
784   };
785 
786   // If we need to spill the CR and the LR but we don't have two separate
787   // registers available, we must spill them one at a time
788   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
789     BuildMoveFromCR();
790     BuildMI(MBB, MBBI, dl, StoreWordInst)
791         .addReg(TempReg, getKillRegState(true))
792         .addImm(CRSaveOffset)
793         .addReg(SPReg);
794   }
795 
796   if (MustSaveLR)
797     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
798 
799   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
800     BuildMoveFromCR();
801 
802   if (HasRedZone) {
803     if (HasFP)
804       BuildMI(MBB, MBBI, dl, StoreInst)
805         .addReg(FPReg)
806         .addImm(FPOffset)
807         .addReg(SPReg);
808     if (FI->usesPICBase())
809       BuildMI(MBB, MBBI, dl, StoreInst)
810         .addReg(PPC::R30)
811         .addImm(PBPOffset)
812         .addReg(SPReg);
813     if (HasBP)
814       BuildMI(MBB, MBBI, dl, StoreInst)
815         .addReg(BPReg)
816         .addImm(BPOffset)
817         .addReg(SPReg);
818   }
819 
820   if (MustSaveLR)
821     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
822       .addReg(ScratchReg, getKillRegState(true))
823       .addImm(LROffset)
824       .addReg(SPReg);
825 
826   if (MustSaveCR &&
827       !(SingleScratchReg && MustSaveLR)) {
828     assert(HasRedZone && "A red zone is always available on PPC64");
829     BuildMI(MBB, MBBI, dl, StoreWordInst)
830       .addReg(TempReg, getKillRegState(true))
831       .addImm(CRSaveOffset)
832       .addReg(SPReg);
833   }
834 
835   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
836   if (!FrameSize)
837     return;
838 
839   // Adjust stack pointer: r1 += NegFrameSize.
840   // If there is a preferred stack alignment, align R1 now
841 
842   if (HasBP && HasRedZone) {
843     // Save a copy of r1 as the base pointer.
844     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
845       .addReg(SPReg)
846       .addReg(SPReg);
847   }
848 
849   // Have we generated a STUX instruction to claim stack frame? If so,
850   // the negated frame size will be placed in ScratchReg.
851   bool HasSTUX = false;
852 
853   // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain
854   // pointer is always stored at SP, we will get a free probe due to an essential
855   // STU(X) instruction.
856   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
857     // To be consistent with other targets, a pseudo instruction is emitted and
858     // will be later expanded in `inlineStackProbe`.
859     BuildMI(MBB, MBBI, dl,
860             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
861                             : PPC::PROBED_STACKALLOC_32))
862         .addDef(ScratchReg)
863         .addDef(TempReg) // TempReg stores the old sp.
864         .addImm(NegFrameSize);
865     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
866     // update the ScratchReg to meet the assumption that ScratchReg contains
867     // the NegFrameSize. This solution is rather tricky.
868     if (!HasRedZone) {
869       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
870           .addReg(TempReg)
871           .addReg(SPReg);
872       HasSTUX = true;
873     }
874   } else {
875     // This condition must be kept in sync with canUseAsPrologue.
876     if (HasBP && MaxAlign > 1) {
877       if (isPPC64)
878         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
879             .addReg(SPReg)
880             .addImm(0)
881             .addImm(64 - Log2(MaxAlign));
882       else // PPC32...
883         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
884             .addReg(SPReg)
885             .addImm(0)
886             .addImm(32 - Log2(MaxAlign))
887             .addImm(31);
888       if (!isLargeFrame) {
889         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
890             .addReg(ScratchReg, RegState::Kill)
891             .addImm(NegFrameSize);
892       } else {
893         assert(!SingleScratchReg && "Only a single scratch reg available");
894         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
895             .addImm(NegFrameSize >> 16);
896         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
897             .addReg(TempReg, RegState::Kill)
898             .addImm(NegFrameSize & 0xFFFF);
899         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
900             .addReg(ScratchReg, RegState::Kill)
901             .addReg(TempReg, RegState::Kill);
902       }
903 
904       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
905           .addReg(SPReg, RegState::Kill)
906           .addReg(SPReg)
907           .addReg(ScratchReg);
908       HasSTUX = true;
909 
910     } else if (!isLargeFrame) {
911       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
912           .addReg(SPReg)
913           .addImm(NegFrameSize)
914           .addReg(SPReg);
915 
916     } else {
917       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
918           .addImm(NegFrameSize >> 16);
919       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
920           .addReg(ScratchReg, RegState::Kill)
921           .addImm(NegFrameSize & 0xFFFF);
922       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
923           .addReg(SPReg, RegState::Kill)
924           .addReg(SPReg)
925           .addReg(ScratchReg);
926       HasSTUX = true;
927     }
928   }
929 
930   // Save the TOC register after the stack pointer update if a prologue TOC
931   // save is required for the function.
932   if (MustSaveTOC) {
933     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
934     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
935       .addReg(TOCReg, getKillRegState(true))
936       .addImm(TOCSaveOffset)
937       .addReg(SPReg);
938   }
939 
940   if (!HasRedZone) {
941     assert(!isPPC64 && "A red zone is always available on PPC64");
942     if (HasSTUX) {
943       // The negated frame size is in ScratchReg, and the SPReg has been
944       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
945       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
946       // the stack frame (i.e. the old SP), ideally, we would put the old
947       // SP into a register and use it as the base for the stores. The
948       // problem is that the only available register may be ScratchReg,
949       // which could be R0, and R0 cannot be used as a base address.
950 
951       // First, set ScratchReg to the old SP. This may need to be modified
952       // later.
953       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
954         .addReg(ScratchReg, RegState::Kill)
955         .addReg(SPReg);
956 
957       if (ScratchReg == PPC::R0) {
958         // R0 cannot be used as a base register, but it can be used as an
959         // index in a store-indexed.
960         int LastOffset = 0;
961         if (HasFP)  {
962           // R0 += (FPOffset-LastOffset).
963           // Need addic, since addi treats R0 as 0.
964           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
965             .addReg(ScratchReg)
966             .addImm(FPOffset-LastOffset);
967           LastOffset = FPOffset;
968           // Store FP into *R0.
969           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
970             .addReg(FPReg, RegState::Kill)  // Save FP.
971             .addReg(PPC::ZERO)
972             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
973         }
974         if (FI->usesPICBase()) {
975           // R0 += (PBPOffset-LastOffset).
976           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
977             .addReg(ScratchReg)
978             .addImm(PBPOffset-LastOffset);
979           LastOffset = PBPOffset;
980           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
981             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
982             .addReg(PPC::ZERO)
983             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
984         }
985         if (HasBP) {
986           // R0 += (BPOffset-LastOffset).
987           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
988             .addReg(ScratchReg)
989             .addImm(BPOffset-LastOffset);
990           LastOffset = BPOffset;
991           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
992             .addReg(BPReg, RegState::Kill)  // Save BP.
993             .addReg(PPC::ZERO)
994             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
995           // BP = R0-LastOffset
996           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
997             .addReg(ScratchReg, RegState::Kill)
998             .addImm(-LastOffset);
999         }
1000       } else {
1001         // ScratchReg is not R0, so use it as the base register. It is
1002         // already set to the old SP, so we can use the offsets directly.
1003 
1004         // Now that the stack frame has been allocated, save all the necessary
1005         // registers using ScratchReg as the base address.
1006         if (HasFP)
1007           BuildMI(MBB, MBBI, dl, StoreInst)
1008             .addReg(FPReg)
1009             .addImm(FPOffset)
1010             .addReg(ScratchReg);
1011         if (FI->usesPICBase())
1012           BuildMI(MBB, MBBI, dl, StoreInst)
1013             .addReg(PPC::R30)
1014             .addImm(PBPOffset)
1015             .addReg(ScratchReg);
1016         if (HasBP) {
1017           BuildMI(MBB, MBBI, dl, StoreInst)
1018             .addReg(BPReg)
1019             .addImm(BPOffset)
1020             .addReg(ScratchReg);
1021           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1022             .addReg(ScratchReg, RegState::Kill)
1023             .addReg(ScratchReg);
1024         }
1025       }
1026     } else {
1027       // The frame size is a known 16-bit constant (fitting in the immediate
1028       // field of STWU). To be here we have to be compiling for PPC32.
1029       // Since the SPReg has been decreased by FrameSize, add it back to each
1030       // offset.
1031       if (HasFP)
1032         BuildMI(MBB, MBBI, dl, StoreInst)
1033           .addReg(FPReg)
1034           .addImm(FrameSize + FPOffset)
1035           .addReg(SPReg);
1036       if (FI->usesPICBase())
1037         BuildMI(MBB, MBBI, dl, StoreInst)
1038           .addReg(PPC::R30)
1039           .addImm(FrameSize + PBPOffset)
1040           .addReg(SPReg);
1041       if (HasBP) {
1042         BuildMI(MBB, MBBI, dl, StoreInst)
1043           .addReg(BPReg)
1044           .addImm(FrameSize + BPOffset)
1045           .addReg(SPReg);
1046         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1047           .addReg(SPReg)
1048           .addImm(FrameSize);
1049       }
1050     }
1051   }
1052 
1053   // Add Call Frame Information for the instructions we generated above.
1054   if (needsCFI) {
1055     unsigned CFIIndex;
1056 
1057     if (HasBP) {
1058       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1059       // because if the stack needed aligning then CFA won't be at a fixed
1060       // offset from FP/SP.
1061       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1062       CFIIndex = MF.addFrameInst(
1063           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1064     } else {
1065       // Adjust the definition of CFA to account for the change in SP.
1066       assert(NegFrameSize);
1067       CFIIndex = MF.addFrameInst(
1068           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1069     }
1070     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1071         .addCFIIndex(CFIIndex);
1072 
1073     if (HasFP) {
1074       // Describe where FP was saved, at a fixed offset from CFA.
1075       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1076       CFIIndex = MF.addFrameInst(
1077           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1078       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1079           .addCFIIndex(CFIIndex);
1080     }
1081 
1082     if (FI->usesPICBase()) {
1083       // Describe where R30 (PIC base) was saved, at a fixed offset from CFA.
1084       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1085       CFIIndex = MF.addFrameInst(
1086           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1087       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1088           .addCFIIndex(CFIIndex);
1089     }
1090 
1091     if (HasBP) {
1092       // Describe where BP was saved, at a fixed offset from CFA.
1093       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1094       CFIIndex = MF.addFrameInst(
1095           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1096       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1097           .addCFIIndex(CFIIndex);
1098     }
1099 
1100     if (MustSaveLR) {
1101       // Describe where LR was saved, at a fixed offset from CFA.
1102       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1103       CFIIndex = MF.addFrameInst(
1104           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1105       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1106           .addCFIIndex(CFIIndex);
1107     }
1108   }
1109 
1110   // If there is a frame pointer, copy R1 into R31
1111   if (HasFP) {
1112     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1113       .addReg(SPReg)
1114       .addReg(SPReg);
1115 
1116     if (!HasBP && needsCFI) {
1117       // Change the definition of CFA from SP+offset to FP+offset, because SP
1118       // will change at every alloca.
1119       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1120       unsigned CFIIndex = MF.addFrameInst(
1121           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1122 
1123       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1124           .addCFIIndex(CFIIndex);
1125     }
1126   }
1127 
1128   if (needsCFI) {
1129     // Describe where callee saved registers were saved, at fixed offsets from
1130     // CFA.
1131     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1132     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1133       unsigned Reg = CSI[I].getReg();
1134       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1135 
1136       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1137       // subregisters of CR2. We just need to emit a move of CR2.
1138       if (PPC::CRBITRCRegClass.contains(Reg))
1139         continue;
1140 
1141       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1142         continue;
1143 
1144       // For SVR4, don't emit a move for the CR spill slot if we haven't
1145       // spilled CRs.
1146       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1147           && !MustSaveCR)
1148         continue;
1149 
1150       // For 64-bit SVR4 when we have spilled CRs, the spill location
1151       // is SP+8, not a frame-relative slot.
1152       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1153         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1154         // the whole CR word.  In the ELFv2 ABI, every CR that was
1155         // actually saved gets its own CFI record.
1156         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1157         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1158             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1159         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1160             .addCFIIndex(CFIIndex);
1161         continue;
1162       }
1163 
1164       if (CSI[I].isSpilledToReg()) {
1165         unsigned SpilledReg = CSI[I].getDstReg();
1166         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1167             nullptr, MRI->getDwarfRegNum(Reg, true),
1168             MRI->getDwarfRegNum(SpilledReg, true)));
1169         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1170           .addCFIIndex(CFIRegister);
1171       } else {
1172         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1173         // We have changed the object offset above but we do not want to change
1174         // the actual offsets in the CFI instruction so we have to undo the
1175         // offset change here.
1176         if (MovingStackUpdateDown)
1177           Offset -= NegFrameSize;
1178 
1179         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1180             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1181         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1182             .addCFIIndex(CFIIndex);
1183       }
1184     }
1185   }
1186 }
1187 
1188 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1189                                         MachineBasicBlock &PrologMBB) const {
       // Expand the PROBED_STACKALLOC_32 / PROBED_STACKALLOC_64 pseudo found in
       // PrologMBB into an explicit sequence of stack-allocating stores.
       //
       // Operands read from the pseudo:
       //   0 - ScratchReg:   scratch register, used for materialized immediates.
       //   1 - FPReg:        register whose value every probing store writes.
       //   2 - NegFrameSize: negated size of the frame to allocate.
       //
       // The frame is claimed as an optional residual piece of
       // (NegFrameSize % NegProbeSize) bytes followed by NumBlocks pieces of
       // ProbeSize bytes, each claimed by one store-with-update on SPReg. Fewer
       // than three blocks are emitted inline; otherwise a CTR loop is built in
       // newly created basic blocks. When the function has a base pointer and
       // MaxAlign > 1, SP is realigned first. The pseudo is erased at the end; if
       // PrologMBB holds no such pseudo, nothing is changed.
       //
1190   // TODO: Generate CFI instructions.
       // NOTE(review): CFI is emitted below whenever needsCFI is set, so the TODO
       // above may be stale -- confirm before removing it.
1191   bool isPPC64 = Subtarget.isPPC64();
1192   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1193   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1194   MachineFrameInfo &MFI = MF.getFrameInfo();
1195   MachineModuleInfo &MMI = MF.getMMI();
1196   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1197   // AIX assembler does not support cfi directives.
1198   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
       // Locate the first probed-allocation pseudo in the prologue block; without
       // one there is nothing to expand.
1199   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1200     int Opc = MI.getOpcode();
1201     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1202   });
1203   if (StackAllocMIPos == PrologMBB.end())
1204     return;
1205   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
       // CurrentMBB tracks the block that currently contains the pseudo; it is
       // replaced by the exit block returned from dynamicProbe when that runs.
1206   MachineBasicBlock *CurrentMBB = &PrologMBB;
1207   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1208   MachineInstr &MI = *StackAllocMIPos;
1209   int64_t NegFrameSize = MI.getOperand(2).getImm();
1210   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1211   int64_t NegProbeSize = -(int64_t)ProbeSize;
1212   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
       // Split the frame into whole probe-sized blocks plus a residual. Assuming
       // NegFrameSize <= 0 (as its name suggests), the quotient is non-negative
       // and the remainder is <= 0, i.e. still a negated size.
1213   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1214   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1215   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1216   Register ScratchReg = MI.getOperand(0).getReg();
1217   Register FPReg = MI.getOperand(1).getReg();
1218   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1219   bool HasBP = RegInfo->hasBasePointer(MF);
1220   Register BPReg = RegInfo->getBaseRegister(MF);
1221   Align MaxAlign = MFI.getMaxAlign();
       // Register-to-register copies are emitted as "or Dst, Src, Src".
1222   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1223   // Subroutines to generate .cfi_* directives.
       // buildDefCFAReg switches the CFA register to Reg and leaves the CFA offset
       // untouched.
1224   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1225                             MachineBasicBlock::iterator MBBI, Register Reg) {
1226     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1227     unsigned CFIIndex = MF.addFrameInst(
1228         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1229     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1230         .addCFIIndex(CFIIndex);
1231   };
       // buildDefCFA defines the CFA as Reg + Offset.
1232   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1233                          MachineBasicBlock::iterator MBBI, Register Reg,
1234                          int Offset) {
1235     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1236     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1237         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1238     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1239         .addCFIIndex(CFIIndex);
1240   };
1241   // Subroutine to determine if we can use the Imm as part of d-form.
       // The immediate must fit in a signed 16-bit field and be a multiple of 4
       // (presumably because STDU's DS-form displacement has its low two bits
       // implied zero -- confirm against the ISA).
1242   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1243   // Subroutine to materialize the Imm into TempReg.
       // A signed 16-bit value takes a single LI; anything else (up to 32 bits)
       // takes LIS with the upper half followed by ORI with the lower 16 bits.
1244   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1245                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1246                             Register &TempReg) {
1247     assert(isInt<32>(Imm) && "Unhandled imm");
1248     if (isInt<16>(Imm))
1249       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1250           .addImm(Imm);
1251     else {
1252       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1253           .addImm(Imm >> 16);
1254       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1255           .addReg(TempReg)
1256           .addImm(Imm & 0xFFFF);
1257     }
1258   };
1259   // Subroutine to store frame pointer and decrease stack pointer by probe size.
       // With UseDForm the displacement is the immediate NegSize and NegSizeReg is
       // ignored; otherwise the indexed form is used with NegSizeReg and NegSize
       // is ignored. StoreReg is the value written by the store.
1260   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1261                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1262                               Register NegSizeReg, bool UseDForm,
1263                               Register StoreReg) {
1264     if (UseDForm)
1265       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1266           .addReg(StoreReg)
1267           .addImm(NegSize)
1268           .addReg(SPReg);
1269     else
1270       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1271           .addReg(StoreReg)
1272           .addReg(SPReg)
1273           .addReg(NegSizeReg);
1274   };
1275   // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
1276   // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
1277   // available and r1 is already copied to r30 which is BPReg. So BPReg stores
1278   // the value of stackptr.
1279   // First we have to probe tail interval whose size is less than probesize,
1280   // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
1281   // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
1282   // probe each block sized probesize until stackptr meets
1283   // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
1284   // as negprobesize. At both stages, TempReg stores the value of
1285   // (stackptr - (stackptr % align)).
       // Returns the newly created exit block, which receives every instruction of
       // MBB from MBBI onwards together with MBB's original successors.
1286   auto dynamicProbe = [&](MachineBasicBlock &MBB,
1287                           MachineBasicBlock::iterator MBBI, Register ScratchReg,
1288                           Register TempReg) {
1289     assert(HasBP && isPPC64 && "Probe alignment part not available");
         // The modulo operations below are implemented by masking off high bits
         // with RLDICL, which is only correct for power-of-two divisors.
1290     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1291     // ScratchReg = stackptr % align
1292     BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
1293         .addReg(BPReg)
1294         .addImm(0)
1295         .addImm(64 - Log2(MaxAlign));
1296     // TempReg = stackptr - (stackptr % align)
1297     BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
1298         .addReg(ScratchReg)
1299         .addReg(BPReg);
1300     // ScratchReg = (stackptr % align) % probesize
1301     BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
1302         .addReg(ScratchReg)
1303         .addImm(0)
1304         .addImm(64 - Log2(ProbeSize));
1305     Register CRReg = PPC::CR0;
1306     // If (stackptr % align) % probesize == 0, we should not generate probe
1307     // code. The output assembly is laid out roughly like:
1308     // bb.0:
1309     //   ...
1310     //   cmpdi $scratchreg, 0
1311     //   beq bb.2
1312     // bb.1: # Probe tail interval
1313     //   neg $scratchreg, $scratchreg
1314     //   stdux $bpreg, r1, $scratchreg
1315     // bb.2:
1316     //   <materialize negprobesize into $scratchreg>
1317     //   cmpd r1, $tempreg
1318     //   beq bb.4
1319     // bb.3: # Loop to probe each block
1320     //   stdux $bpreg, r1, $scratchreg
1321     //   cmpd r1, $tempreg
1322     //   bne bb.3
1323     // bb.4:
1324     //   ...
         // All four new blocks are inserted, in order, directly after MBB.
1325     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1326     MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
1327     MF.insert(MBBInsertPoint, ProbeResidualMBB);
1328     MachineBasicBlock *ProbeLoopPreHeaderMBB =
1329         MF.CreateMachineBasicBlock(ProbedBB);
1330     MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
1331     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1332     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1333     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1334     MF.insert(MBBInsertPoint, ProbeExitMBB);
1335     // bb.4
         // Move the tail of MBB (from MBBI on) and its successors into the exit
         // block before the compare and branch are appended to MBB.
1336     ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1337     ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1338     // bb.0
1339     BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
1340     BuildMI(&MBB, DL, TII.get(PPC::BCC))
1341         .addImm(PPC::PRED_EQ)
1342         .addReg(CRReg)
1343         .addMBB(ProbeLoopPreHeaderMBB);
1344     MBB.addSuccessor(ProbeResidualMBB);
1345     MBB.addSuccessor(ProbeLoopPreHeaderMBB);
1346     // bb.1
         // Negate the residual so it can be used as the (negative) index of the
         // store-with-update; the immediate argument 0 is unused in indexed form.
1347     BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
1348         .addReg(ScratchReg);
1349     allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
1350                      false, BPReg);
1351     ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
1352     // bb.2
1353     MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
1354                    NegProbeSize, ScratchReg);
1355     BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
1356         .addReg(SPReg)
1357         .addReg(TempReg);
1358     BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
1359         .addImm(PPC::PRED_EQ)
1360         .addReg(CRReg)
1361         .addMBB(ProbeExitMBB);
1362     ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
1363     ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
1364     // bb.3
1365     allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
1366                      false, BPReg);
1367     BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
1368         .addReg(SPReg)
1369         .addReg(TempReg);
1370     BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1371         .addImm(PPC::PRED_NE)
1372         .addReg(CRReg)
1373         .addMBB(ProbeLoopBodyMBB);
1374     ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1375     ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1376     // Update liveins.
1377     recomputeLiveIns(*ProbeResidualMBB);
1378     recomputeLiveIns(*ProbeLoopPreHeaderMBB);
1379     recomputeLiveIns(*ProbeLoopBodyMBB);
1380     recomputeLiveIns(*ProbeExitMBB);
1381     return ProbeExitMBB;
1382   };
       // From here on, `{MI}` builds an iterator at the pseudo, so each BuildMI
       // below inserts in front of it and the emitted order follows source order.
1383   // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
1384   // SP = SP - SP % MaxAlign.
1385   if (HasBP && MaxAlign > 1) {
1386     // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
1387     // 64-bit mode.
1388     if (isPPC64) {
1389       // Use BPReg to calculate CFA.
1390       if (needsCFI)
1391         buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
1392       // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
1393       // TempReg.
1394       Register TempReg = FPReg;
           // dynamicProbe splits the block; continue in the exit block, which now
           // holds the pseudo.
1395       CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
1396       // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
1397       BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
1398           .addReg(BPReg)
1399           .addReg(BPReg);
1400     } else {
1401       // Initialize current frame pointer.
1402       BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
1403           .addReg(SPReg)
1404           .addReg(SPReg);
1405       // Use FPReg to calculate CFA.
1406       if (needsCFI)
1407         buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
           // 32-bit: ScratchReg = SP % MaxAlign, then SP -= ScratchReg. The
           // realignment gap itself is not probed here (see the FIXME above).
1408       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1409           .addReg(FPReg)
1410           .addImm(0)
1411           .addImm(32 - Log2(MaxAlign))
1412           .addImm(31);
1413       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
1414           .addReg(ScratchReg)
1415           .addReg(SPReg);
1416     }
1417   } else {
1418     // Initialize current frame pointer.
1419     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1420     // Use FPReg to calculate CFA.
1421     if (needsCFI)
1422       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1423   }
1424   // Probe residual part.
       // The residual is claimed first, with one store; its size is only
       // materialized into ScratchReg when it cannot be encoded as a D-form
       // displacement.
1425   if (NegResidualSize) {
1426     bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1427     if (!ResidualUseDForm)
1428       MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1429     allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1430                      ResidualUseDForm, FPReg);
1431   }
1432   bool UseDForm = CanUseDForm(NegProbeSize);
1433   // If number of blocks is small, just probe them directly.
1434   if (NumBlocks < 3) {
1435     if (!UseDForm)
1436       MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1437     for (int i = 0; i < NumBlocks; ++i)
1438       allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1439                        FPReg);
1440     if (needsCFI) {
1441       // Restore using SPReg to calculate CFA.
1442       buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1443     }
1444   } else {
1445     // Since CTR is a volatile register and current shrinkwrap implementation
1446     // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
1447     // CTR loop to probe.
1448     // Calculate the trip count and move it into CTR.
1449     MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1450     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1451         .addReg(ScratchReg, RegState::Kill);
         // ScratchReg is free again after MTCTR, so it can be reused for the probe
         // size when the indexed store form is needed.
1452     if (!UseDForm)
1453       MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1454     // Create MBBs of the loop.
1455     MachineFunction::iterator MBBInsertPoint =
1456         std::next(CurrentMBB->getIterator());
1457     MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1458     MF.insert(MBBInsertPoint, LoopMBB);
1459     MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1460     MF.insert(MBBInsertPoint, ExitMBB);
1461     // Synthesize the loop body.
         // One probe per iteration; BDNZ branches back to LoopMBB and otherwise
         // falls through to ExitMBB.
1462     allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1463                      UseDForm, FPReg);
1464     BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1465         .addMBB(LoopMBB);
1466     LoopMBB->addSuccessor(ExitMBB);
1467     LoopMBB->addSuccessor(LoopMBB);
1468     // Synthesize the exit MBB.
         // Everything after the pseudo moves to ExitMBB; the pseudo itself stays in
         // CurrentMBB until it is erased below.
1469     ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1470                     std::next(MachineBasicBlock::iterator(MI)),
1471                     CurrentMBB->end());
1472     ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1473     CurrentMBB->addSuccessor(LoopMBB);
1474     if (needsCFI) {
1475       // Restore using SPReg to calculate CFA.
1476       buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1477     }
1478     // Update liveins.
1479     recomputeLiveIns(*LoopMBB);
1480     recomputeLiveIns(*ExitMBB);
1481   }
       // Count the expansion and drop the now fully expanded pseudo.
1482   ++NumPrologProbed;
1483   MI.eraseFromParent();
1484 }
1485 
1486 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1487                                     MachineBasicBlock &MBB) const {
1488   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1489   DebugLoc dl;
1490 
1491   if (MBBI != MBB.end())
1492     dl = MBBI->getDebugLoc();
1493 
1494   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1495   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1496 
1497   // Get alignment info so we know how to restore the SP.
1498   const MachineFrameInfo &MFI = MF.getFrameInfo();
1499 
1500   // Get the number of bytes allocated from the FrameInfo.
1501   int FrameSize = MFI.getStackSize();
1502 
1503   // Get processor type.
1504   bool isPPC64 = Subtarget.isPPC64();
1505 
1506   // Check if the link register (LR) has been saved.
1507   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1508   bool MustSaveLR = FI->mustSaveLR();
1509   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1510   bool MustSaveCR = !MustSaveCRs.empty();
1511   // Do we have a frame pointer and/or base pointer for this function?
1512   bool HasFP = hasFP(MF);
1513   bool HasBP = RegInfo->hasBasePointer(MF);
1514   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1515 
1516   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1517   Register BPReg = RegInfo->getBaseRegister(MF);
1518   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1519   Register ScratchReg;
1520   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1521   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1522                                                  : PPC::MTLR );
1523   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1524                                                  : PPC::LWZ );
1525   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1526                                                            : PPC::LIS );
1527   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1528                                               : PPC::OR );
1529   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1530                                                   : PPC::ORI );
1531   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1532                                                    : PPC::ADDI );
1533   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1534                                                 : PPC::ADD4 );
1535   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1536                                                      : PPC::LWZ);
1537   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1538                                                      : PPC::MTOCRF);
1539   int LROffset = getReturnSaveOffset();
1540 
1541   int FPOffset = 0;
1542 
1543   // Using the same bool variable as below to suppress compiler warnings.
1544   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1545                                               &TempReg);
1546   assert(SingleScratchReg &&
1547          "Could not find an available scratch register");
1548 
1549   SingleScratchReg = ScratchReg == TempReg;
1550 
1551   if (HasFP) {
1552     int FPIndex = FI->getFramePointerSaveIndex();
1553     assert(FPIndex && "No Frame Pointer Save Slot!");
1554     FPOffset = MFI.getObjectOffset(FPIndex);
1555   }
1556 
1557   int BPOffset = 0;
1558   if (HasBP) {
1559       int BPIndex = FI->getBasePointerSaveIndex();
1560       assert(BPIndex && "No Base Pointer Save Slot!");
1561       BPOffset = MFI.getObjectOffset(BPIndex);
1562   }
1563 
1564   int PBPOffset = 0;
1565   if (FI->usesPICBase()) {
1566     int PBPIndex = FI->getPICBasePointerSaveIndex();
1567     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1568     PBPOffset = MFI.getObjectOffset(PBPIndex);
1569   }
1570 
1571   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1572 
1573   if (IsReturnBlock) {
1574     unsigned RetOpcode = MBBI->getOpcode();
1575     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1576                       RetOpcode == PPC::TCRETURNdi ||
1577                       RetOpcode == PPC::TCRETURNai ||
1578                       RetOpcode == PPC::TCRETURNri8 ||
1579                       RetOpcode == PPC::TCRETURNdi8 ||
1580                       RetOpcode == PPC::TCRETURNai8;
1581 
1582     if (UsesTCRet) {
1583       int MaxTCRetDelta = FI->getTailCallSPDelta();
1584       MachineOperand &StackAdjust = MBBI->getOperand(1);
1585       assert(StackAdjust.isImm() && "Expecting immediate value.");
1586       // Adjust stack pointer.
1587       int StackAdj = StackAdjust.getImm();
1588       int Delta = StackAdj - MaxTCRetDelta;
1589       assert((Delta >= 0) && "Delta must be positive");
1590       if (MaxTCRetDelta>0)
1591         FrameSize += (StackAdj +Delta);
1592       else
1593         FrameSize += StackAdj;
1594     }
1595   }
1596 
1597   // Frames of 32KB & larger require special handling because they cannot be
1598   // indexed into with a simple LD/LWZ immediate offset operand.
1599   bool isLargeFrame = !isInt<16>(FrameSize);
1600 
1601   // On targets without red zone, the SP needs to be restored last, so that
1602   // all live contents of the stack frame are upwards of the SP. This means
1603   // that we cannot restore SP just now, since there may be more registers
1604   // to restore from the stack frame (e.g. R31). If the frame size is not
1605   // a simple immediate value, we will need a spare register to hold the
1606   // restored SP. If the frame size is known and small, we can simply adjust
1607   // the offsets of the registers to be restored, and still use SP to restore
1608   // them. In such case, the final update of SP will be to add the frame
1609   // size to it.
1610   // To simplify the code, set RBReg to the base register used to restore
1611   // values from the stack, and set SPAdd to the value that needs to be added
1612   // to the SP at the end. The default values are as if red zone was present.
1613   unsigned RBReg = SPReg;
1614   unsigned SPAdd = 0;
1615 
1616   // Check if we can move the stack update instruction up the epilogue
1617   // past the callee saves. This will allow the move to LR instruction
1618   // to be executed before the restores of the callee saves which means
1619   // that the callee saves can hide the latency from the MTLR instrcution.
1620   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1621   if (stackUpdateCanBeMoved(MF)) {
1622     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1623     for (CalleeSavedInfo CSI : Info) {
1624       int FrIdx = CSI.getFrameIdx();
1625       // If the frame index is not negative the callee saved info belongs to a
1626       // stack object that is not a fixed stack object. We ignore non-fixed
1627       // stack objects because we won't move the update of the stack pointer
1628       // past them.
1629       if (FrIdx >= 0)
1630         continue;
1631 
1632       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1633         StackUpdateLoc--;
1634       else {
1635         // Abort the operation as we can't update all CSR restores.
1636         StackUpdateLoc = MBBI;
1637         break;
1638       }
1639     }
1640   }
1641 
1642   if (FrameSize) {
1643     // In the prologue, the loaded (or persistent) stack pointer value is
1644     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1645     // zone add this offset back now.
1646 
1647     // If the function has a base pointer, the stack pointer has been copied
1648     // to it so we can restore it by copying in the other direction.
1649     if (HasRedZone && HasBP) {
1650       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1651         addReg(BPReg).
1652         addReg(BPReg);
1653     }
1654     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1655     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1656     // call which invalidates the stack pointer value in SP(0). So we use the
1657     // value of R31 in this case. Similar situation exists with setjmp.
1658     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1659       assert(HasFP && "Expecting a valid frame pointer.");
1660       if (!HasRedZone)
1661         RBReg = FPReg;
1662       if (!isLargeFrame) {
1663         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1664           .addReg(FPReg).addImm(FrameSize);
1665       } else {
1666         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1667           .addImm(FrameSize >> 16);
1668         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1669           .addReg(ScratchReg, RegState::Kill)
1670           .addImm(FrameSize & 0xFFFF);
1671         BuildMI(MBB, MBBI, dl, AddInst)
1672           .addReg(RBReg)
1673           .addReg(FPReg)
1674           .addReg(ScratchReg);
1675       }
1676     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1677       if (HasRedZone) {
1678         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1679           .addReg(SPReg)
1680           .addImm(FrameSize);
1681       } else {
1682         // Make sure that adding FrameSize will not overflow the max offset
1683         // size.
1684         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1685                "Local offsets should be negative");
1686         SPAdd = FrameSize;
1687         FPOffset += FrameSize;
1688         BPOffset += FrameSize;
1689         PBPOffset += FrameSize;
1690       }
1691     } else {
1692       // We don't want to use ScratchReg as a base register, because it
1693       // could happen to be R0. Use FP instead, but make sure to preserve it.
1694       if (!HasRedZone) {
1695         // If FP is not saved, copy it to ScratchReg.
1696         if (!HasFP)
1697           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1698             .addReg(FPReg)
1699             .addReg(FPReg);
1700         RBReg = FPReg;
1701       }
1702       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1703         .addImm(0)
1704         .addReg(SPReg);
1705     }
1706   }
1707   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1708   // If there is no red zone, ScratchReg may be needed for holding a useful
1709   // value (although not the base register). Make sure it is not overwritten
1710   // too early.
1711 
1712   // If we need to restore both the LR and the CR and we only have one
1713   // available scratch register, we must do them one at a time.
1714   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1715     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1716     // is live here.
1717     assert(HasRedZone && "Expecting red zone");
1718     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1719       .addImm(CRSaveOffset)
1720       .addReg(SPReg);
1721     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1722       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1723         .addReg(TempReg, getKillRegState(i == e-1));
1724   }
1725 
1726   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1727   // LR is stored in the caller's stack frame. ScratchReg will be needed
1728   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1729   // a base register anyway, because it may happen to be R0.
1730   bool LoadedLR = false;
1731   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1732     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1733       .addImm(LROffset+SPAdd)
1734       .addReg(RBReg);
1735     LoadedLR = true;
1736   }
1737 
1738   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1739     assert(RBReg == SPReg && "Should be using SP as a base register");
1740     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1741       .addImm(CRSaveOffset)
1742       .addReg(RBReg);
1743   }
1744 
1745   if (HasFP) {
1746     // If there is red zone, restore FP directly, since SP has already been
1747     // restored. Otherwise, restore the value of FP into ScratchReg.
1748     if (HasRedZone || RBReg == SPReg)
1749       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1750         .addImm(FPOffset)
1751         .addReg(SPReg);
1752     else
1753       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1754         .addImm(FPOffset)
1755         .addReg(RBReg);
1756   }
1757 
1758   if (FI->usesPICBase())
1759     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1760       .addImm(PBPOffset)
1761       .addReg(RBReg);
1762 
1763   if (HasBP)
1764     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1765       .addImm(BPOffset)
1766       .addReg(RBReg);
1767 
1768   // There is nothing more to be loaded from the stack, so now we can
1769   // restore SP: SP = RBReg + SPAdd.
1770   if (RBReg != SPReg || SPAdd != 0) {
1771     assert(!HasRedZone && "This should not happen with red zone");
1772     // If SPAdd is 0, generate a copy.
1773     if (SPAdd == 0)
1774       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1775         .addReg(RBReg)
1776         .addReg(RBReg);
1777     else
1778       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1779         .addReg(RBReg)
1780         .addImm(SPAdd);
1781 
1782     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1783     if (RBReg == FPReg)
1784       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1785         .addReg(ScratchReg)
1786         .addReg(ScratchReg);
1787 
1788     // Now load the LR from the caller's stack frame.
1789     if (MustSaveLR && !LoadedLR)
1790       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1791         .addImm(LROffset)
1792         .addReg(SPReg);
1793   }
1794 
1795   if (MustSaveCR &&
1796       !(SingleScratchReg && MustSaveLR))
1797     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1798       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1799         .addReg(TempReg, getKillRegState(i == e-1));
1800 
1801   if (MustSaveLR)
1802     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1803 
1804   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1805   // call optimization
1806   if (IsReturnBlock) {
1807     unsigned RetOpcode = MBBI->getOpcode();
1808     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1809         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1810         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1811       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1812       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1813 
1814       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1815         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1816           .addReg(SPReg).addImm(CallerAllocatedAmt);
1817       } else {
1818         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1819           .addImm(CallerAllocatedAmt >> 16);
1820         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1821           .addReg(ScratchReg, RegState::Kill)
1822           .addImm(CallerAllocatedAmt & 0xFFFF);
1823         BuildMI(MBB, MBBI, dl, AddInst)
1824           .addReg(SPReg)
1825           .addReg(FPReg)
1826           .addReg(ScratchReg);
1827       }
1828     } else {
1829       createTailCallBranchInstr(MBB);
1830     }
1831   }
1832 }
1833 
1834 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1835   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1836 
1837   // If we got this far a first terminator should exist.
1838   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1839 
1840   DebugLoc dl = MBBI->getDebugLoc();
1841   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1842 
1843   // Create branch instruction for pseudo tail call return instruction.
1844   // The TCRETURNdi variants are direct calls. Valid targets for those are
1845   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1846   // since we can tail call external functions with PC-Rel (i.e. we don't need
1847   // to worry about different TOC pointers). Some of the external functions will
1848   // be MO_GlobalAddress while others like memcpy for example, are going to
1849   // be MO_ExternalSymbol.
1850   unsigned RetOpcode = MBBI->getOpcode();
1851   if (RetOpcode == PPC::TCRETURNdi) {
1852     MBBI = MBB.getLastNonDebugInstr();
1853     MachineOperand &JumpTarget = MBBI->getOperand(0);
1854     if (JumpTarget.isGlobal())
1855       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1856         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1857     else if (JumpTarget.isSymbol())
1858       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1859         addExternalSymbol(JumpTarget.getSymbolName());
1860     else
1861       llvm_unreachable("Expecting Global or External Symbol");
1862   } else if (RetOpcode == PPC::TCRETURNri) {
1863     MBBI = MBB.getLastNonDebugInstr();
1864     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1865     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1866   } else if (RetOpcode == PPC::TCRETURNai) {
1867     MBBI = MBB.getLastNonDebugInstr();
1868     MachineOperand &JumpTarget = MBBI->getOperand(0);
1869     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1870   } else if (RetOpcode == PPC::TCRETURNdi8) {
1871     MBBI = MBB.getLastNonDebugInstr();
1872     MachineOperand &JumpTarget = MBBI->getOperand(0);
1873     if (JumpTarget.isGlobal())
1874       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1875         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1876     else if (JumpTarget.isSymbol())
1877       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1878         addExternalSymbol(JumpTarget.getSymbolName());
1879     else
1880       llvm_unreachable("Expecting Global or External Symbol");
1881   } else if (RetOpcode == PPC::TCRETURNri8) {
1882     MBBI = MBB.getLastNonDebugInstr();
1883     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1884     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1885   } else if (RetOpcode == PPC::TCRETURNai8) {
1886     MBBI = MBB.getLastNonDebugInstr();
1887     MachineOperand &JumpTarget = MBBI->getOperand(0);
1888     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1889   }
1890 }
1891 
1892 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1893                                             BitVector &SavedRegs,
1894                                             RegScavenger *RS) const {
1895   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1896 
1897   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1898 
1899   //  Save and clear the LR state.
1900   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1901   unsigned LR = RegInfo->getRARegister();
1902   FI->setMustSaveLR(MustSaveLR(MF, LR));
1903   SavedRegs.reset(LR);
1904 
1905   //  Save R31 if necessary
1906   int FPSI = FI->getFramePointerSaveIndex();
1907   const bool isPPC64 = Subtarget.isPPC64();
1908   MachineFrameInfo &MFI = MF.getFrameInfo();
1909 
1910   // If the frame pointer save index hasn't been defined yet.
1911   if (!FPSI && needsFP(MF)) {
1912     // Find out what the fix offset of the frame pointer save area.
1913     int FPOffset = getFramePointerSaveOffset();
1914     // Allocate the frame index for frame pointer save area.
1915     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1916     // Save the result.
1917     FI->setFramePointerSaveIndex(FPSI);
1918   }
1919 
1920   int BPSI = FI->getBasePointerSaveIndex();
1921   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1922     int BPOffset = getBasePointerSaveOffset();
1923     // Allocate the frame index for the base pointer save area.
1924     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1925     // Save the result.
1926     FI->setBasePointerSaveIndex(BPSI);
1927   }
1928 
1929   // Reserve stack space for the PIC Base register (R30).
1930   // Only used in SVR4 32-bit.
1931   if (FI->usesPICBase()) {
1932     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1933     FI->setPICBasePointerSaveIndex(PBPSI);
1934   }
1935 
1936   // Make sure we don't explicitly spill r31, because, for example, we have
1937   // some inline asm which explicitly clobbers it, when we otherwise have a
1938   // frame pointer and are using r31's spill slot for the prologue/epilogue
1939   // code. Same goes for the base pointer and the PIC base register.
1940   if (needsFP(MF))
1941     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1942   if (RegInfo->hasBasePointer(MF))
1943     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1944   if (FI->usesPICBase())
1945     SavedRegs.reset(PPC::R30);
1946 
1947   // Reserve stack space to move the linkage area to in case of a tail call.
1948   int TCSPDelta = 0;
1949   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1950       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1951     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1952   }
1953 
1954   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1955   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1956   // object at the offset of the CR-save slot in the linkage area. The actual
1957   // save and restore of the condition register will be created as part of the
1958   // prologue and epilogue insertion, but the FixedStack object is needed to
1959   // keep the CalleSavedInfo valid.
1960   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1961        SavedRegs.test(PPC::CR4))) {
1962     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1963     const int64_t SpillOffset =
1964         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1965     int FrameIdx =
1966         MFI.CreateFixedObject(SpillSize, SpillOffset,
1967                               /* IsImmutable */ true, /* IsAliased */ false);
1968     FI->setCRSpillFrameIndex(FrameIdx);
1969   }
1970 }
1971 
1972 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1973                                                        RegScavenger *RS) const {
1974   // Get callee saved register information.
1975   MachineFrameInfo &MFI = MF.getFrameInfo();
1976   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1977 
1978   // If the function is shrink-wrapped, and if the function has a tail call, the
1979   // tail call might not be in the new RestoreBlock, so real branch instruction
1980   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1981   // RestoreBlock. So we handle this case here.
1982   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1983     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1984     for (MachineBasicBlock &MBB : MF) {
1985       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1986         createTailCallBranchInstr(MBB);
1987     }
1988   }
1989 
1990   // Early exit if no callee saved registers are modified!
1991   if (CSI.empty() && !needsFP(MF)) {
1992     addScavengingSpillSlot(MF, RS);
1993     return;
1994   }
1995 
1996   unsigned MinGPR = PPC::R31;
1997   unsigned MinG8R = PPC::X31;
1998   unsigned MinFPR = PPC::F31;
1999   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2000 
2001   bool HasGPSaveArea = false;
2002   bool HasG8SaveArea = false;
2003   bool HasFPSaveArea = false;
2004   bool HasVRSaveArea = false;
2005 
2006   SmallVector<CalleeSavedInfo, 18> GPRegs;
2007   SmallVector<CalleeSavedInfo, 18> G8Regs;
2008   SmallVector<CalleeSavedInfo, 18> FPRegs;
2009   SmallVector<CalleeSavedInfo, 18> VRegs;
2010 
2011   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2012     unsigned Reg = CSI[i].getReg();
2013     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2014             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2015            "Not expecting to try to spill R2 in a function that must save TOC");
2016     if (PPC::GPRCRegClass.contains(Reg)) {
2017       HasGPSaveArea = true;
2018 
2019       GPRegs.push_back(CSI[i]);
2020 
2021       if (Reg < MinGPR) {
2022         MinGPR = Reg;
2023       }
2024     } else if (PPC::G8RCRegClass.contains(Reg)) {
2025       HasG8SaveArea = true;
2026 
2027       G8Regs.push_back(CSI[i]);
2028 
2029       if (Reg < MinG8R) {
2030         MinG8R = Reg;
2031       }
2032     } else if (PPC::F8RCRegClass.contains(Reg)) {
2033       HasFPSaveArea = true;
2034 
2035       FPRegs.push_back(CSI[i]);
2036 
2037       if (Reg < MinFPR) {
2038         MinFPR = Reg;
2039       }
2040     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2041                PPC::CRRCRegClass.contains(Reg)) {
2042       ; // do nothing, as we already know whether CRs are spilled
2043     } else if (PPC::VRRCRegClass.contains(Reg) ||
2044                PPC::SPERCRegClass.contains(Reg)) {
2045       // Altivec and SPE are mutually exclusive, but have the same stack
2046       // alignment requirements, so overload the save area for both cases.
2047       HasVRSaveArea = true;
2048 
2049       VRegs.push_back(CSI[i]);
2050 
2051       if (Reg < MinVR) {
2052         MinVR = Reg;
2053       }
2054     } else {
2055       llvm_unreachable("Unknown RegisterClass!");
2056     }
2057   }
2058 
2059   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2060   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2061 
2062   int64_t LowerBound = 0;
2063 
2064   // Take into account stack space reserved for tail calls.
2065   int TCSPDelta = 0;
2066   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2067       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2068     LowerBound = TCSPDelta;
2069   }
2070 
2071   // The Floating-point register save area is right below the back chain word
2072   // of the previous stack frame.
2073   if (HasFPSaveArea) {
2074     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2075       int FI = FPRegs[i].getFrameIdx();
2076 
2077       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2078     }
2079 
2080     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2081   }
2082 
2083   // Check whether the frame pointer register is allocated. If so, make sure it
2084   // is spilled to the correct offset.
2085   if (needsFP(MF)) {
2086     int FI = PFI->getFramePointerSaveIndex();
2087     assert(FI && "No Frame Pointer Save Slot!");
2088     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2089     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2090     HasGPSaveArea = true;
2091   }
2092 
2093   if (PFI->usesPICBase()) {
2094     int FI = PFI->getPICBasePointerSaveIndex();
2095     assert(FI && "No PIC Base Pointer Save Slot!");
2096     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2097 
2098     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2099     HasGPSaveArea = true;
2100   }
2101 
2102   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2103   if (RegInfo->hasBasePointer(MF)) {
2104     int FI = PFI->getBasePointerSaveIndex();
2105     assert(FI && "No Base Pointer Save Slot!");
2106     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2107 
2108     Register BP = RegInfo->getBaseRegister(MF);
2109     if (PPC::G8RCRegClass.contains(BP)) {
2110       MinG8R = std::min<unsigned>(MinG8R, BP);
2111       HasG8SaveArea = true;
2112     } else if (PPC::GPRCRegClass.contains(BP)) {
2113       MinGPR = std::min<unsigned>(MinGPR, BP);
2114       HasGPSaveArea = true;
2115     }
2116   }
2117 
2118   // General register save area starts right below the Floating-point
2119   // register save area.
2120   if (HasGPSaveArea || HasG8SaveArea) {
2121     // Move general register save area spill slots down, taking into account
2122     // the size of the Floating-point register save area.
2123     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2124       if (!GPRegs[i].isSpilledToReg()) {
2125         int FI = GPRegs[i].getFrameIdx();
2126         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2127       }
2128     }
2129 
2130     // Move general register save area spill slots down, taking into account
2131     // the size of the Floating-point register save area.
2132     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2133       if (!G8Regs[i].isSpilledToReg()) {
2134         int FI = G8Regs[i].getFrameIdx();
2135         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2136       }
2137     }
2138 
2139     unsigned MinReg =
2140       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2141                          TRI->getEncodingValue(MinG8R));
2142 
2143     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2144     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2145   }
2146 
2147   // For 32-bit only, the CR save area is below the general register
2148   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2149   // to the stack pointer and hence does not need an adjustment here.
2150   // Only CR2 (the first nonvolatile spilled) has an associated frame
2151   // index so that we have a single uniform save area.
2152   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2153     // Adjust the frame index of the CR spill slot.
2154     for (const auto &CSInfo : CSI) {
2155       if (CSInfo.getReg() == PPC::CR2) {
2156         int FI = CSInfo.getFrameIdx();
2157         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2158         break;
2159       }
2160     }
2161 
2162     LowerBound -= 4; // The CR save area is always 4 bytes long.
2163   }
2164 
2165   // Both Altivec and SPE have the same alignment and padding requirements
2166   // within the stack frame.
2167   if (HasVRSaveArea) {
2168     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2169     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2170     // we are using negative number here (the stack grows downward). We should
2171     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2172     // is the alignment size ( n = 16 here) and y is the size after aligning.
2173     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2174     LowerBound &= ~(15);
2175 
2176     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2177       int FI = VRegs[i].getFrameIdx();
2178 
2179       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2180     }
2181   }
2182 
2183   addScavengingSpillSlot(MF, RS);
2184 }
2185 
2186 void
2187 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2188                                          RegScavenger *RS) const {
2189   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2190   // a large stack, which will require scavenging a register to materialize a
2191   // large offset.
2192 
2193   // We need to have a scavenger spill slot for spills if the frame size is
2194   // large. In case there is no free register for large-offset addressing,
2195   // this slot is used for the necessary emergency spill. Also, we need the
2196   // slot for dynamic stack allocations.
2197 
2198   // The scavenger might be invoked if the frame offset does not fit into
2199   // the 16-bit immediate. We don't know the complete frame size here
2200   // because we've not yet computed callee-saved register spills or the
2201   // needed alignment padding.
2202   unsigned StackSize = determineFrameLayout(MF, true);
2203   MachineFrameInfo &MFI = MF.getFrameInfo();
2204   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2205       (hasSpills(MF) && !isInt<16>(StackSize))) {
2206     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2207     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2208     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2209     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2210     unsigned Size = TRI.getSpillSize(RC);
2211     Align Alignment = TRI.getSpillAlign(RC);
2212     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2213 
2214     // Might we have over-aligned allocas?
2215     bool HasAlVars =
2216         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2217 
2218     // These kinds of spills might need two registers.
2219     if (spillsCR(MF) || HasAlVars)
2220       RS->addScavengingFrameIndex(
2221           MFI.CreateStackObject(Size, Alignment, false));
2222   }
2223 }
2224 
2225 // This function checks if a callee saved gpr can be spilled to a volatile
2226 // vector register. This occurs for leaf functions when the option
2227 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2228 // which were not spilled to vectors, return false so the target independent
2229 // code can handle them by assigning a FrameIdx to a stack slot.
2230 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2231     MachineFunction &MF, const TargetRegisterInfo *TRI,
2232     std::vector<CalleeSavedInfo> &CSI) const {
2233 
2234   if (CSI.empty())
2235     return true; // Early exit if no callee saved registers are modified!
2236 
2237   // Early exit if cannot spill gprs to volatile vector registers.
2238   MachineFrameInfo &MFI = MF.getFrameInfo();
2239   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2240     return false;
2241 
2242   // Build a BitVector of VSRs that can be used for spilling GPRs.
2243   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2244   BitVector BVCalleeSaved(TRI->getNumRegs());
2245   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2246   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2247   for (unsigned i = 0; CSRegs[i]; ++i)
2248     BVCalleeSaved.set(CSRegs[i]);
2249 
2250   for (unsigned Reg : BVAllocatable.set_bits()) {
2251     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2252     // used in the function.
2253     if (BVCalleeSaved[Reg] ||
2254         (!PPC::F8RCRegClass.contains(Reg) &&
2255          !PPC::VFRCRegClass.contains(Reg)) ||
2256         (MF.getRegInfo().isPhysRegUsed(Reg)))
2257       BVAllocatable.reset(Reg);
2258   }
2259 
2260   bool AllSpilledToReg = true;
2261   for (auto &CS : CSI) {
2262     if (BVAllocatable.none())
2263       return false;
2264 
2265     unsigned Reg = CS.getReg();
2266     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2267       AllSpilledToReg = false;
2268       continue;
2269     }
2270 
2271     unsigned VolatileVFReg = BVAllocatable.find_first();
2272     if (VolatileVFReg < BVAllocatable.size()) {
2273       CS.setDstReg(VolatileVFReg);
2274       BVAllocatable.reset(VolatileVFReg);
2275     } else {
2276       AllSpilledToReg = false;
2277     }
2278   }
2279   return AllSpilledToReg;
2280 }
2281 
2282 bool PPCFrameLowering::spillCalleeSavedRegisters(
2283     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2284     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2285 
2286   MachineFunction *MF = MBB.getParent();
2287   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2288   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2289   bool MustSaveTOC = FI->mustSaveTOC();
2290   DebugLoc DL;
2291   bool CRSpilled = false;
2292   MachineInstrBuilder CRMIB;
2293 
2294   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2295     unsigned Reg = CSI[i].getReg();
2296 
2297     // CR2 through CR4 are the nonvolatile CR fields.
2298     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2299 
2300     // Add the callee-saved register as live-in; it's killed at the spill.
2301     // Do not do this for callee-saved registers that are live-in to the
2302     // function because they will already be marked live-in and this will be
2303     // adding it for a second time. It is an error to add the same register
2304     // to the set more than once.
2305     const MachineRegisterInfo &MRI = MF->getRegInfo();
2306     bool IsLiveIn = MRI.isLiveIn(Reg);
2307     if (!IsLiveIn)
2308        MBB.addLiveIn(Reg);
2309 
2310     if (CRSpilled && IsCRField) {
2311       CRMIB.addReg(Reg, RegState::ImplicitKill);
2312       continue;
2313     }
2314 
2315     // The actual spill will happen in the prologue.
2316     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2317       continue;
2318 
2319     // Insert the spill to the stack frame.
2320     if (IsCRField) {
2321       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2322       if (!Subtarget.is32BitELFABI()) {
2323         // The actual spill will happen at the start of the prologue.
2324         FuncInfo->addMustSaveCR(Reg);
2325       } else {
2326         CRSpilled = true;
2327         FuncInfo->setSpillsCR();
2328 
2329         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2330         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2331         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2332                   .addReg(Reg, RegState::ImplicitKill);
2333 
2334         MBB.insert(MI, CRMIB);
2335         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2336                                          .addReg(PPC::R12,
2337                                                  getKillRegState(true)),
2338                                          CSI[i].getFrameIdx()));
2339       }
2340     } else {
2341       if (CSI[i].isSpilledToReg()) {
2342         NumPESpillVSR++;
2343         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2344           .addReg(Reg, getKillRegState(true));
2345       } else {
2346         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2347         // Use !IsLiveIn for the kill flag.
2348         // We do not want to kill registers that are live in this function
2349         // before their use because they will become undefined registers.
2350         // Functions without NoUnwind need to preserve the order of elements in
2351         // saved vector registers.
2352         if (Subtarget.needsSwapsForVSXMemOps() &&
2353             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2354           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2355                                        CSI[i].getFrameIdx(), RC, TRI);
2356         else
2357           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2358                                   RC, TRI);
2359       }
2360     }
2361   }
2362   return true;
2363 }
2364 
2365 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2366                        bool CR4Spilled, MachineBasicBlock &MBB,
2367                        MachineBasicBlock::iterator MI,
2368                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2369 
2370   MachineFunction *MF = MBB.getParent();
2371   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2372   DebugLoc DL;
2373   unsigned MoveReg = PPC::R12;
2374 
2375   // 32-bit:  FP-relative
2376   MBB.insert(MI,
2377              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2378                                CSI[CSIIndex].getFrameIdx()));
2379 
2380   unsigned RestoreOp = PPC::MTOCRF;
2381   if (CR2Spilled)
2382     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2383                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2384 
2385   if (CR3Spilled)
2386     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2387                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2388 
2389   if (CR4Spilled)
2390     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2391                .addReg(MoveReg, getKillRegState(true)));
2392 }
2393 
2394 MachineBasicBlock::iterator PPCFrameLowering::
2395 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2396                               MachineBasicBlock::iterator I) const {
2397   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2398   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2399       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2400     // Add (actually subtract) back the amount the callee popped on return.
2401     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2402       bool is64Bit = Subtarget.isPPC64();
2403       CalleeAmt *= -1;
2404       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2405       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2406       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2407       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2408       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2409       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2410       const DebugLoc &dl = I->getDebugLoc();
2411 
2412       if (isInt<16>(CalleeAmt)) {
2413         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2414           .addReg(StackReg, RegState::Kill)
2415           .addImm(CalleeAmt);
2416       } else {
2417         MachineBasicBlock::iterator MBBI = I;
2418         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2419           .addImm(CalleeAmt >> 16);
2420         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2421           .addReg(TmpReg, RegState::Kill)
2422           .addImm(CalleeAmt & 0xFFFF);
2423         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2424           .addReg(StackReg, RegState::Kill)
2425           .addReg(TmpReg);
2426       }
2427     }
2428   }
2429   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2430   return MBB.erase(I);
2431 }
2432 
2433 static bool isCalleeSavedCR(unsigned Reg) {
2434   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2435 }
2436 
2437 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2438     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2439     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2440   MachineFunction *MF = MBB.getParent();
2441   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2442   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2443   bool MustSaveTOC = FI->mustSaveTOC();
2444   bool CR2Spilled = false;
2445   bool CR3Spilled = false;
2446   bool CR4Spilled = false;
2447   unsigned CSIIndex = 0;
2448 
2449   // Initialize insertion-point logic; we will be restoring in reverse
2450   // order of spill.
2451   MachineBasicBlock::iterator I = MI, BeforeI = I;
2452   bool AtStart = I == MBB.begin();
2453 
2454   if (!AtStart)
2455     --BeforeI;
2456 
2457   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2458     unsigned Reg = CSI[i].getReg();
2459 
2460     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2461       continue;
2462 
2463     // Restore of callee saved condition register field is handled during
2464     // epilogue insertion.
2465     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2466       continue;
2467 
2468     if (Reg == PPC::CR2) {
2469       CR2Spilled = true;
2470       // The spill slot is associated only with CR2, which is the
2471       // first nonvolatile spilled.  Save it here.
2472       CSIIndex = i;
2473       continue;
2474     } else if (Reg == PPC::CR3) {
2475       CR3Spilled = true;
2476       continue;
2477     } else if (Reg == PPC::CR4) {
2478       CR4Spilled = true;
2479       continue;
2480     } else {
2481       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2482       // least one CR register, restore all spilled CRs together.
2483       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2484         bool is31 = needsFP(*MF);
2485         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2486                    CSIIndex);
2487         CR2Spilled = CR3Spilled = CR4Spilled = false;
2488       }
2489 
2490       if (CSI[i].isSpilledToReg()) {
2491         DebugLoc DL;
2492         NumPEReloadVSR++;
2493         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2494             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2495       } else {
2496        // Default behavior for non-CR saves.
2497         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2498 
2499         // Functions without NoUnwind need to preserve the order of elements in
2500         // saved vector registers.
2501         if (Subtarget.needsSwapsForVSXMemOps() &&
2502             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2503           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2504                                         TRI);
2505         else
2506           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2507 
2508         assert(I != MBB.begin() &&
2509                "loadRegFromStackSlot didn't insert any code!");
2510       }
2511     }
2512 
2513     // Insert in reverse order.
2514     if (AtStart)
2515       I = MBB.begin();
2516     else {
2517       I = BeforeI;
2518       ++I;
2519     }
2520   }
2521 
2522   // If we haven't yet spilled the CRs, do so now.
2523   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2524     assert(Subtarget.is32BitELFABI() &&
2525            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2526     bool is31 = needsFP(*MF);
2527     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2528   }
2529 
2530   return true;
2531 }
2532 
2533 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2534   return TOCSaveOffset;
2535 }
2536 
2537 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2538   return FramePointerSaveOffset;
2539 }
2540 
2541 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2542   return BasePointerSaveOffset;
2543 }
2544 
2545 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2546   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2547     return false;
2548   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2549           MF.getSubtarget<PPCSubtarget>().isPPC64());
2550 }
2551