1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MCTargetDesc/PPCPredicates.h"
14 #include "PPCFrameLowering.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineModuleInfo.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/RegisterScavenging.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/Target/TargetOptions.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "framelowering"
33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 STATISTIC(NumPrologProbed, "Number of prologues probed");
36 
37 static cl::opt<bool>
38 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
39                      cl::desc("Enable spills in prologue to vector registers."),
40                      cl::init(false), cl::Hidden);
41 
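// Offset, relative to the stack pointer on function entry, of the slot in the
// caller's frame where the link register is saved.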
42 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
43   if (STI.isAIXABI())
44     return STI.isPPC64() ? 16 : 8;
45   // SVR4 ABI:
46   return STI.isPPC64() ? 16 : 4;
47 }
48 
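// Offset of the TOC register save slot within the linkage area.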
49 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 40 : 20;
52   return STI.isELFv2ABI() ? 24 : 40;
53 }
54 
55 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
56   // First slot in the general register save area.
57   return STI.isPPC64() ? -8U : -4U;
58 }
59 
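// Size of the ABI-defined linkage area at the bottom of every stack frame:
// six pointer-sized slots on AIX and ELFv1, four on ELFv2, and just the back
// chain and LR save word (8 bytes) on 32-bit SVR4.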
60 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
61   if (STI.isAIXABI() || STI.isPPC64())
62     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
63 
64   // 32-bit SVR4 ABI:
65   return 8;
66 }
67 
68 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
69   // Third slot in the general purpose register save area.
70   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
71     return -12U;
72 
73   // Second slot in the general purpose register save area.
74   return STI.isPPC64() ? -16U : -8U;
75 }
76 
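// Offset of the CR save word within the linkage area.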
77 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
78   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
79 }
80 
81 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
82     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
83                           STI.getPlatformStackAlignment(), 0),
84       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
85       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
86       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
87       LinkageSize(computeLinkageSize(Subtarget)),
88       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
89       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
90 
91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
93     unsigned &NumEntries) const {
94 
95 // Floating-point register save area offsets.
96 #define CALLEE_SAVED_FPRS \
97       {PPC::F31, -8},     \
98       {PPC::F30, -16},    \
99       {PPC::F29, -24},    \
100       {PPC::F28, -32},    \
101       {PPC::F27, -40},    \
102       {PPC::F26, -48},    \
103       {PPC::F25, -56},    \
104       {PPC::F24, -64},    \
105       {PPC::F23, -72},    \
106       {PPC::F22, -80},    \
107       {PPC::F21, -88},    \
108       {PPC::F20, -96},    \
109       {PPC::F19, -104},   \
110       {PPC::F18, -112},   \
111       {PPC::F17, -120},   \
112       {PPC::F16, -128},   \
113       {PPC::F15, -136},   \
114       {PPC::F14, -144}
115 
116 // 32-bit general purpose register save area offsets shared by ELF and
117 // AIX. AIX has r13 as an additional CSR.
118 #define CALLEE_SAVED_GPRS32 \
119       {PPC::R31, -4},       \
120       {PPC::R30, -8},       \
121       {PPC::R29, -12},      \
122       {PPC::R28, -16},      \
123       {PPC::R27, -20},      \
124       {PPC::R26, -24},      \
125       {PPC::R25, -28},      \
126       {PPC::R24, -32},      \
127       {PPC::R23, -36},      \
128       {PPC::R22, -40},      \
129       {PPC::R21, -44},      \
130       {PPC::R20, -48},      \
131       {PPC::R19, -52},      \
132       {PPC::R18, -56},      \
133       {PPC::R17, -60},      \
134       {PPC::R16, -64},      \
135       {PPC::R15, -68},      \
136       {PPC::R14, -72}
137 
138 // 64-bit general purpose register save area offsets.
139 #define CALLEE_SAVED_GPRS64 \
140       {PPC::X31, -8},       \
141       {PPC::X30, -16},      \
142       {PPC::X29, -24},      \
143       {PPC::X28, -32},      \
144       {PPC::X27, -40},      \
145       {PPC::X26, -48},      \
146       {PPC::X25, -56},      \
147       {PPC::X24, -64},      \
148       {PPC::X23, -72},      \
149       {PPC::X22, -80},      \
150       {PPC::X21, -88},      \
151       {PPC::X20, -96},      \
152       {PPC::X19, -104},     \
153       {PPC::X18, -112},     \
154       {PPC::X17, -120},     \
155       {PPC::X16, -128},     \
156       {PPC::X15, -136},     \
157       {PPC::X14, -144}
158 
159 // Vector register save area offsets.
160 #define CALLEE_SAVED_VRS \
161       {PPC::V31, -16},   \
162       {PPC::V30, -32},   \
163       {PPC::V29, -48},   \
164       {PPC::V28, -64},   \
165       {PPC::V27, -80},   \
166       {PPC::V26, -96},   \
167       {PPC::V25, -112},  \
168       {PPC::V24, -128},  \
169       {PPC::V23, -144},  \
170       {PPC::V22, -160},  \
171       {PPC::V21, -176},  \
172       {PPC::V20, -192}
173 
174   // Note that the offsets here overlap, but this is fixed up in
175   // processFunctionBeforeFrameFinalized.
176 
177   static const SpillSlot ELFOffsets32[] = {
178       CALLEE_SAVED_FPRS,
179       CALLEE_SAVED_GPRS32,
180 
181       // CR save area offset.  We map each of the nonvolatile CR fields
182       // to the slot for CR2, which is the first of the nonvolatile CR
183       // fields to be assigned, so that we only allocate one save slot.
184       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
185       {PPC::CR2, -4},
186 
187       // VRSAVE save area offset.
188       {PPC::VRSAVE, -4},
189 
190       CALLEE_SAVED_VRS,
191 
192       // SPE register save area (overlaps Vector save area).
193       {PPC::S31, -8},
194       {PPC::S30, -16},
195       {PPC::S29, -24},
196       {PPC::S28, -32},
197       {PPC::S27, -40},
198       {PPC::S26, -48},
199       {PPC::S25, -56},
200       {PPC::S24, -64},
201       {PPC::S23, -72},
202       {PPC::S22, -80},
203       {PPC::S21, -88},
204       {PPC::S20, -96},
205       {PPC::S19, -104},
206       {PPC::S18, -112},
207       {PPC::S17, -120},
208       {PPC::S16, -128},
209       {PPC::S15, -136},
210       {PPC::S14, -144}};
211 
212   static const SpillSlot ELFOffsets64[] = {
213       CALLEE_SAVED_FPRS,
214       CALLEE_SAVED_GPRS64,
215 
216       // VRSAVE save area offset.
217       {PPC::VRSAVE, -4},
218       CALLEE_SAVED_VRS
219   };
220 
221   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
222                                            CALLEE_SAVED_GPRS32,
223                                            // Add AIX's extra CSR.
224                                            {PPC::R13, -76},
225                                            CALLEE_SAVED_VRS};
226 
227   static const SpillSlot AIXOffsets64[] = {
228       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
229 
230   if (Subtarget.is64BitELFABI()) {
231     NumEntries = array_lengthof(ELFOffsets64);
232     return ELFOffsets64;
233   }
234 
235   if (Subtarget.is32BitELFABI()) {
236     NumEntries = array_lengthof(ELFOffsets32);
237     return ELFOffsets32;
238   }
239 
240   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
241 
242   if (Subtarget.isPPC64()) {
243     NumEntries = array_lengthof(AIXOffsets64);
244     return AIXOffsets64;
245   }
246 
247   NumEntries = array_lengthof(AIXOffsets32);
248   return AIXOffsets32;
249 }
250 
251 static bool spillsCR(const MachineFunction &MF) {
252   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
253   return FuncInfo->isCRSpilled();
254 }
255 
256 static bool hasSpills(const MachineFunction &MF) {
257   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
258   return FuncInfo->hasSpills();
259 }
260 
261 static bool hasNonRISpills(const MachineFunction &MF) {
262   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
263   return FuncInfo->hasNonRISpills();
264 }
265 
266 /// MustSaveLR - Return true if this function requires that we save the LR
267 /// register onto the stack in the prolog and restore it in the epilog of the
268 /// function.
269 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
270   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
271 
272   // We need a save/restore of LR if there is any def of LR (which is
273   // defined by calls, including the PIC setup sequence), or if there is
274   // some use of the LR stack slot (e.g. for builtin_return_address).
275   // (LR comes in 32 and 64 bit versions.)
276   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
277   return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
278 }
279 
280 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
281 /// call frame size. Update the MachineFunction object with the stack size.
282 uint64_t
283 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
284                                                 bool UseEstimate) const {
285   unsigned NewMaxCallFrameSize = 0;
286   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
287                                             &NewMaxCallFrameSize);
288   MF.getFrameInfo().setStackSize(FrameSize);
289   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
290   return FrameSize;
291 }
292 
293 /// determineFrameLayout - Determine the size of the frame and maximum call
294 /// frame size.
295 uint64_t
296 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
297                                        bool UseEstimate,
298                                        unsigned *NewMaxCallFrameSize) const {
299   const MachineFrameInfo &MFI = MF.getFrameInfo();
300   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
301 
302   // Get the number of bytes to allocate from the FrameInfo
303   uint64_t FrameSize =
304     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
305 
306   // Get stack alignments. The frame must be aligned to the greatest of these:
307   Align TargetAlign = getStackAlign(); // alignment required per the ABI
308   Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
309   Align Alignment = std::max(TargetAlign, MaxAlign);
310 
311   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
312 
313   unsigned LR = RegInfo->getRARegister();
314   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
315   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
316                        !MFI.adjustsStack() &&       // No calls.
317                        !MustSaveLR(MF, LR) &&       // No need to save LR.
318                        !FI->mustSaveTOC() &&        // No need to save TOC.
319                        !RegInfo->hasBasePointer(MF); // No special alignment.
320 
321   // Note: for the PPC32 SVR4 ABI, we can still generate stackless
322   // code if all local vars are reg-allocated.
323   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
324 
325   // Check whether we can skip adjusting the stack pointer (by using red zone)
326   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
327     // No need for frame
328     return 0;
329   }
330 
331   // Get the maximum call frame size of all the calls.
332   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
333 
334   // Maximum call frame needs to be at least big enough for linkage area.
335   unsigned minCallFrameSize = getLinkageSize();
336   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
337 
338   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
339   // that allocations will be aligned.
340   if (MFI.hasVarSizedObjects())
341     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
342 
343   // Update the new max call frame size if the caller passes in a valid pointer.
344   if (NewMaxCallFrameSize)
345     *NewMaxCallFrameSize = maxCallFrameSize;
346 
347   // Include call frame size in total.
348   FrameSize += maxCallFrameSize;
349 
350   // Make sure the frame is aligned.
351   FrameSize = alignTo(FrameSize, Alignment);
352 
353   return FrameSize;
354 }
355 
356 // hasFP - Return true if the specified function actually has a dedicated frame
357 // pointer register.
358 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
359   const MachineFrameInfo &MFI = MF.getFrameInfo();
360   // FIXME: This is pretty much broken by design: hasFP() might be called very
361   // early, before the stack layout is calculated, and thus might return
362   // different answers depending on when it is called.
363   return (MFI.getStackSize()) && needsFP(MF);
364 }
365 
366 // needsFP - Return true if the specified function should have a dedicated frame
367 // pointer register.  This is true if the function has variable sized allocas or
368 // if frame pointer elimination is disabled.
369 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
370   const MachineFrameInfo &MFI = MF.getFrameInfo();
371 
372   // Naked functions have no stack frame pushed, so we don't have a frame
373   // pointer.
374   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
375     return false;
376 
377   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
378          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
379          MF.exposesReturnsTwice() ||
380          (MF.getTarget().Options.GuaranteedTailCallOpt &&
381           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
382 }
383 
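// replaceFPWithRealFP - Rewrite all uses of the FP/FP8 and BP/BP8 placeholder
// registers to the physical registers that actually serve those roles in this
// function (R31/X31 when a frame pointer is needed, otherwise the stack
// pointer itself).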
384 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
385   bool is31 = needsFP(MF);
386   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
387   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
388 
389   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
390   bool HasBP = RegInfo->hasBasePointer(MF);
391   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
392   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
393 
394   for (MachineBasicBlock &MBB : MF)
395     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
396       --MBBI;
397       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
398         MachineOperand &MO = MBBI->getOperand(I);
399         if (!MO.isReg())
400           continue;
401 
402         switch (MO.getReg()) {
403         case PPC::FP:
404           MO.setReg(FPReg);
405           break;
406         case PPC::FP8:
407           MO.setReg(FP8Reg);
408           break;
409         case PPC::BP:
410           MO.setReg(BPReg);
411           break;
412         case PPC::BP8:
413           MO.setReg(BP8Reg);
414           break;
415 
416         }
417       }
418     }
419 }
420 
421 /*  This function will do the following:
422     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
423       respectively (defaults recommended by the ABI) and return true
424     - If MBB is not an entry block, initialize the register scavenger and look
425       for available registers.
426     - If the defaults (R0/R12) are available, return true
427     - If TwoUniqueRegsRequired is set to true, it looks for two unique
428       registers. Otherwise, look for a single available register.
429       - If the required registers are found, set SR1 and SR2 and return true.
430       - If the required registers are not found, set SR2 or both SR1 and SR2 to
431         PPC::NoRegister and return false.
432 
433     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
434     is not set, this function will attempt to find two different registers, but
435     still return true if only one register is available (and set SR1 == SR2).
436 */
437 bool
438 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
439                                       bool UseAtEnd,
440                                       bool TwoUniqueRegsRequired,
441                                       Register *SR1,
442                                       Register *SR2) const {
443   RegScavenger RS;
444   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
445   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
446 
447   // Set the defaults for the two scratch registers.
448   if (SR1)
449     *SR1 = R0;
450 
451   if (SR2) {
452     assert (SR1 && "Asking for the second scratch register but not the first?");
453     *SR2 = R12;
454   }
455 
456   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
457   if ((UseAtEnd && MBB->isReturnBlock()) ||
458       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
459     return true;
460 
461   RS.enterBasicBlock(*MBB);
462 
463   if (UseAtEnd && !MBB->empty()) {
464     // The scratch register will be used at the end of the block, so we must
465     // consider all registers used within the block.
466 
467     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
468     // If no terminator, back iterator up to previous instruction.
469     if (MBBI == MBB->end())
470       MBBI = std::prev(MBBI);
471 
472     if (MBBI != MBB->begin())
473       RS.forward(MBBI);
474   }
475 
476   // If the two registers are available, we're all good.
477   // Note that we only return here if both R0 and R12 are available because
478   // although the function may not require two unique registers, it may benefit
479   // from having two so we should try to provide them.
480   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
481     return true;
482 
483   // Get the list of callee-saved registers for the target.
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
486 
487   // Get all the available registers in the block.
488   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
489                                      &PPC::GPRCRegClass);
490 
491   // We shouldn't use callee-saved registers as scratch registers as they may be
492   // available when looking for a candidate block for shrink wrapping but not
493   // available when the actual prologue/epilogue is being emitted because they
494   // were added as live-in to the prologue block by PrologueEpilogueInserter.
495   for (int i = 0; CSRegs[i]; ++i)
496     BV.reset(CSRegs[i]);
497 
498   // Set the first scratch register to the first available one.
499   if (SR1) {
500     int FirstScratchReg = BV.find_first();
501     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
502   }
503 
504   // If there is another one available, set the second scratch register to that.
505   // Otherwise, set it to either PPC::NoRegister if this function requires two
506   // or to whatever SR1 is set to if this function doesn't require two.
507   if (SR2) {
508     int SecondScratchReg = BV.find_next(*SR1);
509     if (SecondScratchReg != -1)
510       *SR2 = SecondScratchReg;
511     else
512       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
513   }
514 
515   // Now that we've done our best to provide both registers, double check
516   // whether we were unable to provide enough.
517   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
518     return false;
519 
520   return true;
521 }
522 
523 // We need a scratch register for spilling LR and for spilling CR. By default,
524 // we use two scratch registers to hide latency. However, if only one scratch
525 // register is available, we can adjust for that by not overlapping the spill
526 // code. That said, if we need to realign the stack (i.e. have a base pointer)
527 // and the stack frame is large, we need two scratch registers.
528 // Stack probing also requires two scratch registers: one to hold the old SP
529 // and one for large frames and large probe sizes.
530 bool
531 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
532   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
533   MachineFunction &MF = *(MBB->getParent());
534   bool HasBP = RegInfo->hasBasePointer(MF);
535   unsigned FrameSize = determineFrameLayout(MF);
536   int NegFrameSize = -FrameSize;
537   bool IsLargeFrame = !isInt<16>(NegFrameSize);
538   MachineFrameInfo &MFI = MF.getFrameInfo();
539   Align MaxAlign = MFI.getMaxAlign();
540   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
541   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
542 
543   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
544          TLI.hasInlineStackProbe(MF);
545 }
546 
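// canUseAsPrologue - A block can host the prologue only if the scratch
// register(s) the prologue needs are available at its start.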
547 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
548   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
549 
550   return findScratchRegister(TmpMBB, false,
551                              twoUniqueScratchRegsRequired(TmpMBB));
552 }
553 
554 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
555   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
556 
557   return findScratchRegister(TmpMBB, true);
558 }
559 
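// stackUpdateCanBeMoved - Return true if it is safe to move the stack pointer
// update in the prologue down past the callee-saved register stores, so those
// stores do not have to wait for the store-with-update to complete.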
560 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
563 
564   // Abort if there is no register info or function info.
565   if (!RegInfo || !FI)
566     return false;
567 
568   // Only move the stack update on ELFv2 ABI and PPC64.
569   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
570     return false;
571 
572   // Check the frame size first and return false if it does not fit the
573   // requirements.
574   // We need a non-zero frame size as well as a frame that will fit in the red
575   // zone. This is because by moving the stack pointer update we are now storing
576   // to the red zone until the stack pointer is updated. If we get an interrupt
577   // inside the prologue but before the stack update we now have a number of
578   // stores to the red zone and those stores must all fit.
579   MachineFrameInfo &MFI = MF.getFrameInfo();
580   unsigned FrameSize = MFI.getStackSize();
581   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
582     return false;
583 
584   // Frame pointers and base pointers complicate matters so don't do anything
585   // if we have them. For example having a frame pointer will sometimes require
586   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
587   // difficult. Similar situation exists with setjmp.
588   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
589     return false;
590 
591   // Calls to fast_cc functions use different rules for passing parameters on
592   // the stack from the ABI and using PIC base in the function imposes
593   // similar restrictions to using the base pointer. It is not generally safe
594   // to move the stack pointer update in these situations.
595   if (FI->hasFastCall() || FI->usesPICBase())
596     return false;
597 
598   // Finally we can move the stack update if we do not require register
599   // scavenging. Register scavenging can introduce more spills and so
600   // may make the frame size larger than we have computed.
601   return !RegInfo->requiresFrameIndexScavenging(MF);
602 }
603 
604 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
605                                     MachineBasicBlock &MBB) const {
606   MachineBasicBlock::iterator MBBI = MBB.begin();
607   MachineFrameInfo &MFI = MF.getFrameInfo();
608   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
609   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
610   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
611 
612   MachineModuleInfo &MMI = MF.getMMI();
613   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
614   DebugLoc dl;
615   // AIX assembler does not support cfi directives.
616   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
617 
618   // Get processor type.
619   bool isPPC64 = Subtarget.isPPC64();
620   // Get the ABI.
621   bool isSVR4ABI = Subtarget.isSVR4ABI();
622   bool isELFv2ABI = Subtarget.isELFv2ABI();
623   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
624 
625   // Work out frame sizes.
626   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
627   int64_t NegFrameSize = -FrameSize;
628   if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))
629     llvm_unreachable("Unhandled stack size!");
630 
631   if (MFI.isFrameAddressTaken())
632     replaceFPWithRealFP(MF);
633 
634   // Check if the link register (LR) must be saved.
635   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
636   bool MustSaveLR = FI->mustSaveLR();
637   bool MustSaveTOC = FI->mustSaveTOC();
638   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
639   bool MustSaveCR = !MustSaveCRs.empty();
640   // Do we have a frame pointer and/or base pointer for this function?
641   bool HasFP = hasFP(MF);
642   bool HasBP = RegInfo->hasBasePointer(MF);
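  // Only 32-bit SVR4 lacks a red zone; all other ABIs handled here provide one.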
643   bool HasRedZone = isPPC64 || !isSVR4ABI;
644   bool HasROPProtect = Subtarget.hasROPProtect();
645   bool HasPrivileged = Subtarget.hasPrivileged();
646 
647   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
648   Register BPReg = RegInfo->getBaseRegister(MF);
649   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
650   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
651   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
652   Register ScratchReg;
653   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
654   //  ... (R12/X12 is volatile in both Darwin and SVR4, and can't be a function arg.)
655   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
656                                                 : PPC::MFLR );
657   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
658                                                  : PPC::STW );
659   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
660                                                      : PPC::STWU );
661   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
662                                                         : PPC::STWUX);
663   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
664                                                           : PPC::LIS );
665   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
666                                                  : PPC::ORI );
667   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
668                                               : PPC::OR );
669   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
670                                                             : PPC::SUBFC);
671   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
672                                                                : PPC::SUBFIC);
673   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
674                                                            : PPC::MFCR);
675   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
676   const MCInstrDesc &HashST =
677       TII.get(HasPrivileged ? PPC::HASHSTP : PPC::HASHST);
678 
679   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
680   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
681   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
682   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
683   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
684          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
685 
686   // Using the same bool variable as below to suppress compiler warnings.
687   bool SingleScratchReg = findScratchRegister(
688       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
689   assert(SingleScratchReg &&
690          "Required number of registers not available in this block");
691 
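  // From here on, SingleScratchReg tracks whether only one distinct scratch
  // register is available, i.e. ScratchReg and TempReg are the same register.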
692   SingleScratchReg = ScratchReg == TempReg;
693 
694   int64_t LROffset = getReturnSaveOffset();
695 
696   int64_t FPOffset = 0;
697   if (HasFP) {
698     MachineFrameInfo &MFI = MF.getFrameInfo();
699     int FPIndex = FI->getFramePointerSaveIndex();
700     assert(FPIndex && "No Frame Pointer Save Slot!");
701     FPOffset = MFI.getObjectOffset(FPIndex);
702   }
703 
704   int64_t BPOffset = 0;
705   if (HasBP) {
706     MachineFrameInfo &MFI = MF.getFrameInfo();
707     int BPIndex = FI->getBasePointerSaveIndex();
708     assert(BPIndex && "No Base Pointer Save Slot!");
709     BPOffset = MFI.getObjectOffset(BPIndex);
710   }
711 
712   int64_t PBPOffset = 0;
713   if (FI->usesPICBase()) {
714     MachineFrameInfo &MFI = MF.getFrameInfo();
715     int PBPIndex = FI->getPICBasePointerSaveIndex();
716     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
717     PBPOffset = MFI.getObjectOffset(PBPIndex);
718   }
719 
720   // Get stack alignments.
721   Align MaxAlign = MFI.getMaxAlign();
722   if (HasBP && MaxAlign > 1)
723     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
724 
725   // Frames of 32KB & larger require special handling because they cannot be
726   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
727   bool isLargeFrame = !isInt<16>(NegFrameSize);
728 
729   // Check if we can move the stack update instruction (stdu) down the prologue
730   // past the callee saves. Hopefully this will avoid the situation where the
731   // saves are waiting for the update on the store with update to complete.
732   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
733   bool MovingStackUpdateDown = false;
734 
735   // Check if we can move the stack update.
736   if (stackUpdateCanBeMoved(MF)) {
737     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
738     for (CalleeSavedInfo CSI : Info) {
739       // If the callee saved register is spilled to a register instead of the
740       // stack then the spill no longer uses the stack pointer.
741       // This can lead to two consequences:
742       // 1) We no longer need to update the stack because the function does not
743       //    spill any callee saved registers to stack.
744       // 2) We have a situation where we still have to update the stack pointer
745       //    even though some registers are spilled to other registers. In
746       //    this case the current code moves the stack update to an incorrect
747       //    position.
748       // In either case we should abort moving the stack update operation.
749       if (CSI.isSpilledToReg()) {
750         StackUpdateLoc = MBBI;
751         MovingStackUpdateDown = false;
752         break;
753       }
754 
755       int FrIdx = CSI.getFrameIdx();
756       // If the frame index is not negative the callee saved info belongs to a
757       // stack object that is not a fixed stack object. We ignore non-fixed
758       // stack objects because we won't move the stack update pointer past them.
759       if (FrIdx >= 0)
760         continue;
761 
762       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
763         StackUpdateLoc++;
764         MovingStackUpdateDown = true;
765       } else {
766         // We need all of the Frame Indices to meet these conditions.
767         // If they do not, abort the whole operation.
768         StackUpdateLoc = MBBI;
769         MovingStackUpdateDown = false;
770         break;
771       }
772     }
773 
774     // If the operation was not aborted then update the object offset.
775     if (MovingStackUpdateDown) {
776       for (CalleeSavedInfo CSI : Info) {
777         int FrIdx = CSI.getFrameIdx();
778         if (FrIdx < 0)
779           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
780       }
781     }
782   }
783 
784   // Where in the prologue we move the CR fields depends on how many scratch
785   // registers we have, and if we need to save the link register or not. This
786   // lambda is to avoid duplicating the logic in 2 places.
787   auto BuildMoveFromCR = [&]() {
788     if (isELFv2ABI && MustSaveCRs.size() == 1) {
789     // In the ELFv2 ABI, we are not required to save all CR fields.
790       // In the ELFv2 ABI, we are not required to save all CR fields.
791       // If only one CR field is clobbered, it is more efficient to use
792       // mfocrf to selectively save just that field, because mfocrf has
793       // shorter latency compared to mfcr.
794       MachineInstrBuilder MIB =
795           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
796       MIB.addReg(MustSaveCRs[0], RegState::Kill);
797     } else {
798       MachineInstrBuilder MIB =
799           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
800       for (unsigned CRfield : MustSaveCRs)
801         MIB.addReg(CRfield, RegState::ImplicitKill);
802     }
803   };
804 
805   // If we need to spill the CR and the LR but we don't have two separate
806   // registers available, we must spill them one at a time
807   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
808     BuildMoveFromCR();
809     BuildMI(MBB, MBBI, dl, StoreWordInst)
810         .addReg(TempReg, getKillRegState(true))
811         .addImm(CRSaveOffset)
812         .addReg(SPReg);
813   }
814 
815   if (MustSaveLR)
816     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
817 
818   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
819     BuildMoveFromCR();
820 
821   if (HasRedZone) {
822     if (HasFP)
823       BuildMI(MBB, MBBI, dl, StoreInst)
824         .addReg(FPReg)
825         .addImm(FPOffset)
826         .addReg(SPReg);
827     if (FI->usesPICBase())
828       BuildMI(MBB, MBBI, dl, StoreInst)
829         .addReg(PPC::R30)
830         .addImm(PBPOffset)
831         .addReg(SPReg);
832     if (HasBP)
833       BuildMI(MBB, MBBI, dl, StoreInst)
834         .addReg(BPReg)
835         .addImm(BPOffset)
836         .addReg(SPReg);
837   }
838 
839   // Generate the instruction to store the LR. In the case where ROP protection
840   // is required the register holding the LR should not be killed as it will be
841   // used by the hash store instruction.
842   if (MustSaveLR) {
843     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
844         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
845         .addImm(LROffset)
846         .addReg(SPReg);
847 
848     // Add the ROP protection Hash Store instruction.
849     // NOTE: This is technically a violation of the ABI. The hash can be saved
850     // up to 512 bytes into the Protected Zone. This can be outside of the
851     // initial 288 byte volatile program storage region in the Protected Zone.
852     // However, this restriction will be removed in an upcoming revision of the
853     // ABI.
854     if (HasROPProtect) {
855       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
856       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
857       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
858              "ROP hash save offset out of range.");
859       assert(((ImmOffset & 0x7) == 0) &&
860              "ROP hash save offset must be 8 byte aligned.");
861       BuildMI(MBB, StackUpdateLoc, dl, HashST)
862           .addReg(ScratchReg, getKillRegState(true))
863           .addImm(ImmOffset)
864           .addReg(SPReg);
865     }
866   }
867 
868   if (MustSaveCR &&
869       !(SingleScratchReg && MustSaveLR)) {
870     assert(HasRedZone && "A red zone is always available on PPC64");
871     BuildMI(MBB, MBBI, dl, StoreWordInst)
872       .addReg(TempReg, getKillRegState(true))
873       .addImm(CRSaveOffset)
874       .addReg(SPReg);
875   }
876 
877   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
878   if (!FrameSize)
879     return;
880 
881   // Adjust stack pointer: r1 += NegFrameSize.
882   // If there is a preferred stack alignment, align R1 now
883 
884   if (HasBP && HasRedZone) {
885     // Save a copy of r1 as the base pointer.
886     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
887       .addReg(SPReg)
888       .addReg(SPReg);
889   }
890 
891   // Have we generated a STUX instruction to claim stack frame? If so,
892   // the negated frame size will be placed in ScratchReg.
893   bool HasSTUX = false;
894 
895   // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe: the POWER
896   // ABI requires the backchain pointer to always be stored at SP, so the
897   // mandatory STU(X) instruction that claims the frame also probes it.
898   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
899     // To be consistent with other targets, a pseudo instruction is emitted and
900     // will be later expanded in `inlineStackProbe`.
901     BuildMI(MBB, MBBI, dl,
902             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
903                             : PPC::PROBED_STACKALLOC_32))
904         .addDef(TempReg)
905         .addDef(ScratchReg) // ScratchReg stores the old sp.
906         .addImm(NegFrameSize);
907     // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
908     // update the ScratchReg to meet the assumption that ScratchReg contains
909     // the NegFrameSize. This solution is rather tricky.
910     if (!HasRedZone) {
911       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
912           .addReg(ScratchReg)
913           .addReg(SPReg);
914       HasSTUX = true;
915     }
916   } else {
917     // This condition must be kept in sync with canUseAsPrologue.
918     if (HasBP && MaxAlign > 1) {
919       if (isPPC64)
920         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
921             .addReg(SPReg)
922             .addImm(0)
923             .addImm(64 - Log2(MaxAlign));
924       else // PPC32...
925         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
926             .addReg(SPReg)
927             .addImm(0)
928             .addImm(32 - Log2(MaxAlign))
929             .addImm(31);
930       if (!isLargeFrame) {
931         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
932             .addReg(ScratchReg, RegState::Kill)
933             .addImm(NegFrameSize);
934       } else {
935         assert(!SingleScratchReg && "Only a single scratch reg available");
936         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
937             .addImm(NegFrameSize >> 16);
938         BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
939             .addReg(TempReg, RegState::Kill)
940             .addImm(NegFrameSize & 0xFFFF);
941         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
942             .addReg(ScratchReg, RegState::Kill)
943             .addReg(TempReg, RegState::Kill);
944       }
945 
946       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
947           .addReg(SPReg, RegState::Kill)
948           .addReg(SPReg)
949           .addReg(ScratchReg);
950       HasSTUX = true;
951 
952     } else if (!isLargeFrame) {
953       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
954           .addReg(SPReg)
955           .addImm(NegFrameSize)
956           .addReg(SPReg);
957 
958     } else {
959       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
960           .addImm(NegFrameSize >> 16);
961       BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
962           .addReg(ScratchReg, RegState::Kill)
963           .addImm(NegFrameSize & 0xFFFF);
964       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
965           .addReg(SPReg, RegState::Kill)
966           .addReg(SPReg)
967           .addReg(ScratchReg);
968       HasSTUX = true;
969     }
970   }
971 
972   // Save the TOC register after the stack pointer update if a prologue TOC
973   // save is required for the function.
974   if (MustSaveTOC) {
975     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
976     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
977       .addReg(TOCReg, getKillRegState(true))
978       .addImm(TOCSaveOffset)
979       .addReg(SPReg);
980   }
981 
982   if (!HasRedZone) {
983     assert(!isPPC64 && "A red zone is always available on PPC64");
984     if (HasSTUX) {
985       // The negated frame size is in ScratchReg, and the SPReg has been
986       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
987       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
988       // the stack frame (i.e. the old SP), ideally, we would put the old
989       // SP into a register and use it as the base for the stores. The
990       // problem is that the only available register may be ScratchReg,
991       // which could be R0, and R0 cannot be used as a base address.
992 
993       // First, set ScratchReg to the old SP. This may need to be modified
994       // later.
995       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
996         .addReg(ScratchReg, RegState::Kill)
997         .addReg(SPReg);
998 
999       if (ScratchReg == PPC::R0) {
1000         // R0 cannot be used as a base register, but it can be used as an
1001         // index in a store-indexed.
1002         int LastOffset = 0;
1003         if (HasFP)  {
1004           // R0 += (FPOffset-LastOffset).
1005           // Need addic, since addi treats R0 as 0.
1006           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1007             .addReg(ScratchReg)
1008             .addImm(FPOffset-LastOffset);
1009           LastOffset = FPOffset;
1010           // Store FP into *R0.
1011           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1012             .addReg(FPReg, RegState::Kill)  // Save FP.
1013             .addReg(PPC::ZERO)
1014             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1015         }
1016         if (FI->usesPICBase()) {
1017           // R0 += (PBPOffset-LastOffset).
1018           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1019             .addReg(ScratchReg)
1020             .addImm(PBPOffset-LastOffset);
1021           LastOffset = PBPOffset;
1022           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1023             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1024             .addReg(PPC::ZERO)
1025             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1026         }
1027         if (HasBP) {
1028           // R0 += (BPOffset-LastOffset).
1029           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1030             .addReg(ScratchReg)
1031             .addImm(BPOffset-LastOffset);
1032           LastOffset = BPOffset;
1033           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1034             .addReg(BPReg, RegState::Kill)  // Save BP.
1035             .addReg(PPC::ZERO)
1036             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1037           // BP = R0-LastOffset
1038           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1039             .addReg(ScratchReg, RegState::Kill)
1040             .addImm(-LastOffset);
1041         }
1042       } else {
1043         // ScratchReg is not R0, so use it as the base register. It is
1044         // already set to the old SP, so we can use the offsets directly.
1045 
1046         // Now that the stack frame has been allocated, save all the necessary
1047         // registers using ScratchReg as the base address.
1048         if (HasFP)
1049           BuildMI(MBB, MBBI, dl, StoreInst)
1050             .addReg(FPReg)
1051             .addImm(FPOffset)
1052             .addReg(ScratchReg);
1053         if (FI->usesPICBase())
1054           BuildMI(MBB, MBBI, dl, StoreInst)
1055             .addReg(PPC::R30)
1056             .addImm(PBPOffset)
1057             .addReg(ScratchReg);
1058         if (HasBP) {
1059           BuildMI(MBB, MBBI, dl, StoreInst)
1060             .addReg(BPReg)
1061             .addImm(BPOffset)
1062             .addReg(ScratchReg);
1063           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1064             .addReg(ScratchReg, RegState::Kill)
1065             .addReg(ScratchReg);
1066         }
1067       }
1068     } else {
1069       // The frame size is a known 16-bit constant (fitting in the immediate
1070       // field of STWU). To be here we have to be compiling for PPC32.
1071       // Since the SPReg has been decreased by FrameSize, add it back to each
1072       // offset.
1073       if (HasFP)
1074         BuildMI(MBB, MBBI, dl, StoreInst)
1075           .addReg(FPReg)
1076           .addImm(FrameSize + FPOffset)
1077           .addReg(SPReg);
1078       if (FI->usesPICBase())
1079         BuildMI(MBB, MBBI, dl, StoreInst)
1080           .addReg(PPC::R30)
1081           .addImm(FrameSize + PBPOffset)
1082           .addReg(SPReg);
1083       if (HasBP) {
1084         BuildMI(MBB, MBBI, dl, StoreInst)
1085           .addReg(BPReg)
1086           .addImm(FrameSize + BPOffset)
1087           .addReg(SPReg);
1088         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1089           .addReg(SPReg)
1090           .addImm(FrameSize);
1091       }
1092     }
1093   }
1094 
1095   // Add Call Frame Information for the instructions we generated above.
1096   if (needsCFI) {
1097     unsigned CFIIndex;
1098 
1099     if (HasBP) {
1100       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1101       // because if the stack needed aligning then CFA won't be at a fixed
1102       // offset from FP/SP.
1103       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1104       CFIIndex = MF.addFrameInst(
1105           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1106     } else {
1107       // Adjust the definition of CFA to account for the change in SP.
1108       assert(NegFrameSize);
1109       CFIIndex = MF.addFrameInst(
1110           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1111     }
1112     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1113         .addCFIIndex(CFIIndex);
1114 
1115     if (HasFP) {
1116       // Describe where FP was saved, at a fixed offset from CFA.
1117       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1118       CFIIndex = MF.addFrameInst(
1119           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1120       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1121           .addCFIIndex(CFIIndex);
1122     }
1123 
1124     if (FI->usesPICBase()) {
1125       // Describe where FP was saved, at a fixed offset from CFA.
1126       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1127       CFIIndex = MF.addFrameInst(
1128           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1129       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1130           .addCFIIndex(CFIIndex);
1131     }
1132 
1133     if (HasBP) {
1134       // Describe where BP was saved, at a fixed offset from CFA.
1135       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1136       CFIIndex = MF.addFrameInst(
1137           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1138       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1139           .addCFIIndex(CFIIndex);
1140     }
1141 
1142     if (MustSaveLR) {
1143       // Describe where LR was saved, at a fixed offset from CFA.
1144       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1145       CFIIndex = MF.addFrameInst(
1146           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1147       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1148           .addCFIIndex(CFIIndex);
1149     }
1150   }
1151 
1152   // If there is a frame pointer, copy R1 into R31
1153   if (HasFP) {
1154     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1155       .addReg(SPReg)
1156       .addReg(SPReg);
1157 
1158     if (!HasBP && needsCFI) {
1159       // Change the definition of CFA from SP+offset to FP+offset, because SP
1160       // will change at every alloca.
1161       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1162       unsigned CFIIndex = MF.addFrameInst(
1163           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1164 
1165       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1166           .addCFIIndex(CFIIndex);
1167     }
1168   }
1169 
1170   if (needsCFI) {
1171     // Describe where callee saved registers were saved, at fixed offsets from
1172     // CFA.
1173     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1174     for (const CalleeSavedInfo &I : CSI) {
1175       unsigned Reg = I.getReg();
1176       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1177 
1178       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1179       // subregisters of CR2. We just need to emit a move of CR2.
1180       if (PPC::CRBITRCRegClass.contains(Reg))
1181         continue;
1182 
1183       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1184         continue;
1185 
1186       // For SVR4, don't emit a move for the CR spill slot if we haven't
1187       // spilled CRs.
1188       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1189           && !MustSaveCR)
1190         continue;
1191 
1192       // For 64-bit SVR4 when we have spilled CRs, the spill location
1193       // is SP+8, not a frame-relative slot.
1194       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1195         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1196         // the whole CR word.  In the ELFv2 ABI, every CR that was
1197         // actually saved gets its own CFI record.
1198         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1199         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1200             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1201         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1202             .addCFIIndex(CFIIndex);
1203         continue;
1204       }
1205 
1206       if (I.isSpilledToReg()) {
1207         unsigned SpilledReg = I.getDstReg();
1208         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1209             nullptr, MRI->getDwarfRegNum(Reg, true),
1210             MRI->getDwarfRegNum(SpilledReg, true)));
1211         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1212           .addCFIIndex(CFIRegister);
1213       } else {
1214         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1215         // We have changed the object offset above but we do not want to change
1216         // the actual offsets in the CFI instruction so we have to undo the
1217         // offset change here.
1218         if (MovingStackUpdateDown)
1219           Offset -= NegFrameSize;
1220 
1221         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1222             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1223         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1224             .addCFIIndex(CFIIndex);
1225       }
1226     }
1227   }
1228 }
1229 
1230 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1231                                         MachineBasicBlock &PrologMBB) const {
1232   bool isPPC64 = Subtarget.isPPC64();
1233   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1234   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1235   MachineFrameInfo &MFI = MF.getFrameInfo();
1236   MachineModuleInfo &MMI = MF.getMMI();
1237   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
1238   // AIX assembler does not support cfi directives.
1239   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1240   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1241     int Opc = MI.getOpcode();
1242     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1243   });
1244   if (StackAllocMIPos == PrologMBB.end())
1245     return;
1246   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1247   MachineBasicBlock *CurrentMBB = &PrologMBB;
1248   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1249   MachineInstr &MI = *StackAllocMIPos;
1250   int64_t NegFrameSize = MI.getOperand(2).getImm();
1251   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1252   int64_t NegProbeSize = -(int64_t)ProbeSize;
1253   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
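  // Split the allocation into NumBlocks chunks of ProbeSize bytes plus a
  // residual chunk smaller than ProbeSize; each chunk is claimed with its own
  // probing store.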
1254   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1255   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1256   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1257   Register ScratchReg = MI.getOperand(0).getReg();
1258   Register FPReg = MI.getOperand(1).getReg();
1259   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1260   bool HasBP = RegInfo->hasBasePointer(MF);
1261   Register BPReg = RegInfo->getBaseRegister(MF);
1262   Align MaxAlign = MFI.getMaxAlign();
1263   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1264   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1265   // Subroutines to generate .cfi_* directives.
1266   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1267                             MachineBasicBlock::iterator MBBI, Register Reg) {
1268     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1269     unsigned CFIIndex = MF.addFrameInst(
1270         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1271     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1272         .addCFIIndex(CFIIndex);
1273   };
1274   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1275                          MachineBasicBlock::iterator MBBI, Register Reg,
1276                          int Offset) {
1277     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1278     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1279         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1280     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1281         .addCFIIndex(CFIIndex);
1282   };
1283   // Subroutine to determine if we can use the Imm as part of d-form.
1284   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1285   // Subroutine to materialize the Imm into TempReg.
1286   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1287                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1288                             Register &TempReg) {
1289     assert(isInt<32>(Imm) && "Unhandled imm");
1290     if (isInt<16>(Imm))
1291       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1292           .addImm(Imm);
1293     else {
1294       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1295           .addImm(Imm >> 16);
1296       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1297           .addReg(TempReg)
1298           .addImm(Imm & 0xFFFF);
1299     }
1300   };
1301   // Subroutine to store frame pointer and decrease stack pointer by probe size.
1302   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1303                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1304                               Register NegSizeReg, bool UseDForm,
1305                               Register StoreReg) {
1306     if (UseDForm)
1307       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1308           .addReg(StoreReg)
1309           .addImm(NegSize)
1310           .addReg(SPReg);
1311     else
1312       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1313           .addReg(StoreReg)
1314           .addReg(SPReg)
1315           .addReg(NegSizeReg);
1316   };
1317   // Used to probe the stack when realignment is required.
1318   // Note that, per the ABI's requirement, *sp must always equal the value of
1319   // the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
1320   // The following is pseudo code:
1321   // final_sp = (sp & align) + negframesize;
1322   // neg_gap = final_sp - sp;
1323   // while (neg_gap < negprobesize) {
1324   //   stdu fp, negprobesize(sp);
1325   //   neg_gap -= negprobesize;
1326   // }
1327   // stdux fp, sp, neg_gap
1328   //
1329   // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
1330   // before the probe code, so we don't need to save it again. That frees one
1331   // additional register that can be used to materialize the probe size if we
1332   // want to use the X-form. Otherwise, we cannot materialize the probe size,
1333   // so we can only use the D-form for now.
1334   //
1335   // The allocations are:
1336   // if (HasBP && HasRedZone) {
1337   //   r0: materialize the probe size if needed so that we can use the X-form.
1338   //   r12: `neg_gap`
1339   // } else {
1340   //   r0: back-chain pointer
1341   //   r12: `neg_gap`.
1342   // }
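       // Illustrative walk-through (the numbers are assumptions for exposition):
       // with negprobesize = -4096 and an initial neg_gap of -10000, the loop body
       // runs twice (stdu by -4096 each time, leaving neg_gap = -1808), and the
       // trailing stdux allocates the remaining -1808 bytes in a single step.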
1343   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1344                                  MachineBasicBlock::iterator MBBI,
1345                                  Register ScratchReg, Register TempReg) {
1346     assert(HasBP && "The function is supposed to have base pointer when its "
1347                     "stack is realigned.");
1348     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1349 
1350     // FIXME: We can eliminate this limitation if we get more information about
1351     // which parts of the red zone are already used. A used red zone can be
1352     // treated as probed, but there might be `holes' in the probed red zone,
1353     // which could complicate the implementation.
1354     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1355            "Probe size should be larger or equal to the size of red-zone so "
1356            "that red-zone is not clobbered by probing.");
1357 
1358     Register &FinalStackPtr = TempReg;
1359     // FIXME: We only support a NegProbeSize that is materializable by the
1360     // D-form currently. When HasBP && HasRedZone, we can use the X-form if we
1361     // have an additional idle register.
1362     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1363     assert(isInt<16>(NegProbeSize) &&
1364            "NegProbeSize should be materializable by DForm");
1365     Register CRReg = PPC::CR0;
1366     // The layout of the emitted assembly looks roughly like:
1367     // bb.0:
1368     //   ...
1369     //   sub $scratchreg, $finalsp, r1
1370     //   cmpdi $scratchreg, <negprobesize>
1371     //   bge bb.2
1372     // bb.1:
1373     //   stdu <backchain>, <negprobesize>(r1)
1374     //   sub $scratchreg, $scratchreg, negprobesize
1375     //   cmpdi $scratchreg, <negprobesize>
1376     //   blt bb.1
1377     // bb.2:
1378     //   stdux <backchain>, r1, $scratchreg
1379     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1380     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1381     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1382     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1383     MF.insert(MBBInsertPoint, ProbeExitMBB);
1384     // bb.2
1385     {
1386       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1387       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1388                        BackChainPointer);
1389       if (HasRedZone)
1390         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp; copy BPReg
1391         // to TempReg to satisfy it.
1392         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1393             .addReg(BPReg)
1394             .addReg(BPReg);
1395       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1396       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1397     }
1398     // bb.0
1399     {
1400       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1401           .addReg(SPReg)
1402           .addReg(FinalStackPtr);
1403       if (!HasRedZone)
1404         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1405       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1406           .addReg(ScratchReg)
1407           .addImm(NegProbeSize);
1408       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1409           .addImm(PPC::PRED_GE)
1410           .addReg(CRReg)
1411           .addMBB(ProbeExitMBB);
1412       MBB.addSuccessor(ProbeLoopBodyMBB);
1413       MBB.addSuccessor(ProbeExitMBB);
1414     }
1415     // bb.1
1416     {
1417       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1418       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1419                        0, true /*UseDForm*/, BackChainPointer);
1420       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1421               ScratchReg)
1422           .addReg(ScratchReg)
1423           .addImm(-NegProbeSize);
1424       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1425               CRReg)
1426           .addReg(ScratchReg)
1427           .addImm(NegProbeSize);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1429           .addImm(PPC::PRED_LT)
1430           .addReg(CRReg)
1431           .addMBB(ProbeLoopBodyMBB);
1432       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1433       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1434     }
1435     // Update liveins.
1436     recomputeLiveIns(*ProbeLoopBodyMBB);
1437     recomputeLiveIns(*ProbeExitMBB);
1438     return ProbeExitMBB;
1439   };
1440   // For the case HasBP && MaxAlign > 1, we have to realign the SP by performing
1441   // SP = SP - SP % MaxAlign, which makes the probe more like a dynamic probe,
1442   // since the offset subtracted from SP is determined by SP's runtime value.
1443   if (HasBP && MaxAlign > 1) {
1444     // Calculate final stack pointer.
1445     if (isPPC64)
1446       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1447           .addReg(SPReg)
1448           .addImm(0)
1449           .addImm(64 - Log2(MaxAlign));
1450     else
1451       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1452           .addReg(SPReg)
1453           .addImm(0)
1454           .addImm(32 - Log2(MaxAlign))
1455           .addImm(31);
1456     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1457             FPReg)
1458         .addReg(ScratchReg)
1459         .addReg(SPReg);
1460     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1461     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1462             FPReg)
1463         .addReg(ScratchReg)
1464         .addReg(FPReg);
1465     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1466     if (needsCFI)
1467       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1468   } else {
1469     // Initialize current frame pointer.
1470     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1471     // Use FPReg to calculate CFA.
1472     if (needsCFI)
1473       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1474     // Probe residual part.
1475     if (NegResidualSize) {
1476       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1477       if (!ResidualUseDForm)
1478         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1479       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1480                        ResidualUseDForm, FPReg);
1481     }
1482     bool UseDForm = CanUseDForm(NegProbeSize);
1483     // If the number of blocks is small, just probe them directly.
1484     if (NumBlocks < 3) {
1485       if (!UseDForm)
1486         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1487       for (int i = 0; i < NumBlocks; ++i)
1488         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1489                          FPReg);
1490       if (needsCFI) {
1491         // Restore using SPReg to calculate CFA.
1492         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1493       }
1494     } else {
1495       // Since CTR is a volatile register and the current shrink-wrap
1496       // implementation won't choose an MBB inside a loop as the PrologMBB, it
1497       // is safe to synthesize a CTR loop to probe.
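           // A rough sketch of what this synthesizes (assuming 64-bit and a probe
           // size encodable in the D-form; register choices are illustrative):
           //     li    r0, NumBlocks
           //     mtctr r0
           //   Loop:
           //     stdu  fp, -ProbeSize(r1)
           //     bdnz  Loop
           //   Exit: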
1498       // Calculate the trip count and store it in CTRReg.
1499       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1500       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1501           .addReg(ScratchReg, RegState::Kill);
1502       if (!UseDForm)
1503         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1504       // Create MBBs of the loop.
1505       MachineFunction::iterator MBBInsertPoint =
1506           std::next(CurrentMBB->getIterator());
1507       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1508       MF.insert(MBBInsertPoint, LoopMBB);
1509       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1510       MF.insert(MBBInsertPoint, ExitMBB);
1511       // Synthesize the loop body.
1512       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1513                        UseDForm, FPReg);
1514       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1515           .addMBB(LoopMBB);
1516       LoopMBB->addSuccessor(ExitMBB);
1517       LoopMBB->addSuccessor(LoopMBB);
1518       // Synthesize the exit MBB.
1519       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1520                       std::next(MachineBasicBlock::iterator(MI)),
1521                       CurrentMBB->end());
1522       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1523       CurrentMBB->addSuccessor(LoopMBB);
1524       if (needsCFI) {
1525         // Restore using SPReg to calculate CFA.
1526         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1527       }
1528       // Update liveins.
1529       recomputeLiveIns(*LoopMBB);
1530       recomputeLiveIns(*ExitMBB);
1531     }
1532   }
1533   ++NumPrologProbed;
1534   MI.eraseFromParent();
1535 }
1536 
1537 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1538                                     MachineBasicBlock &MBB) const {
1539   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1540   DebugLoc dl;
1541 
1542   if (MBBI != MBB.end())
1543     dl = MBBI->getDebugLoc();
1544 
1545   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1546   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1547 
1548   // Get alignment info so we know how to restore the SP.
1549   const MachineFrameInfo &MFI = MF.getFrameInfo();
1550 
1551   // Get the number of bytes allocated from the FrameInfo.
1552   int64_t FrameSize = MFI.getStackSize();
1553 
1554   // Get processor type.
1555   bool isPPC64 = Subtarget.isPPC64();
1556 
1557   // Check if the link register (LR) has been saved.
1558   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1559   bool MustSaveLR = FI->mustSaveLR();
1560   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1561   bool MustSaveCR = !MustSaveCRs.empty();
1562   // Do we have a frame pointer and/or base pointer for this function?
1563   bool HasFP = hasFP(MF);
1564   bool HasBP = RegInfo->hasBasePointer(MF);
1565   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1566   bool HasROPProtect = Subtarget.hasROPProtect();
1567   bool HasPrivileged = Subtarget.hasPrivileged();
1568 
1569   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1570   Register BPReg = RegInfo->getBaseRegister(MF);
1571   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1572   Register ScratchReg;
1573   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1574   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1575                                                  : PPC::MTLR );
1576   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1577                                                  : PPC::LWZ );
1578   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1579                                                            : PPC::LIS );
1580   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1581                                               : PPC::OR );
1582   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1583                                                   : PPC::ORI );
1584   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1585                                                    : PPC::ADDI );
1586   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1587                                                 : PPC::ADD4 );
1588   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1589                                                      : PPC::LWZ);
1590   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1591                                                      : PPC::MTOCRF);
1592   const MCInstrDesc &HashChk =
1593       TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK);
1594   int64_t LROffset = getReturnSaveOffset();
1595 
1596   int64_t FPOffset = 0;
1597 
1598   // Use the same bool variable as below so the compiler does not warn about an otherwise-unused value.
1599   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1600                                               &TempReg);
1601   assert(SingleScratchReg &&
1602          "Could not find an available scratch register");
1603 
1604   SingleScratchReg = ScratchReg == TempReg;
1605 
1606   if (HasFP) {
1607     int FPIndex = FI->getFramePointerSaveIndex();
1608     assert(FPIndex && "No Frame Pointer Save Slot!");
1609     FPOffset = MFI.getObjectOffset(FPIndex);
1610   }
1611 
1612   int64_t BPOffset = 0;
1613   if (HasBP) {
1614       int BPIndex = FI->getBasePointerSaveIndex();
1615       assert(BPIndex && "No Base Pointer Save Slot!");
1616       BPOffset = MFI.getObjectOffset(BPIndex);
1617   }
1618 
1619   int64_t PBPOffset = 0;
1620   if (FI->usesPICBase()) {
1621     int PBPIndex = FI->getPICBasePointerSaveIndex();
1622     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1623     PBPOffset = MFI.getObjectOffset(PBPIndex);
1624   }
1625 
1626   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1627 
1628   if (IsReturnBlock) {
1629     unsigned RetOpcode = MBBI->getOpcode();
1630     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1631                       RetOpcode == PPC::TCRETURNdi ||
1632                       RetOpcode == PPC::TCRETURNai ||
1633                       RetOpcode == PPC::TCRETURNri8 ||
1634                       RetOpcode == PPC::TCRETURNdi8 ||
1635                       RetOpcode == PPC::TCRETURNai8;
1636 
1637     if (UsesTCRet) {
1638       int MaxTCRetDelta = FI->getTailCallSPDelta();
1639       MachineOperand &StackAdjust = MBBI->getOperand(1);
1640       assert(StackAdjust.isImm() && "Expecting immediate value.");
1641       // Adjust stack pointer.
1642       int StackAdj = StackAdjust.getImm();
1643       int Delta = StackAdj - MaxTCRetDelta;
1644       assert((Delta >= 0) && "Delta must be positive");
1645       if (MaxTCRetDelta>0)
1646         FrameSize += (StackAdj +Delta);
1647       else
1648         FrameSize += StackAdj;
1649     }
1650   }
1651 
1652   // Frames of 32KB & larger require special handling because they cannot be
1653   // indexed into with a simple LD/LWZ immediate offset operand.
1654   bool isLargeFrame = !isInt<16>(FrameSize);
1655 
1656   // On targets without a red zone, the SP needs to be restored last, so that
1657   // all live contents of the stack frame are upwards of the SP. This means
1658   // that we cannot restore SP just now, since there may be more registers
1659   // to restore from the stack frame (e.g. R31). If the frame size is not
1660   // a simple immediate value, we will need a spare register to hold the
1661   // restored SP. If the frame size is known and small, we can simply adjust
1662   // the offsets of the registers to be restored, and still use SP to restore
1663   // them. In such a case, the final update of SP will be to add the frame
1664   // size to it.
1665   // To simplify the code, set RBReg to the base register used to restore
1666   // values from the stack, and set SPAdd to the value that needs to be added
1667   // to the SP at the end. The default values are as if red zone was present.
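       // As a concrete illustration (one of the simpler cases; the large-frame and
       // base-pointer paths below differ): with a red zone and a small known frame,
       // the epilogue can simply do "addi r1, r1, FrameSize" and then reload the
       // remaining saved registers from their unchanged negative offsets, which now
       // lie in the red zone; without a red zone the reloads use offsets biased by
       // SPAdd and r1 is updated last.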
1668   unsigned RBReg = SPReg;
1669   unsigned SPAdd = 0;
1670 
1671   // Check if we can move the stack update instruction up the epilogue
1672   // past the callee-save restores. This allows the move-to-LR instruction
1673   // to be executed before those restores, which means the callee-save
1674   // reloads can hide the latency of the MTLR instruction.
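       // Illustration (an assumed schedule, not a guarantee): when the move is
       // legal, the SP update and the LR reload are emitted at StackUpdateLoc, so
       // on 64-bit ELF the epilogue can look roughly like
       //   addi r1, r1, <framesize>; ld r0, 16(r1); mtlr r0; <CSR reloads>; blr
       // rather than performing every CSR reload before the mtlr.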
1675   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1676   if (stackUpdateCanBeMoved(MF)) {
1677     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1678     for (CalleeSavedInfo CSI : Info) {
1679       // If the callee-saved register is spilled to another register, abort the
1680       // stack update movement.
1681       if (CSI.isSpilledToReg()) {
1682         StackUpdateLoc = MBBI;
1683         break;
1684       }
1685       int FrIdx = CSI.getFrameIdx();
1686       // If the frame index is not negative, the callee-saved info belongs to a
1687       // stack object that is not a fixed stack object. We ignore non-fixed
1688       // stack objects because we won't move the update of the stack pointer
1689       // past them.
1690       if (FrIdx >= 0)
1691         continue;
1692 
1693       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1694         StackUpdateLoc--;
1695       else {
1696         // Abort the operation as we can't update all CSR restores.
1697         StackUpdateLoc = MBBI;
1698         break;
1699       }
1700     }
1701   }
1702 
1703   if (FrameSize) {
1704     // In the prologue, the loaded (or persistent) stack pointer value is
1705     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
1706     // zone, add this offset back now.
1707 
1708     // If the function has a base pointer, the stack pointer has been copied
1709     // to it so we can restore it by copying in the other direction.
1710     if (HasRedZone && HasBP) {
1711       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1712         addReg(BPReg).
1713         addReg(BPReg);
1714     }
1715     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1716     // enabled (=> hasFastCall()==true), the fastcc call might contain a tail
1717     // call which invalidates the stack pointer value in SP(0), so we use the
1718     // value of R31 in this case. A similar situation exists with setjmp.
1719     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1720       assert(HasFP && "Expecting a valid frame pointer.");
1721       if (!HasRedZone)
1722         RBReg = FPReg;
1723       if (!isLargeFrame) {
1724         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1725           .addReg(FPReg).addImm(FrameSize);
1726       } else {
1727         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1728           .addImm(FrameSize >> 16);
1729         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1730           .addReg(ScratchReg, RegState::Kill)
1731           .addImm(FrameSize & 0xFFFF);
1732         BuildMI(MBB, MBBI, dl, AddInst)
1733           .addReg(RBReg)
1734           .addReg(FPReg)
1735           .addReg(ScratchReg);
1736       }
1737     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1738       if (HasRedZone) {
1739         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1740           .addReg(SPReg)
1741           .addImm(FrameSize);
1742       } else {
1743         // Make sure that adding FrameSize will not overflow the max offset
1744         // size.
1745         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1746                "Local offsets should be negative");
1747         SPAdd = FrameSize;
1748         FPOffset += FrameSize;
1749         BPOffset += FrameSize;
1750         PBPOffset += FrameSize;
1751       }
1752     } else {
1753       // We don't want to use ScratchReg as a base register, because it
1754       // could happen to be R0. Use FP instead, but make sure to preserve it.
1755       if (!HasRedZone) {
1756         // If FP is not saved, copy it to ScratchReg.
1757         if (!HasFP)
1758           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1759             .addReg(FPReg)
1760             .addReg(FPReg);
1761         RBReg = FPReg;
1762       }
1763       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1764         .addImm(0)
1765         .addReg(SPReg);
1766     }
1767   }
1768   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1769   // If there is no red zone, ScratchReg may be needed for holding a useful
1770   // value (although not the base register). Make sure it is not overwritten
1771   // too early.
1772 
1773   // If we need to restore both the LR and the CR and we only have one
1774   // available scratch register, we must do them one at a time.
1775   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1776     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1777     // is live here.
1778     assert(HasRedZone && "Expecting red zone");
1779     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1780       .addImm(CRSaveOffset)
1781       .addReg(SPReg);
1782     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1783       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1784         .addReg(TempReg, getKillRegState(i == e-1));
1785   }
1786 
1787   // Delay restoring the LR if ScratchReg is needed. This is OK, since
1788   // LR is stored in the caller's stack frame. ScratchReg will be needed
1789   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1790   // a base register anyway, because it may happen to be R0.
1791   bool LoadedLR = false;
1792   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1793     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1794       .addImm(LROffset+SPAdd)
1795       .addReg(RBReg);
1796     LoadedLR = true;
1797   }
1798 
1799   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1800     assert(RBReg == SPReg && "Should be using SP as a base register");
1801     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1802       .addImm(CRSaveOffset)
1803       .addReg(RBReg);
1804   }
1805 
1806   if (HasFP) {
1807     // If there is a red zone, restore FP directly, since SP has already been
1808     // restored. Otherwise, restore the value of FP into ScratchReg.
1809     if (HasRedZone || RBReg == SPReg)
1810       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1811         .addImm(FPOffset)
1812         .addReg(SPReg);
1813     else
1814       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1815         .addImm(FPOffset)
1816         .addReg(RBReg);
1817   }
1818 
1819   if (FI->usesPICBase())
1820     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1821       .addImm(PBPOffset)
1822       .addReg(RBReg);
1823 
1824   if (HasBP)
1825     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1826       .addImm(BPOffset)
1827       .addReg(RBReg);
1828 
1829   // There is nothing more to be loaded from the stack, so now we can
1830   // restore SP: SP = RBReg + SPAdd.
1831   if (RBReg != SPReg || SPAdd != 0) {
1832     assert(!HasRedZone && "This should not happen with red zone");
1833     // If SPAdd is 0, generate a copy.
1834     if (SPAdd == 0)
1835       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1836         .addReg(RBReg)
1837         .addReg(RBReg);
1838     else
1839       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1840         .addReg(RBReg)
1841         .addImm(SPAdd);
1842 
1843     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1844     if (RBReg == FPReg)
1845       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1846         .addReg(ScratchReg)
1847         .addReg(ScratchReg);
1848 
1849     // Now load the LR from the caller's stack frame.
1850     if (MustSaveLR && !LoadedLR)
1851       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1852         .addImm(LROffset)
1853         .addReg(SPReg);
1854   }
1855 
1856   if (MustSaveCR &&
1857       !(SingleScratchReg && MustSaveLR))
1858     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1859       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1860         .addReg(TempReg, getKillRegState(i == e-1));
1861 
1862   if (MustSaveLR) {
1863     // If ROP protection is required, an extra instruction is added to compute a
1864     // hash and then compare it to the hash stored in the prologue.
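         // (HASHCHK/HASHCHKP recompute the hash from the link-register value in
         // ScratchReg and the stack address, and trap on a mismatch with the value
         // the prologue stored via HASHST/HASHSTP; this is a summary based on the
         // ISA 3.1 opcode semantics rather than anything specific to this function.)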
1865     if (HasROPProtect) {
1866       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1867       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1868       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1869              "ROP hash check location offset out of range.");
1870       assert(((ImmOffset & 0x7) == 0) &&
1871              "ROP hash check location offset must be 8 byte aligned.");
1872       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1873           .addReg(ScratchReg)
1874           .addImm(ImmOffset)
1875           .addReg(SPReg);
1876     }
1877     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1878   }
1879 
1880   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1881   // call optimization.
1882   if (IsReturnBlock) {
1883     unsigned RetOpcode = MBBI->getOpcode();
1884     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1885         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1886         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1887       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1888       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1889 
1890       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1891         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1892           .addReg(SPReg).addImm(CallerAllocatedAmt);
1893       } else {
1894         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1895           .addImm(CallerAllocatedAmt >> 16);
1896         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1897           .addReg(ScratchReg, RegState::Kill)
1898           .addImm(CallerAllocatedAmt & 0xFFFF);
1899         BuildMI(MBB, MBBI, dl, AddInst)
1900           .addReg(SPReg)
1901           .addReg(FPReg)
1902           .addReg(ScratchReg);
1903       }
1904     } else {
1905       createTailCallBranchInstr(MBB);
1906     }
1907   }
1908 }
1909 
1910 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1911   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1912 
1913   // If we got this far, a first terminator should exist.
1914   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1915 
1916   DebugLoc dl = MBBI->getDebugLoc();
1917   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1918 
1919   // Create branch instruction for pseudo tail call return instruction.
1920   // The TCRETURNdi variants are direct calls. Valid targets for those are
1921   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1922   // since we can tail call external functions with PC-Rel (i.e. we don't need
1923   // to worry about different TOC pointers). Some of the external functions will
1924   // be MO_GlobalAddress, while others, like memcpy for example, are going to
1925   // be MO_ExternalSymbol.
1926   unsigned RetOpcode = MBBI->getOpcode();
1927   if (RetOpcode == PPC::TCRETURNdi) {
1928     MBBI = MBB.getLastNonDebugInstr();
1929     MachineOperand &JumpTarget = MBBI->getOperand(0);
1930     if (JumpTarget.isGlobal())
1931       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1932         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1933     else if (JumpTarget.isSymbol())
1934       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1935         addExternalSymbol(JumpTarget.getSymbolName());
1936     else
1937       llvm_unreachable("Expecting Global or External Symbol");
1938   } else if (RetOpcode == PPC::TCRETURNri) {
1939     MBBI = MBB.getLastNonDebugInstr();
1940     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1941     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1942   } else if (RetOpcode == PPC::TCRETURNai) {
1943     MBBI = MBB.getLastNonDebugInstr();
1944     MachineOperand &JumpTarget = MBBI->getOperand(0);
1945     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1946   } else if (RetOpcode == PPC::TCRETURNdi8) {
1947     MBBI = MBB.getLastNonDebugInstr();
1948     MachineOperand &JumpTarget = MBBI->getOperand(0);
1949     if (JumpTarget.isGlobal())
1950       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1951         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1952     else if (JumpTarget.isSymbol())
1953       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1954         addExternalSymbol(JumpTarget.getSymbolName());
1955     else
1956       llvm_unreachable("Expecting Global or External Symbol");
1957   } else if (RetOpcode == PPC::TCRETURNri8) {
1958     MBBI = MBB.getLastNonDebugInstr();
1959     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1960     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1961   } else if (RetOpcode == PPC::TCRETURNai8) {
1962     MBBI = MBB.getLastNonDebugInstr();
1963     MachineOperand &JumpTarget = MBBI->getOperand(0);
1964     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1965   }
1966 }
1967 
1968 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1969                                             BitVector &SavedRegs,
1970                                             RegScavenger *RS) const {
1971   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1972 
1973   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1974 
1975   //  Save and clear the LR state.
1976   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1977   unsigned LR = RegInfo->getRARegister();
1978   FI->setMustSaveLR(MustSaveLR(MF, LR));
1979   SavedRegs.reset(LR);
1980 
1981   //  Save R31 if necessary
1982   int FPSI = FI->getFramePointerSaveIndex();
1983   const bool isPPC64 = Subtarget.isPPC64();
1984   MachineFrameInfo &MFI = MF.getFrameInfo();
1985 
1986   // If the frame pointer save index hasn't been defined yet.
1987   if (!FPSI && needsFP(MF)) {
1988     // Find out the fixed offset of the frame pointer save area.
1989     int FPOffset = getFramePointerSaveOffset();
1990     // Allocate the frame index for frame pointer save area.
1991     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1992     // Save the result.
1993     FI->setFramePointerSaveIndex(FPSI);
1994   }
1995 
1996   int BPSI = FI->getBasePointerSaveIndex();
1997   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1998     int BPOffset = getBasePointerSaveOffset();
1999     // Allocate the frame index for the base pointer save area.
2000     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2001     // Save the result.
2002     FI->setBasePointerSaveIndex(BPSI);
2003   }
2004 
2005   // Reserve stack space for the PIC Base register (R30).
2006   // Only used in SVR4 32-bit.
2007   if (FI->usesPICBase()) {
2008     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2009     FI->setPICBasePointerSaveIndex(PBPSI);
2010   }
2011 
2012   // Make sure we don't explicitly spill r31, because, for example, we have
2013   // some inline asm which explicitly clobbers it, when we otherwise have a
2014   // frame pointer and are using r31's spill slot for the prologue/epilogue
2015   // code. Same goes for the base pointer and the PIC base register.
2016   if (needsFP(MF))
2017     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2018   if (RegInfo->hasBasePointer(MF))
2019     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2020   if (FI->usesPICBase())
2021     SavedRegs.reset(PPC::R30);
2022 
2023   // Reserve stack space to move the linkage area to in case of a tail call.
2024   int TCSPDelta = 0;
2025   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2026       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2027     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2028   }
2029 
2030   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2031   // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
2032   // object at the offset of the CR-save slot in the linkage area. The actual
2033   // save and restore of the condition register will be created as part of the
2034   // prologue and epilogue insertion, but the FixedStack object is needed to
2035   // keep the CalleeSavedInfo valid.
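       // (Concretely, the code below chooses SpillOffset = 8 on 64-bit targets,
       // 4 on 32-bit AIX, and -4 on 32-bit ELF.)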
2036   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2037        SavedRegs.test(PPC::CR4))) {
2038     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2039     const int64_t SpillOffset =
2040         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2041     int FrameIdx =
2042         MFI.CreateFixedObject(SpillSize, SpillOffset,
2043                               /* IsImmutable */ true, /* IsAliased */ false);
2044     FI->setCRSpillFrameIndex(FrameIdx);
2045   }
2046 }
2047 
2048 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2049                                                        RegScavenger *RS) const {
2050   // Get callee saved register information.
2051   MachineFrameInfo &MFI = MF.getFrameInfo();
2052   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2053 
2054   // If the function is shrink-wrapped and has a tail call, the tail call might
2055   // not be in the new RestoreBlock, so a real branch instruction won't be
2056   // generated by emitEpilogue(), because shrink-wrapping has chosen a new
2057   // RestoreBlock. So we handle this case here.
2058   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2059     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2060     for (MachineBasicBlock &MBB : MF) {
2061       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2062         createTailCallBranchInstr(MBB);
2063     }
2064   }
2065 
2066   // Early exit if no callee saved registers are modified!
2067   if (CSI.empty() && !needsFP(MF)) {
2068     addScavengingSpillSlot(MF, RS);
2069     return;
2070   }
2071 
2072   unsigned MinGPR = PPC::R31;
2073   unsigned MinG8R = PPC::X31;
2074   unsigned MinFPR = PPC::F31;
2075   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2076 
2077   bool HasGPSaveArea = false;
2078   bool HasG8SaveArea = false;
2079   bool HasFPSaveArea = false;
2080   bool HasVRSaveArea = false;
2081 
2082   SmallVector<CalleeSavedInfo, 18> GPRegs;
2083   SmallVector<CalleeSavedInfo, 18> G8Regs;
2084   SmallVector<CalleeSavedInfo, 18> FPRegs;
2085   SmallVector<CalleeSavedInfo, 18> VRegs;
2086 
2087   for (const CalleeSavedInfo &I : CSI) {
2088     unsigned Reg = I.getReg();
2089     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2090             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2091            "Not expecting to try to spill R2 in a function that must save TOC");
2092     if (PPC::GPRCRegClass.contains(Reg)) {
2093       HasGPSaveArea = true;
2094 
2095       GPRegs.push_back(I);
2096 
2097       if (Reg < MinGPR) {
2098         MinGPR = Reg;
2099       }
2100     } else if (PPC::G8RCRegClass.contains(Reg)) {
2101       HasG8SaveArea = true;
2102 
2103       G8Regs.push_back(I);
2104 
2105       if (Reg < MinG8R) {
2106         MinG8R = Reg;
2107       }
2108     } else if (PPC::F8RCRegClass.contains(Reg)) {
2109       HasFPSaveArea = true;
2110 
2111       FPRegs.push_back(I);
2112 
2113       if (Reg < MinFPR) {
2114         MinFPR = Reg;
2115       }
2116     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2117                PPC::CRRCRegClass.contains(Reg)) {
2118       ; // do nothing, as we already know whether CRs are spilled
2119     } else if (PPC::VRRCRegClass.contains(Reg) ||
2120                PPC::SPERCRegClass.contains(Reg)) {
2121       // Altivec and SPE are mutually exclusive, but have the same stack
2122       // alignment requirements, so overload the save area for both cases.
2123       HasVRSaveArea = true;
2124 
2125       VRegs.push_back(I);
2126 
2127       if (Reg < MinVR) {
2128         MinVR = Reg;
2129       }
2130     } else {
2131       llvm_unreachable("Unknown RegisterClass!");
2132     }
2133   }
2134 
2135   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2136   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2137 
2138   int64_t LowerBound = 0;
2139 
2140   // Take into account stack space reserved for tail calls.
2141   int TCSPDelta = 0;
2142   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2143       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2144     LowerBound = TCSPDelta;
2145   }
2146 
2147   // The Floating-point register save area is right below the back chain word
2148   // of the previous stack frame.
2149   if (HasFPSaveArea) {
2150     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2151       int FI = FPRegs[i].getFrameIdx();
2152 
2153       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2154     }
2155 
2156     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2157   }
2158 
2159   // Check whether the frame pointer register is allocated. If so, make sure it
2160   // is spilled to the correct offset.
2161   if (needsFP(MF)) {
2162     int FI = PFI->getFramePointerSaveIndex();
2163     assert(FI && "No Frame Pointer Save Slot!");
2164     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2165     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2166     HasGPSaveArea = true;
2167   }
2168 
2169   if (PFI->usesPICBase()) {
2170     int FI = PFI->getPICBasePointerSaveIndex();
2171     assert(FI && "No PIC Base Pointer Save Slot!");
2172     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2173 
2174     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2175     HasGPSaveArea = true;
2176   }
2177 
2178   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2179   if (RegInfo->hasBasePointer(MF)) {
2180     int FI = PFI->getBasePointerSaveIndex();
2181     assert(FI && "No Base Pointer Save Slot!");
2182     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2183 
2184     Register BP = RegInfo->getBaseRegister(MF);
2185     if (PPC::G8RCRegClass.contains(BP)) {
2186       MinG8R = std::min<unsigned>(MinG8R, BP);
2187       HasG8SaveArea = true;
2188     } else if (PPC::GPRCRegClass.contains(BP)) {
2189       MinGPR = std::min<unsigned>(MinGPR, BP);
2190       HasGPSaveArea = true;
2191     }
2192   }
2193 
2194   // General register save area starts right below the Floating-point
2195   // register save area.
2196   if (HasGPSaveArea || HasG8SaveArea) {
2197     // Move general register save area spill slots down, taking into account
2198     // the size of the Floating-point register save area.
2199     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2200       if (!GPRegs[i].isSpilledToReg()) {
2201         int FI = GPRegs[i].getFrameIdx();
2202         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2203       }
2204     }
2205 
2206     // Likewise move the 64-bit register save area spill slots down, taking into
2207     // account the size of the Floating-point register save area.
2208     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2209       if (!G8Regs[i].isSpilledToReg()) {
2210         int FI = G8Regs[i].getFrameIdx();
2211         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2212       }
2213     }
2214 
2215     unsigned MinReg =
2216       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2217                          TRI->getEncodingValue(MinG8R));
2218 
2219     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2220     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2221   }
2222 
2223   // For 32-bit only, the CR save area is below the general register
2224   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2225   // to the stack pointer and hence does not need an adjustment here.
2226   // Only CR2 (the first nonvolatile spilled) has an associated frame
2227   // index so that we have a single uniform save area.
2228   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2229     // Adjust the frame index of the CR spill slot.
2230     for (const auto &CSInfo : CSI) {
2231       if (CSInfo.getReg() == PPC::CR2) {
2232         int FI = CSInfo.getFrameIdx();
2233         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2234         break;
2235       }
2236     }
2237 
2238     LowerBound -= 4; // The CR save area is always 4 bytes long.
2239   }
2240 
2241   // Both Altivec and SPE have the same alignment and padding requirements
2242   // within the stack frame.
2243   if (HasVRSaveArea) {
2244     // Insert alignment padding; we need 16-byte alignment. Note: for a positive
2245     // number the alignment formula is y = (x + (n-1)) & ~(n-1). But since we are
2246     // using a negative number here (the stack grows downward), we should use the
2247     // formula y = x & ~(n-1), where x is the size before aligning, n is the
2248     // alignment size (n = 16 here) and y is the size after aligning.
2249     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2250     LowerBound &= ~(15);
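         // Illustrative value (an assumption): LowerBound = -200 becomes
         // -200 & ~15 = -208, i.e. it is rounded down to the next 16-byte boundary.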
2251 
2252     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2253       int FI = VRegs[i].getFrameIdx();
2254 
2255       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2256     }
2257   }
2258 
2259   addScavengingSpillSlot(MF, RS);
2260 }
2261 
2262 void
2263 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2264                                          RegScavenger *RS) const {
2265   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2266   // a large stack, which will require scavenging a register to materialize a
2267   // large offset.
2268 
2269   // We need to have a scavenger spill slot for spills if the frame size is
2270   // large. In case there is no free register for large-offset addressing,
2271   // this slot is used for the necessary emergency spill. Also, we need the
2272   // slot for dynamic stack allocations.
2273 
2274   // The scavenger might be invoked if the frame offset does not fit into
2275   // the 16-bit immediate. We don't know the complete frame size here
2276   // because we've not yet computed callee-saved register spills or the
2277   // needed alignment padding.
2278   unsigned StackSize = determineFrameLayout(MF, true);
2279   MachineFrameInfo &MFI = MF.getFrameInfo();
2280   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2281       (hasSpills(MF) && !isInt<16>(StackSize))) {
2282     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2283     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2284     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2285     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2286     unsigned Size = TRI.getSpillSize(RC);
2287     Align Alignment = TRI.getSpillAlign(RC);
2288     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2289 
2290     // Might we have over-aligned allocas?
2291     bool HasAlVars =
2292         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2293 
2294     // These kinds of spills might need two registers.
2295     if (spillsCR(MF) || HasAlVars)
2296       RS->addScavengingFrameIndex(
2297           MFI.CreateStackObject(Size, Alignment, false));
2298   }
2299 }
2300 
2301 // This function checks if a callee-saved GPR can be spilled to a volatile
2302 // vector register. This occurs for leaf functions when the option
2303 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2304 // which were not spilled to vectors, return false so the target-independent
2305 // code can handle them by assigning a FrameIdx to a stack slot.
2306 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2307     MachineFunction &MF, const TargetRegisterInfo *TRI,
2308     std::vector<CalleeSavedInfo> &CSI) const {
2309 
2310   if (CSI.empty())
2311     return true; // Early exit if no callee saved registers are modified!
2312 
2313   // Early exit if we cannot spill GPRs to volatile vector registers.
2314   MachineFrameInfo &MFI = MF.getFrameInfo();
2315   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2316     return false;
2317 
2318   // Build a BitVector of VSRs that can be used for spilling GPRs.
2319   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2320   BitVector BVCalleeSaved(TRI->getNumRegs());
2321   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2322   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2323   for (unsigned i = 0; CSRegs[i]; ++i)
2324     BVCalleeSaved.set(CSRegs[i]);
2325 
2326   for (unsigned Reg : BVAllocatable.set_bits()) {
2327     // Set to 0 if the register is not a volatile VSX register, or if it is
2328     // used in the function.
2329     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2330         MF.getRegInfo().isPhysRegUsed(Reg))
2331       BVAllocatable.reset(Reg);
2332   }
2333 
2334   bool AllSpilledToReg = true;
2335   unsigned LastVSRUsedForSpill = 0;
2336   for (auto &CS : CSI) {
2337     if (BVAllocatable.none())
2338       return false;
2339 
2340     unsigned Reg = CS.getReg();
2341 
2342     if (!PPC::G8RCRegClass.contains(Reg)) {
2343       AllSpilledToReg = false;
2344       continue;
2345     }
2346 
2347     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2348     // into one VSR using the mtvsrdd instruction.
2349     if (LastVSRUsedForSpill != 0) {
2350       CS.setDstReg(LastVSRUsedForSpill);
2351       BVAllocatable.reset(LastVSRUsedForSpill);
2352       LastVSRUsedForSpill = 0;
2353       continue;
2354     }
2355 
2356     unsigned VolatileVFReg = BVAllocatable.find_first();
2357     if (VolatileVFReg < BVAllocatable.size()) {
2358       CS.setDstReg(VolatileVFReg);
2359       LastVSRUsedForSpill = VolatileVFReg;
2360     } else {
2361       AllSpilledToReg = false;
2362     }
2363   }
2364   return AllSpilledToReg;
2365 }
2366 
2367 bool PPCFrameLowering::spillCalleeSavedRegisters(
2368     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2369     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2370 
2371   MachineFunction *MF = MBB.getParent();
2372   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2373   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2374   bool MustSaveTOC = FI->mustSaveTOC();
2375   DebugLoc DL;
2376   bool CRSpilled = false;
2377   MachineInstrBuilder CRMIB;
2378   BitVector Spilled(TRI->getNumRegs());
2379 
2380   VSRContainingGPRs.clear();
2381 
2382   // Map each VSR to the GPRs to be spilled into it. A single VSR can contain one
2383   // or two GPRs, so we need a table to record the information for later save/restore.
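       // For example (the register choices here are purely illustrative): if X29 and
       // X30 are both assigned to VS32, the entry for VS32 records the pair
       // (X29, X30), so the prologue can pack them with a single mtvsrdd and the
       // epilogue can unpack them with mfvsrd/mfvsrld.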
2384   llvm::for_each(CSI, [&](const CalleeSavedInfo &Info) {
2385     if (Info.isSpilledToReg()) {
2386       auto &SpilledVSR =
2387           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2388       assert(SpilledVSR.second == 0 &&
2389              "Can't spill more than two GPRs into VSR!");
2390       if (SpilledVSR.first == 0)
2391         SpilledVSR.first = Info.getReg();
2392       else
2393         SpilledVSR.second = Info.getReg();
2394     }
2395   });
2396 
2397   for (const CalleeSavedInfo &I : CSI) {
2398     unsigned Reg = I.getReg();
2399 
2400     // CR2 through CR4 are the nonvolatile CR fields.
2401     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2402 
2403     // Add the callee-saved register as live-in; it's killed at the spill.
2404     // Do not do this for callee-saved registers that are live-in to the
2405     // function because they will already be marked live-in and this will be
2406     // adding it for a second time. It is an error to add the same register
2407     // to the set more than once.
2408     const MachineRegisterInfo &MRI = MF->getRegInfo();
2409     bool IsLiveIn = MRI.isLiveIn(Reg);
2410     if (!IsLiveIn)
2411        MBB.addLiveIn(Reg);
2412 
2413     if (CRSpilled && IsCRField) {
2414       CRMIB.addReg(Reg, RegState::ImplicitKill);
2415       continue;
2416     }
2417 
2418     // The actual spill will happen in the prologue.
2419     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2420       continue;
2421 
2422     // Insert the spill to the stack frame.
2423     if (IsCRField) {
2424       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2425       if (!Subtarget.is32BitELFABI()) {
2426         // The actual spill will happen at the start of the prologue.
2427         FuncInfo->addMustSaveCR(Reg);
2428       } else {
2429         CRSpilled = true;
2430         FuncInfo->setSpillsCR();
2431 
2432         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2433         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2434         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2435                   .addReg(Reg, RegState::ImplicitKill);
2436 
2437         MBB.insert(MI, CRMIB);
2438         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2439                                          .addReg(PPC::R12,
2440                                                  getKillRegState(true)),
2441                                          I.getFrameIdx()));
2442       }
2443     } else {
2444       if (I.isSpilledToReg()) {
2445         unsigned Dst = I.getDstReg();
2446 
2447         if (Spilled[Dst])
2448           continue;
2449 
2450         if (VSRContainingGPRs[Dst].second != 0) {
2451           assert(Subtarget.hasP9Vector() &&
2452                  "mtvsrdd is unavailable on pre-P9 targets.");
2453 
2454           NumPESpillVSR += 2;
2455           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2456               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2457               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2458         } else if (VSRContainingGPRs[Dst].second == 0) {
2459           assert(Subtarget.hasP8Vector() &&
2460                  "Can't move GPR to VSR on pre-P8 targets.");
2461 
2462           ++NumPESpillVSR;
2463           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2464                   TRI->getSubReg(Dst, PPC::sub_64))
2465               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2466         } else {
2467           llvm_unreachable("More than two GPRs spilled to a VSR!");
2468         }
2469         Spilled.set(Dst);
2470       } else {
2471         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2472         // Use !IsLiveIn for the kill flag.
2473         // We do not want to kill registers that are live in this function
2474         // before their use because they will become undefined registers.
2475         // Functions without NoUnwind need to preserve the order of elements in
2476         // saved vector registers.
2477         if (Subtarget.needsSwapsForVSXMemOps() &&
2478             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2479           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2480                                        I.getFrameIdx(), RC, TRI);
2481         else
2482           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(),
2483                                   RC, TRI);
2484       }
2485     }
2486   }
2487   return true;
2488 }
2489 
2490 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2491                        bool CR4Spilled, MachineBasicBlock &MBB,
2492                        MachineBasicBlock::iterator MI,
2493                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2494 
2495   MachineFunction *MF = MBB.getParent();
2496   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2497   DebugLoc DL;
2498   unsigned MoveReg = PPC::R12;
2499 
2500   // 32-bit:  FP-relative
2501   MBB.insert(MI,
2502              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2503                                CSI[CSIIndex].getFrameIdx()));
2504 
2505   unsigned RestoreOp = PPC::MTOCRF;
2506   if (CR2Spilled)
2507     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2508                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2509 
2510   if (CR3Spilled)
2511     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2512                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2513 
2514   if (CR4Spilled)
2515     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2516                .addReg(MoveReg, getKillRegState(true)));
2517 }
2518 
2519 MachineBasicBlock::iterator PPCFrameLowering::
2520 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2521                               MachineBasicBlock::iterator I) const {
2522   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2523   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2524       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2525     // Add (actually subtract) back the amount the callee popped on return.
2526     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2527       bool is64Bit = Subtarget.isPPC64();
2528       CalleeAmt *= -1;
2529       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2530       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2531       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2532       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2533       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2534       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2535       const DebugLoc &dl = I->getDebugLoc();
2536 
2537       if (isInt<16>(CalleeAmt)) {
2538         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2539           .addReg(StackReg, RegState::Kill)
2540           .addImm(CalleeAmt);
2541       } else {
2542         MachineBasicBlock::iterator MBBI = I;
2543         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2544           .addImm(CalleeAmt >> 16);
2545         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2546           .addReg(TmpReg, RegState::Kill)
2547           .addImm(CalleeAmt & 0xFFFF);
2548         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2549           .addReg(StackReg, RegState::Kill)
2550           .addReg(TmpReg);
2551       }
2552     }
2553   }
2554   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2555   return MBB.erase(I);
2556 }
2557 
2558 static bool isCalleeSavedCR(unsigned Reg) {
2559   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2560 }
2561 
2562 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2563     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2564     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2565   MachineFunction *MF = MBB.getParent();
2566   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2567   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2568   bool MustSaveTOC = FI->mustSaveTOC();
2569   bool CR2Spilled = false;
2570   bool CR3Spilled = false;
2571   bool CR4Spilled = false;
2572   unsigned CSIIndex = 0;
2573   BitVector Restored(TRI->getNumRegs());
2574 
2575   // Initialize insertion-point logic; we will be restoring in reverse
2576   // order of spill.
2577   MachineBasicBlock::iterator I = MI, BeforeI = I;
2578   bool AtStart = I == MBB.begin();
2579 
2580   if (!AtStart)
2581     --BeforeI;
2582 
2583   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2584     unsigned Reg = CSI[i].getReg();
2585 
2586     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2587       continue;
2588 
2589     // Restores of the callee-saved condition-register fields are handled
2590     // during epilogue insertion.
2591     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2592       continue;
2593 
2594     if (Reg == PPC::CR2) {
2595       CR2Spilled = true;
2596       // The spill slot is associated only with CR2, which is the first
2597       // nonvolatile CR field spilled.  Record its CSI index here.
2598       CSIIndex = i;
2599       continue;
2600     } else if (Reg == PPC::CR3) {
2601       CR3Spilled = true;
2602       continue;
2603     } else if (Reg == PPC::CR4) {
2604       CR4Spilled = true;
2605       continue;
2606     } else {
2607       // On 32-bit ELF, when we first encounter a non-CR register after
2608       // seeing at least one CR register, restore all spilled CRs together.
2609       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2610         bool is31 = needsFP(*MF);
2611         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2612                    CSIIndex);
2613         CR2Spilled = CR3Spilled = CR4Spilled = false;
2614       }
2615 
2616       if (CSI[i].isSpilledToReg()) {
2617         DebugLoc DL;
2618         unsigned Dst = CSI[i].getDstReg();
2619 
2620         if (Restored[Dst])
2621           continue;
2622 
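             // One or two GPRs were saved into this VSR: mfvsrld recovers the
             // GPR kept in the low doubleword (packing two GPRs per VSR is only
             // possible on Power9), and mfvsrd recovers the GPR kept in the
             // high doubleword.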
2623         if (VSRContainingGPRs[Dst].second != 0) {
2624           assert(Subtarget.hasP9Vector() && "mfvsrld requires a Power9 target");
2625           NumPEReloadVSR += 2;
2626           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2627                   VSRContainingGPRs[Dst].second)
2628               .addReg(Dst);
2629           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2630                   VSRContainingGPRs[Dst].first)
2631               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2632         } else if (VSRContainingGPRs[Dst].second == 0) {
2633           assert(Subtarget.hasP8Vector() && "mfvsrd requires a Power8 target");
2634           ++NumPEReloadVSR;
2635           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2636                   VSRContainingGPRs[Dst].first)
2637               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2638         } else {
2639           llvm_unreachable("More than two GPRs spilled to a VSR!");
2640         }
2641 
2642         Restored.set(Dst);
2643 
2644       } else {
2645         // Default behavior for non-CR restores.
2646         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2647 
2648         // Functions without NoUnwind need to preserve the order of elements in
2649         // saved vector registers.
2650         if (Subtarget.needsSwapsForVSXMemOps() &&
2651             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2652           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2653                                         TRI);
2654         else
2655           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2656 
2657         assert(I != MBB.begin() &&
2658                "loadRegFromStackSlot didn't insert any code!");
2659       }
2660     }
2661 
2662     // Insert in reverse order.
2663     if (AtStart)
2664       I = MBB.begin();
2665     else {
2666       I = BeforeI;
2667       ++I;
2668     }
2669   }
2670 
2671   // If we haven't yet restored the CRs, do so now.
2672   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2673     assert(Subtarget.is32BitELFABI() &&
2674            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2675     bool is31 = needsFP(*MF);
2676     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2677   }
2678 
2679   return true;
2680 }
2681 
2682 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2683   return TOCSaveOffset;
2684 }
2685 
2686 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2687   return FramePointerSaveOffset;
2688 }
2689 
2690 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2691   return BasePointerSaveOffset;
2692 }
2693 
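     // Shrink wrapping is permitted unless it was explicitly disabled for this
     // function or the target uses the 32-bit ELF ABI.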
2694 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2695   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2696     return false;
2697   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2698 }
2699