xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision da759cfa320d5076b075d15ff3f00ab3ba5634fd)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isDarwinABI() || STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // For the Darwin ABI:
64   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
65   // for saving the frame pointer (if needed.)  While the published ABI has
66   // not used this slot since at least MacOSX 10.2, there is older code
67   // around that does use it, and that needs to continue to work.
68   if (STI.isDarwinABI())
69     return STI.isPPC64() ? -8U : -4U;
70 
71   // SVR4 ABI: First slot in the general register save area.
72   return STI.isPPC64() ? -8U : -4U;
73 }
74 
75 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
76   if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
77     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
78 
79   // 32-bit SVR4 ABI:
80   return 8;
81 }
82 
83 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
84   if (STI.isDarwinABI())
85     return STI.isPPC64() ? -16U : -8U;
86 
87   // SVR4 ABI: First slot in the general register save area.
88   return STI.isPPC64()
89              ? -16U
90              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
91 }
92 
/// Compute the value stored in PPCFrameLowering::CRSaveOffset.
static unsigned computeCRSaveOffset() {
  // NOTE: this offset still has to be updated for AIX PPC32.
  const unsigned Offset = 8;
  return Offset;
}
97 
98 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
99     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
100                           STI.getPlatformStackAlignment(), 0),
101       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
102       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
103       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
104       LinkageSize(computeLinkageSize(Subtarget)),
105       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
106       CRSaveOffset(computeCRSaveOffset()) {}
107 
108 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
109 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
110     unsigned &NumEntries) const {
111   if (Subtarget.isDarwinABI()) {
112     NumEntries = 1;
113     if (Subtarget.isPPC64()) {
114       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
115       return &darwin64Offsets;
116     } else {
117       static const SpillSlot darwinOffsets = {PPC::R31, -4};
118       return &darwinOffsets;
119     }
120   }
121 
122   // Early exit if not using the SVR4 ABI.
123   if (!Subtarget.isSVR4ABI()) {
124     NumEntries = 0;
125     return nullptr;
126   }
127 
128   // Note that the offsets here overlap, but this is fixed up in
129   // processFunctionBeforeFrameFinalized.
130 
131   static const SpillSlot Offsets[] = {
132       // Floating-point register save area offsets.
133       {PPC::F31, -8},
134       {PPC::F30, -16},
135       {PPC::F29, -24},
136       {PPC::F28, -32},
137       {PPC::F27, -40},
138       {PPC::F26, -48},
139       {PPC::F25, -56},
140       {PPC::F24, -64},
141       {PPC::F23, -72},
142       {PPC::F22, -80},
143       {PPC::F21, -88},
144       {PPC::F20, -96},
145       {PPC::F19, -104},
146       {PPC::F18, -112},
147       {PPC::F17, -120},
148       {PPC::F16, -128},
149       {PPC::F15, -136},
150       {PPC::F14, -144},
151 
152       // General register save area offsets.
153       {PPC::R31, -4},
154       {PPC::R30, -8},
155       {PPC::R29, -12},
156       {PPC::R28, -16},
157       {PPC::R27, -20},
158       {PPC::R26, -24},
159       {PPC::R25, -28},
160       {PPC::R24, -32},
161       {PPC::R23, -36},
162       {PPC::R22, -40},
163       {PPC::R21, -44},
164       {PPC::R20, -48},
165       {PPC::R19, -52},
166       {PPC::R18, -56},
167       {PPC::R17, -60},
168       {PPC::R16, -64},
169       {PPC::R15, -68},
170       {PPC::R14, -72},
171 
172       // CR save area offset.  We map each of the nonvolatile CR fields
173       // to the slot for CR2, which is the first of the nonvolatile CR
174       // fields to be assigned, so that we only allocate one save slot.
175       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
176       {PPC::CR2, -4},
177 
178       // VRSAVE save area offset.
179       {PPC::VRSAVE, -4},
180 
181       // Vector register save area
182       {PPC::V31, -16},
183       {PPC::V30, -32},
184       {PPC::V29, -48},
185       {PPC::V28, -64},
186       {PPC::V27, -80},
187       {PPC::V26, -96},
188       {PPC::V25, -112},
189       {PPC::V24, -128},
190       {PPC::V23, -144},
191       {PPC::V22, -160},
192       {PPC::V21, -176},
193       {PPC::V20, -192},
194 
195       // SPE register save area (overlaps Vector save area).
196       {PPC::S31, -8},
197       {PPC::S30, -16},
198       {PPC::S29, -24},
199       {PPC::S28, -32},
200       {PPC::S27, -40},
201       {PPC::S26, -48},
202       {PPC::S25, -56},
203       {PPC::S24, -64},
204       {PPC::S23, -72},
205       {PPC::S22, -80},
206       {PPC::S21, -88},
207       {PPC::S20, -96},
208       {PPC::S19, -104},
209       {PPC::S18, -112},
210       {PPC::S17, -120},
211       {PPC::S16, -128},
212       {PPC::S15, -136},
213       {PPC::S14, -144}};
214 
215   static const SpillSlot Offsets64[] = {
216       // Floating-point register save area offsets.
217       {PPC::F31, -8},
218       {PPC::F30, -16},
219       {PPC::F29, -24},
220       {PPC::F28, -32},
221       {PPC::F27, -40},
222       {PPC::F26, -48},
223       {PPC::F25, -56},
224       {PPC::F24, -64},
225       {PPC::F23, -72},
226       {PPC::F22, -80},
227       {PPC::F21, -88},
228       {PPC::F20, -96},
229       {PPC::F19, -104},
230       {PPC::F18, -112},
231       {PPC::F17, -120},
232       {PPC::F16, -128},
233       {PPC::F15, -136},
234       {PPC::F14, -144},
235 
236       // General register save area offsets.
237       {PPC::X31, -8},
238       {PPC::X30, -16},
239       {PPC::X29, -24},
240       {PPC::X28, -32},
241       {PPC::X27, -40},
242       {PPC::X26, -48},
243       {PPC::X25, -56},
244       {PPC::X24, -64},
245       {PPC::X23, -72},
246       {PPC::X22, -80},
247       {PPC::X21, -88},
248       {PPC::X20, -96},
249       {PPC::X19, -104},
250       {PPC::X18, -112},
251       {PPC::X17, -120},
252       {PPC::X16, -128},
253       {PPC::X15, -136},
254       {PPC::X14, -144},
255 
256       // VRSAVE save area offset.
257       {PPC::VRSAVE, -4},
258 
259       // Vector register save area
260       {PPC::V31, -16},
261       {PPC::V30, -32},
262       {PPC::V29, -48},
263       {PPC::V28, -64},
264       {PPC::V27, -80},
265       {PPC::V26, -96},
266       {PPC::V25, -112},
267       {PPC::V24, -128},
268       {PPC::V23, -144},
269       {PPC::V22, -160},
270       {PPC::V21, -176},
271       {PPC::V20, -192}};
272 
273   if (Subtarget.isPPC64()) {
274     NumEntries = array_lengthof(Offsets64);
275 
276     return Offsets64;
277   } else {
278     NumEntries = array_lengthof(Offsets);
279 
280     return Offsets;
281   }
282 }
283 
284 /// RemoveVRSaveCode - We have found that this function does not need any code
285 /// to manipulate the VRSAVE register, even though it uses vector registers.
286 /// This can happen when the only registers used are known to be live in or out
287 /// of the function.  Remove all of the VRSAVE related code from the function.
288 /// FIXME: The removal of the code results in a compile failure at -O0 when the
289 /// function contains a function call, as the GPR containing original VRSAVE
290 /// contents is spilled and reloaded around the call.  Without the prolog code,
291 /// the spill instruction refers to an undefined register.  This code needs
292 /// to account for all uses of that GPR.
293 static void RemoveVRSaveCode(MachineInstr &MI) {
294   MachineBasicBlock *Entry = MI.getParent();
295   MachineFunction *MF = Entry->getParent();
296 
297   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
298   MachineBasicBlock::iterator MBBI = MI;
299   ++MBBI;
300   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
301   MBBI->eraseFromParent();
302 
303   bool RemovedAllMTVRSAVEs = true;
304   // See if we can find and remove the MTVRSAVE instruction from all of the
305   // epilog blocks.
306   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
307     // If last instruction is a return instruction, add an epilogue
308     if (I->isReturnBlock()) {
309       bool FoundIt = false;
310       for (MBBI = I->end(); MBBI != I->begin(); ) {
311         --MBBI;
312         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
313           MBBI->eraseFromParent();  // remove it.
314           FoundIt = true;
315           break;
316         }
317       }
318       RemovedAllMTVRSAVEs &= FoundIt;
319     }
320   }
321 
322   // If we found and removed all MTVRSAVE instructions, remove the read of
323   // VRSAVE as well.
324   if (RemovedAllMTVRSAVEs) {
325     MBBI = MI;
326     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
327     --MBBI;
328     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
329     MBBI->eraseFromParent();
330   }
331 
332   // Finally, nuke the UPDATE_VRSAVE.
333   MI.eraseFromParent();
334 }
335 
336 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
337 // instruction selector.  Based on the vector registers that have been used,
338 // transform this into the appropriate ORI instruction.
339 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
340   MachineFunction *MF = MI.getParent()->getParent();
341   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
342   DebugLoc dl = MI.getDebugLoc();
343 
344   const MachineRegisterInfo &MRI = MF->getRegInfo();
345   unsigned UsedRegMask = 0;
346   for (unsigned i = 0; i != 32; ++i)
347     if (MRI.isPhysRegModified(VRRegNo[i]))
348       UsedRegMask |= 1 << (31-i);
349 
350   // Live in and live out values already must be in the mask, so don't bother
351   // marking them.
352   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
353     unsigned RegNo = TRI->getEncodingValue(LI.first);
354     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
355       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
356   }
357 
358   // Live out registers appear as use operands on return instructions.
359   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
360        UsedRegMask != 0 && BI != BE; ++BI) {
361     const MachineBasicBlock &MBB = *BI;
362     if (!MBB.isReturnBlock())
363       continue;
364     const MachineInstr &Ret = MBB.back();
365     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
366       const MachineOperand &MO = Ret.getOperand(I);
367       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
368         continue;
369       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
370       UsedRegMask &= ~(1 << (31-RegNo));
371     }
372   }
373 
374   // If no registers are used, turn this into a copy.
375   if (UsedRegMask == 0) {
376     // Remove all VRSAVE code.
377     RemoveVRSaveCode(MI);
378     return;
379   }
380 
381   Register SrcReg = MI.getOperand(1).getReg();
382   Register DstReg = MI.getOperand(0).getReg();
383 
384   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
385     if (DstReg != SrcReg)
386       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
387           .addReg(SrcReg)
388           .addImm(UsedRegMask);
389     else
390       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
391           .addReg(SrcReg, RegState::Kill)
392           .addImm(UsedRegMask);
393   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
394     if (DstReg != SrcReg)
395       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
396           .addReg(SrcReg)
397           .addImm(UsedRegMask >> 16);
398     else
399       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
400           .addReg(SrcReg, RegState::Kill)
401           .addImm(UsedRegMask >> 16);
402   } else {
403     if (DstReg != SrcReg)
404       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
405           .addReg(SrcReg)
406           .addImm(UsedRegMask >> 16);
407     else
408       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
409           .addReg(SrcReg, RegState::Kill)
410           .addImm(UsedRegMask >> 16);
411 
412     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
413         .addReg(DstReg, RegState::Kill)
414         .addImm(UsedRegMask & 0xFFFF);
415   }
416 
417   // Remove the old UPDATE_VRSAVE instruction.
418   MI.eraseFromParent();
419 }
420 
421 static bool spillsCR(const MachineFunction &MF) {
422   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
423   return FuncInfo->isCRSpilled();
424 }
425 
426 static bool spillsVRSAVE(const MachineFunction &MF) {
427   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
428   return FuncInfo->isVRSAVESpilled();
429 }
430 
431 static bool hasSpills(const MachineFunction &MF) {
432   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
433   return FuncInfo->hasSpills();
434 }
435 
436 static bool hasNonRISpills(const MachineFunction &MF) {
437   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
438   return FuncInfo->hasNonRISpills();
439 }
440 
441 /// MustSaveLR - Return true if this function requires that we save the LR
442 /// register onto the stack in the prolog and restore it in the epilog of the
443 /// function.
444 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
445   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
446 
447   // We need a save/restore of LR if there is any def of LR (which is
448   // defined by calls, including the PIC setup sequence), or if there is
449   // some use of the LR stack slot (e.g. for builtin_return_address).
450   // (LR comes in 32 and 64 bit versions.)
451   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
452   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
453 }
454 
455 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
456 /// call frame size. Update the MachineFunction object with the stack size.
457 unsigned
458 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
459                                                 bool UseEstimate) const {
460   unsigned NewMaxCallFrameSize = 0;
461   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
462                                             &NewMaxCallFrameSize);
463   MF.getFrameInfo().setStackSize(FrameSize);
464   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
465   return FrameSize;
466 }
467 
468 /// determineFrameLayout - Determine the size of the frame and maximum call
469 /// frame size.
470 unsigned
471 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
472                                        bool UseEstimate,
473                                        unsigned *NewMaxCallFrameSize) const {
474   const MachineFrameInfo &MFI = MF.getFrameInfo();
475   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
476 
477   // Get the number of bytes to allocate from the FrameInfo
478   unsigned FrameSize =
479     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
480 
481   // Get stack alignments. The frame must be aligned to the greatest of these:
482   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
483   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
484   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
485 
486   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
487 
488   unsigned LR = RegInfo->getRARegister();
489   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
490   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
491                        !MFI.adjustsStack() &&       // No calls.
492                        !MustSaveLR(MF, LR) &&       // No need to save LR.
493                        !FI->mustSaveTOC() &&        // No need to save TOC.
494                        !RegInfo->hasBasePointer(MF); // No special alignment.
495 
496   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
497   // code if all local vars are reg-allocated.
498   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
499 
500   // Check whether we can skip adjusting the stack pointer (by using red zone)
501   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
502     // No need for frame
503     return 0;
504   }
505 
506   // Get the maximum call frame size of all the calls.
507   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
508 
509   // Maximum call frame needs to be at least big enough for linkage area.
510   unsigned minCallFrameSize = getLinkageSize();
511   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
512 
513   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
514   // that allocations will be aligned.
515   if (MFI.hasVarSizedObjects())
516     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
517 
518   // Update the new max call frame size if the caller passes in a valid pointer.
519   if (NewMaxCallFrameSize)
520     *NewMaxCallFrameSize = maxCallFrameSize;
521 
522   // Include call frame size in total.
523   FrameSize += maxCallFrameSize;
524 
525   // Make sure the frame is aligned.
526   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
527 
528   return FrameSize;
529 }
530 
531 // hasFP - Return true if the specified function actually has a dedicated frame
532 // pointer register.
533 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
534   const MachineFrameInfo &MFI = MF.getFrameInfo();
535   // FIXME: This is pretty much broken by design: hasFP() might be called really
536   // early, before the stack layout was calculated and thus hasFP() might return
537   // true or false here depending on the time of call.
538   return (MFI.getStackSize()) && needsFP(MF);
539 }
540 
541 // needsFP - Return true if the specified function should have a dedicated frame
542 // pointer register.  This is true if the function has variable sized allocas or
543 // if frame pointer elimination is disabled.
544 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
545   const MachineFrameInfo &MFI = MF.getFrameInfo();
546 
547   // Naked functions have no stack frame pushed, so we don't have a frame
548   // pointer.
549   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
550     return false;
551 
552   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
553     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
554     (MF.getTarget().Options.GuaranteedTailCallOpt &&
555      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
556 }
557 
558 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
559   bool is31 = needsFP(MF);
560   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
561   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
562 
563   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
564   bool HasBP = RegInfo->hasBasePointer(MF);
565   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
566   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
567 
568   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
569        BI != BE; ++BI)
570     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
571       --MBBI;
572       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
573         MachineOperand &MO = MBBI->getOperand(I);
574         if (!MO.isReg())
575           continue;
576 
577         switch (MO.getReg()) {
578         case PPC::FP:
579           MO.setReg(FPReg);
580           break;
581         case PPC::FP8:
582           MO.setReg(FP8Reg);
583           break;
584         case PPC::BP:
585           MO.setReg(BPReg);
586           break;
587         case PPC::BP8:
588           MO.setReg(BP8Reg);
589           break;
590 
591         }
592       }
593     }
594 }
595 
596 /*  This function will do the following:
597     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
598       respectively (defaults recommended by the ABI) and return true
599     - If MBB is not an entry block, initialize the register scavenger and look
600       for available registers.
601     - If the defaults (R0/R12) are available, return true
602     - If TwoUniqueRegsRequired is set to true, it looks for two unique
603       registers. Otherwise, look for a single available register.
604       - If the required registers are found, set SR1 and SR2 and return true.
605       - If the required registers are not found, set SR2 or both SR1 and SR2 to
606         PPC::NoRegister and return false.
607 
608     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
609     is not set, this function will attempt to find two different registers, but
610     still return true if only one register is available (and set SR1 == SR2).
611 */
612 bool
613 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
614                                       bool UseAtEnd,
615                                       bool TwoUniqueRegsRequired,
616                                       unsigned *SR1,
617                                       unsigned *SR2) const {
618   RegScavenger RS;
619   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
620   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
621 
622   // Set the defaults for the two scratch registers.
623   if (SR1)
624     *SR1 = R0;
625 
626   if (SR2) {
627     assert (SR1 && "Asking for the second scratch register but not the first?");
628     *SR2 = R12;
629   }
630 
631   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
632   if ((UseAtEnd && MBB->isReturnBlock()) ||
633       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
634     return true;
635 
636   RS.enterBasicBlock(*MBB);
637 
638   if (UseAtEnd && !MBB->empty()) {
639     // The scratch register will be used at the end of the block, so must
640     // consider all registers used within the block
641 
642     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
643     // If no terminator, back iterator up to previous instruction.
644     if (MBBI == MBB->end())
645       MBBI = std::prev(MBBI);
646 
647     if (MBBI != MBB->begin())
648       RS.forward(MBBI);
649   }
650 
651   // If the two registers are available, we're all good.
652   // Note that we only return here if both R0 and R12 are available because
653   // although the function may not require two unique registers, it may benefit
654   // from having two so we should try to provide them.
655   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
656     return true;
657 
658   // Get the list of callee-saved registers for the target.
659   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
660   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
661 
662   // Get all the available registers in the block.
663   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
664                                      &PPC::GPRCRegClass);
665 
666   // We shouldn't use callee-saved registers as scratch registers as they may be
667   // available when looking for a candidate block for shrink wrapping but not
668   // available when the actual prologue/epilogue is being emitted because they
669   // were added as live-in to the prologue block by PrologueEpilogueInserter.
670   for (int i = 0; CSRegs[i]; ++i)
671     BV.reset(CSRegs[i]);
672 
673   // Set the first scratch register to the first available one.
674   if (SR1) {
675     int FirstScratchReg = BV.find_first();
676     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
677   }
678 
679   // If there is another one available, set the second scratch register to that.
680   // Otherwise, set it to either PPC::NoRegister if this function requires two
681   // or to whatever SR1 is set to if this function doesn't require two.
682   if (SR2) {
683     int SecondScratchReg = BV.find_next(*SR1);
684     if (SecondScratchReg != -1)
685       *SR2 = SecondScratchReg;
686     else
687       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
688   }
689 
690   // Now that we've done our best to provide both registers, double check
691   // whether we were unable to provide enough.
692   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
693     return false;
694 
695   return true;
696 }
697 
698 // We need a scratch register for spilling LR and for spilling CR. By default,
699 // we use two scratch registers to hide latency. However, if only one scratch
700 // register is available, we can adjust for that by not overlapping the spill
701 // code. However, if we need to realign the stack (i.e. have a base pointer)
702 // and the stack frame is large, we need two scratch registers.
703 bool
704 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
705   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
706   MachineFunction &MF = *(MBB->getParent());
707   bool HasBP = RegInfo->hasBasePointer(MF);
708   unsigned FrameSize = determineFrameLayout(MF);
709   int NegFrameSize = -FrameSize;
710   bool IsLargeFrame = !isInt<16>(NegFrameSize);
711   MachineFrameInfo &MFI = MF.getFrameInfo();
712   unsigned MaxAlign = MFI.getMaxAlignment();
713   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
714 
715   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
716 }
717 
718 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
719   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
720 
721   return findScratchRegister(TmpMBB, false,
722                              twoUniqueScratchRegsRequired(TmpMBB));
723 }
724 
725 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
726   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
727 
728   return findScratchRegister(TmpMBB, true);
729 }
730 
731 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
732   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
733   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
734 
735   // Abort if there is no register info or function info.
736   if (!RegInfo || !FI)
737     return false;
738 
739   // Only move the stack update on ELFv2 ABI and PPC64.
740   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
741     return false;
742 
743   // Check the frame size first and return false if it does not fit the
744   // requirements.
745   // We need a non-zero frame size as well as a frame that will fit in the red
746   // zone. This is because by moving the stack pointer update we are now storing
747   // to the red zone until the stack pointer is updated. If we get an interrupt
748   // inside the prologue but before the stack update we now have a number of
749   // stores to the red zone and those stores must all fit.
750   MachineFrameInfo &MFI = MF.getFrameInfo();
751   unsigned FrameSize = MFI.getStackSize();
752   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
753     return false;
754 
755   // Frame pointers and base pointers complicate matters so don't do anything
756   // if we have them. For example having a frame pointer will sometimes require
757   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
758   // difficult.
759   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
760     return false;
761 
762   // Calls to fast_cc functions use different rules for passing parameters on
763   // the stack from the ABI and using PIC base in the function imposes
764   // similar restrictions to using the base pointer. It is not generally safe
765   // to move the stack pointer update in these situations.
766   if (FI->hasFastCall() || FI->usesPICBase())
767     return false;
768 
769   // Finally we can move the stack update if we do not require register
770   // scavenging. Register scavenging can introduce more spills and so
771   // may make the frame size larger than we have computed.
772   return !RegInfo->requiresFrameIndexScavenging(MF);
773 }
774 
/// Emit the function prologue at the start of \p MBB.
///
/// Depending on what the function needs, this saves LR, the CR fields, the
/// frame pointer, the base pointer and the PIC base register (R30), then
/// allocates the stack frame with a store-with-update of the stack pointer,
/// optionally saves the TOC register, sets up the base/frame pointer
/// registers, and finally (when MF.needsFrameMoves() is true) emits CFI
/// instructions describing the CFA and every saved register.
///
/// If the computed frame size is zero the function returns right after the
/// register saves: no stack update, TOC save or CFI is emitted.
///
/// \param MF  The function being lowered; its frame info may be updated
///            (object offsets of fixed callee-saved slots).
/// \param MBB The block that receives the prologue instructions.
775 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
776                                     MachineBasicBlock &MBB) const {
777   MachineBasicBlock::iterator MBBI = MBB.begin();
778   MachineFrameInfo &MFI = MF.getFrameInfo();
779   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
780   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
781 
782   MachineModuleInfo &MMI = MF.getMMI();
783   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
      // All prologue instructions are built with this default (empty) DebugLoc.
784   DebugLoc dl;
785   bool needsCFI = MF.needsFrameMoves();
786 
787   // Get processor type.
788   bool isPPC64 = Subtarget.isPPC64();
789   // Get the ABI.
790   bool isSVR4ABI = Subtarget.isSVR4ABI();
791   bool isAIXABI = Subtarget.isAIXABI();
792   bool isELFv2ABI = Subtarget.isELFv2ABI();
793   assert((Subtarget.isDarwinABI() || isSVR4ABI || isAIXABI) &&
794          "Unsupported PPC ABI.");
795 
796   // For non-SVR4 ABIs, scan the block for an UPDATE_VRSAVE instruction. If we
797   // find one, process it (on AIX finding one is reported as a fatal error).
      // NOTE(review): the loop counter `i` is never read in the loop body.
798   if (!isSVR4ABI)
799     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
800       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
801         if (isAIXABI)
802           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
803         HandleVRSaveUpdate(*MBBI, TII);
804         break;
805       }
806     }
807 
808   // Move MBBI back to the beginning of the prologue block.
809   MBBI = MBB.begin();
810 
811   // Work out frame sizes.
      // NOTE(review): NegFrameSize is declared as `int`, so the isInt<32> check
      // below looks like it can never fail; an oversized frame would already
      // have been truncated by the unsigned -> int conversion. Confirm whether
      // a wider type was intended.
812   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
813   int NegFrameSize = -FrameSize;
814   if (!isInt<32>(NegFrameSize))
815     llvm_unreachable("Unhandled stack size!");
816 
817   if (MFI.isFrameAddressTaken())
818     replaceFPWithRealFP(MF);
819 
820   // Check if the link register (LR) must be saved.
821   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
822   bool MustSaveLR = FI->mustSaveLR();
823   bool MustSaveTOC = FI->mustSaveTOC();
824   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
825   bool MustSaveCR = !MustSaveCRs.empty();
826   // Do we have a frame pointer and/or base pointer for this function?
827   bool HasFP = hasFP(MF);
828   bool HasBP = RegInfo->hasBasePointer(MF);
      // Every configuration except 32-bit SVR4 is treated as having a red zone.
829   bool HasRedZone = isPPC64 || !isSVR4ABI;
830 
831   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
832   Register BPReg = RegInfo->getBaseRegister(MF);
833   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
834   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
835   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
836   unsigned ScratchReg  = 0;
837   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
838   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Select the 64-bit or 32-bit flavour of each opcode used below.
839   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
840                                                 : PPC::MFLR );
841   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
842                                                  : PPC::STW );
843   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
844                                                      : PPC::STWU );
845   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
846                                                         : PPC::STWUX);
847   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
848                                                           : PPC::LIS );
849   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
850                                                  : PPC::ORI );
851   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
852                                               : PPC::OR );
853   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
854                                                             : PPC::SUBFC);
855   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
856                                                                : PPC::SUBFIC);
857 
858   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
859   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
860   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
861   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
862   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
863          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
864 
865   // The return value of findScratchRegister is only checked by the assert.
      // It is stored in SingleScratchReg, which is reassigned right after,
      // presumably so that builds without asserts do not warn about an unused
      // variable.
866   bool SingleScratchReg =
867     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
868                         &ScratchReg, &TempReg);
869   assert(SingleScratchReg &&
870          "Required number of registers not available in this block");
871 
      // From here on SingleScratchReg means "ScratchReg and TempReg are the same
      // register", i.e. only one scratch register is available.
872   SingleScratchReg = ScratchReg == TempReg;
873 
874   int LROffset = getReturnSaveOffset();
875 
      // Save-slot offsets for FP, BP and the PIC base: on SVR4 they are read from
      // the frame objects recorded in FI; otherwise fixed offsets are used.
876   int FPOffset = 0;
877   if (HasFP) {
878     if (isSVR4ABI) {
          // NOTE(review): this MFI (and the two below) shadows the function-level
          // MFI declared above; both are bound to MF.getFrameInfo().
879       MachineFrameInfo &MFI = MF.getFrameInfo();
880       int FPIndex = FI->getFramePointerSaveIndex();
881       assert(FPIndex && "No Frame Pointer Save Slot!");
882       FPOffset = MFI.getObjectOffset(FPIndex);
883     } else {
884       FPOffset = getFramePointerSaveOffset();
885     }
886   }
887 
888   int BPOffset = 0;
889   if (HasBP) {
890     if (isSVR4ABI) {
891       MachineFrameInfo &MFI = MF.getFrameInfo();
892       int BPIndex = FI->getBasePointerSaveIndex();
893       assert(BPIndex && "No Base Pointer Save Slot!");
894       BPOffset = MFI.getObjectOffset(BPIndex);
895     } else {
896       BPOffset = getBasePointerSaveOffset();
897     }
898   }
899 
900   int PBPOffset = 0;
901   if (FI->usesPICBase()) {
902     MachineFrameInfo &MFI = MF.getFrameInfo();
903     int PBPIndex = FI->getPICBasePointerSaveIndex();
904     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
905     PBPOffset = MFI.getObjectOffset(PBPIndex);
906   }
907 
908   // Get stack alignments.
909   unsigned MaxAlign = MFI.getMaxAlignment();
910   if (HasBP && MaxAlign > 1)
911     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
912            "Invalid alignment!");
913 
914   // Frames of 32KB & larger require special handling because they cannot be
915   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
916   bool isLargeFrame = !isInt<16>(NegFrameSize);
917 
918   assert((isPPC64 || !MustSaveCR) &&
919          "Prologue CR saving supported only in 64-bit mode");
920 
921   if (MustSaveCR && isAIXABI)
922     report_fatal_error("Prologue CR saving is unimplemented on AIX.");
923 
924   // Check if we can move the stack update instruction (stdu) down the prologue
925   // past the callee saves. Hopefully this will avoid the situation where the
926   // saves are waiting for the update on the store with update to complete.
      // StackUpdateLoc is the insertion point used further down for the LR
      // store, the small-frame store-with-update and the TOC save; it starts at
      // MBBI and is advanced once per qualifying callee-saved entry.
927   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
928   bool MovingStackUpdateDown = false;
929 
930   // Check if we can move the stack update.
931   if (stackUpdateCanBeMoved(MF)) {
932     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
        // NOTE(review): each CalleeSavedInfo is copied here (iteration by value).
933     for (CalleeSavedInfo CSI : Info) {
934       int FrIdx = CSI.getFrameIdx();
935       // If the frame index is not negative the callee saved info belongs to a
936       // stack object that is not a fixed stack object. We ignore non-fixed
937       // stack objects because we won't move the stack update pointer past them.
938       if (FrIdx >= 0)
939         continue;
940 
941       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
942         StackUpdateLoc++;
943         MovingStackUpdateDown = true;
944       } else {
945         // We need all of the Frame Indices to meet these conditions.
946         // If they do not, abort the whole operation.
947         StackUpdateLoc = MBBI;
948         MovingStackUpdateDown = false;
949         break;
950       }
951     }
952 
953     // If the operation was not aborted then bias the offset of every fixed
        // callee-saved object by NegFrameSize. The CFI code at the end of this
        // function undoes this bias when it reports the offsets.
        // NOTE(review): presumably this makes the saves, which now execute
        // before the stack update, address their slots relative to the old SP
        // -- confirm against the frame index elimination code.
954     if (MovingStackUpdateDown) {
955       for (CalleeSavedInfo CSI : Info) {
956         int FrIdx = CSI.getFrameIdx();
957         if (FrIdx < 0)
958           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
959       }
960     }
961   }
962 
963   // If we need to spill the CR and the LR but we don't have two separate
964   // registers available, we must spill them one at a time.
965   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
966     // In the ELFv2 ABI, we are not required to save all CR fields.
967     // When exactly one CR field must be saved, mfocrf is used to save just
968     // that field (the stated rationale: mfocrf has shorter latency compared
969     // to mfcr). Otherwise mfcr is used, with the fields as implicit kills.
970     unsigned MfcrOpcode = PPC::MFCR8;
971     unsigned CrState = RegState::ImplicitKill;
972     if (isELFv2ABI && MustSaveCRs.size() == 1) {
973       MfcrOpcode = PPC::MFOCRF8;
974       CrState = RegState::Kill;
975     }
976     MachineInstrBuilder MIB =
977       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
978     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
979       MIB.addReg(MustSaveCRs[i], CrState);
        // Store the CR copy right away so TempReg is free again before the
        // MFLR below writes ScratchReg (the same register in this branch).
980     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
981       .addReg(TempReg, getKillRegState(true))
982       .addImm(getCRSaveOffset())
983       .addReg(SPReg);
984   }
985 
      // Copy LR into ScratchReg; the store of ScratchReg is emitted further down.
986   if (MustSaveLR)
987     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
988 
989   if (MustSaveCR &&
990       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
991     // In the ELFv2 ABI, we are not required to save all CR fields.
992     // When exactly one CR field must be saved, mfocrf is used to save just
993     // that field (the stated rationale: mfocrf has shorter latency compared
994     // to mfcr). Otherwise mfcr is used, with the fields as implicit kills.
995     unsigned MfcrOpcode = PPC::MFCR8;
996     unsigned CrState = RegState::ImplicitKill;
997     if (isELFv2ABI && MustSaveCRs.size() == 1) {
998       MfcrOpcode = PPC::MFOCRF8;
999       CrState = RegState::Kill;
1000     }
1001     MachineInstrBuilder MIB =
1002       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
1003     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1004       MIB.addReg(MustSaveCRs[i], CrState);
1005   }
1006 
       // With a red zone, FP, the PIC base (R30) and BP are stored relative to
       // the current SP before the frame is allocated. Without one, these stores
       // are emitted after the stack update (see the !HasRedZone block below).
1007   if (HasRedZone) {
1008     if (HasFP)
1009       BuildMI(MBB, MBBI, dl, StoreInst)
1010         .addReg(FPReg)
1011         .addImm(FPOffset)
1012         .addReg(SPReg);
1013     if (FI->usesPICBase())
1014       BuildMI(MBB, MBBI, dl, StoreInst)
1015         .addReg(PPC::R30)
1016         .addImm(PBPOffset)
1017         .addReg(SPReg);
1018     if (HasBP)
1019       BuildMI(MBB, MBBI, dl, StoreInst)
1020         .addReg(BPReg)
1021         .addImm(BPOffset)
1022         .addReg(SPReg);
1023   }
1024 
       // Store the LR copy. Note that this is inserted at StackUpdateLoc rather
       // than at MBBI.
1025   if (MustSaveLR)
1026     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1027       .addReg(ScratchReg, getKillRegState(true))
1028       .addImm(LROffset)
1029       .addReg(SPReg);
1030 
1031   if (MustSaveCR &&
1032       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1033     assert(HasRedZone && "A red zone is always available on PPC64");
1034     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1035       .addReg(TempReg, getKillRegState(true))
1036       .addImm(getCRSaveOffset())
1037       .addReg(SPReg);
1038   }
1039 
1040   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1041   if (!FrameSize)
1042     return;
1043 
1044   // Adjust stack pointer: r1 += NegFrameSize.
1045   // If there is a preferred stack alignment, align R1 now.
1046 
1047   if (HasBP && HasRedZone) {
1048     // Save a copy of r1 as the base pointer.
1049     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1050       .addReg(SPReg)
1051       .addReg(SPReg);
1052   }
1053 
1054   // Have we generated a STUX instruction to claim stack frame? If so,
1055   // the negated frame size will be placed in ScratchReg.
1056   bool HasSTUX = false;
1057 
1058   // This condition must be kept in sync with canUseAsPrologue.
1059   if (HasBP && MaxAlign > 1) {
         // Extract the low Log2(MaxAlign) bits of SP into ScratchReg, then
         // compute ScratchReg = NegFrameSize - ScratchReg, which is used as the
         // index of the store-with-update that claims the frame.
1060     if (isPPC64)
1061       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1062         .addReg(SPReg)
1063         .addImm(0)
1064         .addImm(64 - Log2_32(MaxAlign));
1065     else // PPC32...
1066       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1067         .addReg(SPReg)
1068         .addImm(0)
1069         .addImm(32 - Log2_32(MaxAlign))
1070         .addImm(31);
1071     if (!isLargeFrame) {
1072       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1073         .addReg(ScratchReg, RegState::Kill)
1074         .addImm(NegFrameSize);
1075     } else {
           // NegFrameSize does not fit in 16 bits: materialize it in TempReg
           // with lis/ori, which requires a second, distinct scratch register.
1076       assert(!SingleScratchReg && "Only a single scratch reg available");
1077       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1078         .addImm(NegFrameSize >> 16);
1079       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1080         .addReg(TempReg, RegState::Kill)
1081         .addImm(NegFrameSize & 0xFFFF);
1082       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1083         .addReg(ScratchReg, RegState::Kill)
1084         .addReg(TempReg, RegState::Kill);
1085     }
1086 
1087     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1088       .addReg(SPReg, RegState::Kill)
1089       .addReg(SPReg)
1090       .addReg(ScratchReg);
1091     HasSTUX = true;
1092 
1093   } else if (!isLargeFrame) {
         // Small frame, no realignment: a single store-with-update using the
         // immediate NegFrameSize, inserted at StackUpdateLoc.
1094     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1095       .addReg(SPReg)
1096       .addImm(NegFrameSize)
1097       .addReg(SPReg);
1098 
1099   } else {
         // Large frame, no realignment: build NegFrameSize in ScratchReg with
         // lis/ori and use the indexed store-with-update.
1100     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1101       .addImm(NegFrameSize >> 16);
1102     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1103       .addReg(ScratchReg, RegState::Kill)
1104       .addImm(NegFrameSize & 0xFFFF);
1105     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1106       .addReg(SPReg, RegState::Kill)
1107       .addReg(SPReg)
1108       .addReg(ScratchReg);
1109     HasSTUX = true;
1110   }
1111 
1112   // Save the TOC register after the stack pointer update if a prologue TOC
1113   // save is required for the function. The store is inserted at
       // StackUpdateLoc and addresses the slot relative to the updated SP.
1114   if (MustSaveTOC) {
1115     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1116     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1117       .addReg(TOCReg, getKillRegState(true))
1118       .addImm(TOCSaveOffset)
1119       .addReg(SPReg);
1120   }
1121 
1122   if (!HasRedZone) {
1123     assert(!isPPC64 && "A red zone is always available on PPC64");
1124     if (HasSTUX) {
1125       // The negated frame size is in ScratchReg, and the SPReg has been
1126       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1127       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1128       // the stack frame (i.e. the old SP), ideally, we would put the old
1129       // SP into a register and use it as the base for the stores. The
1130       // problem is that the only available register may be ScratchReg,
1131       // which could be R0, and R0 cannot be used as a base address.
1132 
1133       // First, set ScratchReg to the old SP. This may need to be modified
1134       // later.
1135       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1136         .addReg(ScratchReg, RegState::Kill)
1137         .addReg(SPReg);
1138 
1139       if (ScratchReg == PPC::R0) {
1140         // R0 cannot be used as a base register, but it can be used as an
1141         // index in a store-indexed. LastOffset tracks the offset already
             // added to R0, so each step only adds the difference to the next
             // slot.
1142         int LastOffset = 0;
1143         if (HasFP)  {
1144           // R0 += (FPOffset-LastOffset).
1145           // Need addic, since addi treats R0 as 0.
1146           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1147             .addReg(ScratchReg)
1148             .addImm(FPOffset-LastOffset);
1149           LastOffset = FPOffset;
1150           // Store FP into *R0.
1151           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1152             .addReg(FPReg, RegState::Kill)  // Save FP.
1153             .addReg(PPC::ZERO)
1154             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1155         }
1156         if (FI->usesPICBase()) {
1157           // R0 += (PBPOffset-LastOffset).
1158           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1159             .addReg(ScratchReg)
1160             .addImm(PBPOffset-LastOffset);
1161           LastOffset = PBPOffset;
1162           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1163             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1164             .addReg(PPC::ZERO)
1165             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1166         }
1167         if (HasBP) {
1168           // R0 += (BPOffset-LastOffset).
1169           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1170             .addReg(ScratchReg)
1171             .addImm(BPOffset-LastOffset);
1172           LastOffset = BPOffset;
1173           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1174             .addReg(BPReg, RegState::Kill)  // Save BP.
1175             .addReg(PPC::ZERO)
1176             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1177           // BP = R0-LastOffset, i.e. the old SP.
1178           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1179             .addReg(ScratchReg, RegState::Kill)
1180             .addImm(-LastOffset);
1181         }
1182       } else {
1183         // ScratchReg is not R0, so use it as the base register. It is
1184         // already set to the old SP, so we can use the offsets directly.
1185 
1186         // Now that the stack frame has been allocated, save all the necessary
1187         // registers using ScratchReg as the base address.
1188         if (HasFP)
1189           BuildMI(MBB, MBBI, dl, StoreInst)
1190             .addReg(FPReg)
1191             .addImm(FPOffset)
1192             .addReg(ScratchReg);
1193         if (FI->usesPICBase())
1194           BuildMI(MBB, MBBI, dl, StoreInst)
1195             .addReg(PPC::R30)
1196             .addImm(PBPOffset)
1197             .addReg(ScratchReg);
1198         if (HasBP) {
1199           BuildMI(MBB, MBBI, dl, StoreInst)
1200             .addReg(BPReg)
1201             .addImm(BPOffset)
1202             .addReg(ScratchReg);
               // Copy the old SP (held in ScratchReg) into the base pointer.
1203           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1204             .addReg(ScratchReg, RegState::Kill)
1205             .addReg(ScratchReg);
1206         }
1207       }
1208     } else {
1209       // The frame size is a known 16-bit constant (fitting in the immediate
1210       // field of STWU). To be here we have to be compiling for PPC32.
1211       // Since the SPReg has been decreased by FrameSize, add it back to each
1212       // offset.
1213       if (HasFP)
1214         BuildMI(MBB, MBBI, dl, StoreInst)
1215           .addReg(FPReg)
1216           .addImm(FrameSize + FPOffset)
1217           .addReg(SPReg);
1218       if (FI->usesPICBase())
1219         BuildMI(MBB, MBBI, dl, StoreInst)
1220           .addReg(PPC::R30)
1221           .addImm(FrameSize + PBPOffset)
1222           .addReg(SPReg);
1223       if (HasBP) {
1224         BuildMI(MBB, MBBI, dl, StoreInst)
1225           .addReg(BPReg)
1226           .addImm(FrameSize + BPOffset)
1227           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the SP value before the stack update.
1228         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1229           .addReg(SPReg)
1230           .addImm(FrameSize);
1231       }
1232     }
1233   }
1234 
1235   // Add Call Frame Information for the instructions we generated above.
1236   if (needsCFI) {
1237     unsigned CFIIndex;
1238 
1239     if (HasBP) {
1240       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1241       // because if the stack needed aligning then CFA won't be at a fixed
1242       // offset from FP/SP.
1243       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1244       CFIIndex = MF.addFrameInst(
1245           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1246     } else {
1247       // Adjust the definition of CFA to account for the change in SP.
1248       assert(NegFrameSize);
1249       CFIIndex = MF.addFrameInst(
1250           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1251     }
1252     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1253         .addCFIIndex(CFIIndex);
1254 
1255     if (HasFP) {
1256       // Describe where FP was saved, at a fixed offset from CFA.
1257       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1258       CFIIndex = MF.addFrameInst(
1259           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1260       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1261           .addCFIIndex(CFIIndex);
1262     }
1263 
1264     if (FI->usesPICBase()) {
1265       // Describe where the PIC base (R30) was saved, at a fixed offset from CFA.
1266       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1267       CFIIndex = MF.addFrameInst(
1268           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1269       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1270           .addCFIIndex(CFIIndex);
1271     }
1272 
1273     if (HasBP) {
1274       // Describe where BP was saved, at a fixed offset from CFA.
1275       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1276       CFIIndex = MF.addFrameInst(
1277           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1278       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1279           .addCFIIndex(CFIIndex);
1280     }
1281 
1282     if (MustSaveLR) {
1283       // Describe where LR was saved, at a fixed offset from CFA.
1284       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1285       CFIIndex = MF.addFrameInst(
1286           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1287       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1288           .addCFIIndex(CFIIndex);
1289     }
1290   }
1291 
1292   // If there is a frame pointer, copy R1 into R31.
1293   if (HasFP) {
1294     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1295       .addReg(SPReg)
1296       .addReg(SPReg);
1297 
1298     if (!HasBP && needsCFI) {
1299       // Change the definition of CFA from SP+offset to FP+offset, because SP
1300       // will change at every alloca.
1301       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1302       unsigned CFIIndex = MF.addFrameInst(
1303           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1304 
1305       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1306           .addCFIIndex(CFIIndex);
1307     }
1308   }
1309 
1310   if (needsCFI) {
1311     // Describe where callee saved registers were saved, at fixed offsets from
1312     // CFA.
1313     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1314     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1315       unsigned Reg = CSI[I].getReg();
           // LR is described by the MustSaveLR CFI above; no CFI is emitted
           // here for LR/LR8 or RM.
1316       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1317 
1318       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1319       // subregisters of CR2. We just need to emit a move of CR2.
1320       if (PPC::CRBITRCRegClass.contains(Reg))
1321         continue;
1322 
           // No CFI record is emitted for the TOC register when it is saved by
           // the prologue TOC save above.
1323       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1324         continue;
1325 
1326       // For SVR4, don't emit a move for the CR spill slot if we haven't
1327       // spilled CRs.
1328       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1329           && !MustSaveCR)
1330         continue;
1331 
1332       // For 64-bit SVR4 when we have spilled CRs, the spill location
1333       // is SP+8, not a frame-relative slot.
1334       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1335         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1336         // the whole CR word.  In the ELFv2 ABI, every CR that was
1337         // actually saved gets its own CFI record.
1338         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1339         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1340             nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1341         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1342             .addCFIIndex(CFIIndex);
1343         continue;
1344       }
1345 
1346       if (CSI[I].isSpilledToReg()) {
             // The register was saved in another register rather than on the
             // stack: record a register-to-register CFI rule.
1347         unsigned SpilledReg = CSI[I].getDstReg();
1348         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1349             nullptr, MRI->getDwarfRegNum(Reg, true),
1350             MRI->getDwarfRegNum(SpilledReg, true)));
1351         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1352           .addCFIIndex(CFIRegister);
1353       } else {
1354         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1355         // We have changed the object offset above but we do not want to change
1356         // the actual offsets in the CFI instruction so we have to undo the
1357         // offset change here.
1358         if (MovingStackUpdateDown)
1359           Offset -= NegFrameSize;
1360 
1361         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1362             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1363         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1364             .addCFIIndex(CFIIndex);
1365       }
1366     }
1367   }
1368 }
1369 
1370 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1371                                     MachineBasicBlock &MBB) const {
1372   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1373   DebugLoc dl;
1374 
1375   if (MBBI != MBB.end())
1376     dl = MBBI->getDebugLoc();
1377 
1378   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1379   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1380 
1381   // Get alignment info so we know how to restore the SP.
1382   const MachineFrameInfo &MFI = MF.getFrameInfo();
1383 
1384   // Get the number of bytes allocated from the FrameInfo.
1385   int FrameSize = MFI.getStackSize();
1386 
1387   // Get processor type.
1388   bool isPPC64 = Subtarget.isPPC64();
1389   // Get the ABI.
1390   bool isSVR4ABI = Subtarget.isSVR4ABI();
1391 
1392   // Check if the link register (LR) has been saved.
1393   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1394   bool MustSaveLR = FI->mustSaveLR();
1395   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1396   bool MustSaveCR = !MustSaveCRs.empty();
1397   // Do we have a frame pointer and/or base pointer for this function?
1398   bool HasFP = hasFP(MF);
1399   bool HasBP = RegInfo->hasBasePointer(MF);
1400   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1401 
1402   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1403   Register BPReg = RegInfo->getBaseRegister(MF);
1404   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1405   unsigned ScratchReg = 0;
1406   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1407   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1408                                                  : PPC::MTLR );
1409   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1410                                                  : PPC::LWZ );
1411   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1412                                                            : PPC::LIS );
1413   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1414                                               : PPC::OR );
1415   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1416                                                   : PPC::ORI );
1417   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1418                                                    : PPC::ADDI );
1419   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1420                                                 : PPC::ADD4 );
1421 
1422   int LROffset = getReturnSaveOffset();
1423 
1424   int FPOffset = 0;
1425 
1426   // Using the same bool variable as below to suppress compiler warnings.
1427   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1428                                               &TempReg);
1429   assert(SingleScratchReg &&
1430          "Could not find an available scratch register");
1431 
1432   SingleScratchReg = ScratchReg == TempReg;
1433 
1434   if (HasFP) {
1435     if (isSVR4ABI) {
1436       int FPIndex = FI->getFramePointerSaveIndex();
1437       assert(FPIndex && "No Frame Pointer Save Slot!");
1438       FPOffset = MFI.getObjectOffset(FPIndex);
1439     } else {
1440       FPOffset = getFramePointerSaveOffset();
1441     }
1442   }
1443 
1444   int BPOffset = 0;
1445   if (HasBP) {
1446     if (isSVR4ABI) {
1447       int BPIndex = FI->getBasePointerSaveIndex();
1448       assert(BPIndex && "No Base Pointer Save Slot!");
1449       BPOffset = MFI.getObjectOffset(BPIndex);
1450     } else {
1451       BPOffset = getBasePointerSaveOffset();
1452     }
1453   }
1454 
1455   int PBPOffset = 0;
1456   if (FI->usesPICBase()) {
1457     int PBPIndex = FI->getPICBasePointerSaveIndex();
1458     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1459     PBPOffset = MFI.getObjectOffset(PBPIndex);
1460   }
1461 
1462   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1463 
1464   if (IsReturnBlock) {
1465     unsigned RetOpcode = MBBI->getOpcode();
1466     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1467                       RetOpcode == PPC::TCRETURNdi ||
1468                       RetOpcode == PPC::TCRETURNai ||
1469                       RetOpcode == PPC::TCRETURNri8 ||
1470                       RetOpcode == PPC::TCRETURNdi8 ||
1471                       RetOpcode == PPC::TCRETURNai8;
1472 
1473     if (UsesTCRet) {
1474       int MaxTCRetDelta = FI->getTailCallSPDelta();
1475       MachineOperand &StackAdjust = MBBI->getOperand(1);
1476       assert(StackAdjust.isImm() && "Expecting immediate value.");
1477       // Adjust stack pointer.
1478       int StackAdj = StackAdjust.getImm();
1479       int Delta = StackAdj - MaxTCRetDelta;
1480       assert((Delta >= 0) && "Delta must be positive");
1481       if (MaxTCRetDelta>0)
1482         FrameSize += (StackAdj +Delta);
1483       else
1484         FrameSize += StackAdj;
1485     }
1486   }
1487 
1488   // Frames of 32KB & larger require special handling because they cannot be
1489   // indexed into with a simple LD/LWZ immediate offset operand.
1490   bool isLargeFrame = !isInt<16>(FrameSize);
1491 
1492   // On targets without red zone, the SP needs to be restored last, so that
1493   // all live contents of the stack frame are upwards of the SP. This means
1494   // that we cannot restore SP just now, since there may be more registers
1495   // to restore from the stack frame (e.g. R31). If the frame size is not
1496   // a simple immediate value, we will need a spare register to hold the
1497   // restored SP. If the frame size is known and small, we can simply adjust
1498   // the offsets of the registers to be restored, and still use SP to restore
1499   // them. In such case, the final update of SP will be to add the frame
1500   // size to it.
1501   // To simplify the code, set RBReg to the base register used to restore
1502   // values from the stack, and set SPAdd to the value that needs to be added
1503   // to the SP at the end. The default values are as if red zone was present.
1504   unsigned RBReg = SPReg;
1505   unsigned SPAdd = 0;
1506 
1507   // Check if we can move the stack update instruction up the epilogue
1508   // past the callee saves. This will allow the move to LR instruction
1509   // to be executed before the restores of the callee saves which means
1510   // that the callee saves can hide the latency from the MTLR instrcution.
1511   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1512   if (stackUpdateCanBeMoved(MF)) {
1513     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1514     for (CalleeSavedInfo CSI : Info) {
1515       int FrIdx = CSI.getFrameIdx();
1516       // If the frame index is not negative the callee saved info belongs to a
1517       // stack object that is not a fixed stack object. We ignore non-fixed
1518       // stack objects because we won't move the update of the stack pointer
1519       // past them.
1520       if (FrIdx >= 0)
1521         continue;
1522 
1523       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1524         StackUpdateLoc--;
1525       else {
1526         // Abort the operation as we can't update all CSR restores.
1527         StackUpdateLoc = MBBI;
1528         break;
1529       }
1530     }
1531   }
1532 
1533   if (FrameSize) {
1534     // In the prologue, the loaded (or persistent) stack pointer value is
1535     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1536     // zone add this offset back now.
1537 
1538     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1539     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1540     // call which invalidates the stack pointer value in SP(0). So we use the
1541     // value of R31 in this case.
1542     if (FI->hasFastCall()) {
1543       assert(HasFP && "Expecting a valid frame pointer.");
1544       if (!HasRedZone)
1545         RBReg = FPReg;
1546       if (!isLargeFrame) {
1547         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1548           .addReg(FPReg).addImm(FrameSize);
1549       } else {
1550         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1551           .addImm(FrameSize >> 16);
1552         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1553           .addReg(ScratchReg, RegState::Kill)
1554           .addImm(FrameSize & 0xFFFF);
1555         BuildMI(MBB, MBBI, dl, AddInst)
1556           .addReg(RBReg)
1557           .addReg(FPReg)
1558           .addReg(ScratchReg);
1559       }
1560     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1561       if (HasRedZone) {
1562         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1563           .addReg(SPReg)
1564           .addImm(FrameSize);
1565       } else {
1566         // Make sure that adding FrameSize will not overflow the max offset
1567         // size.
1568         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1569                "Local offsets should be negative");
1570         SPAdd = FrameSize;
1571         FPOffset += FrameSize;
1572         BPOffset += FrameSize;
1573         PBPOffset += FrameSize;
1574       }
1575     } else {
1576       // We don't want to use ScratchReg as a base register, because it
1577       // could happen to be R0. Use FP instead, but make sure to preserve it.
1578       if (!HasRedZone) {
1579         // If FP is not saved, copy it to ScratchReg.
1580         if (!HasFP)
1581           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1582             .addReg(FPReg)
1583             .addReg(FPReg);
1584         RBReg = FPReg;
1585       }
1586       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1587         .addImm(0)
1588         .addReg(SPReg);
1589     }
1590   }
1591   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1592   // If there is no red zone, ScratchReg may be needed for holding a useful
1593   // value (although not the base register). Make sure it is not overwritten
1594   // too early.
1595 
1596   assert((isPPC64 || !MustSaveCR) &&
1597          "Epilogue CR restoring supported only in 64-bit mode");
1598 
1599   // If we need to restore both the LR and the CR and we only have one
1600   // available scratch register, we must do them one at a time.
1601   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1602     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1603     // is live here.
1604     assert(HasRedZone && "Expecting red zone");
1605     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1606       .addImm(getCRSaveOffset())
1607       .addReg(SPReg);
1608     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1609       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1610         .addReg(TempReg, getKillRegState(i == e-1));
1611   }
1612 
1613   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1614   // LR is stored in the caller's stack frame. ScratchReg will be needed
1615   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1616   // a base register anyway, because it may happen to be R0.
1617   bool LoadedLR = false;
1618   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1619     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1620       .addImm(LROffset+SPAdd)
1621       .addReg(RBReg);
1622     LoadedLR = true;
1623   }
1624 
1625   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1626     // This will only occur for PPC64.
1627     assert(isPPC64 && "Expecting 64-bit mode");
1628     assert(RBReg == SPReg && "Should be using SP as a base register");
1629     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1630       .addImm(getCRSaveOffset())
1631       .addReg(RBReg);
1632   }
1633 
1634   if (HasFP) {
1635     // If there is red zone, restore FP directly, since SP has already been
1636     // restored. Otherwise, restore the value of FP into ScratchReg.
1637     if (HasRedZone || RBReg == SPReg)
1638       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1639         .addImm(FPOffset)
1640         .addReg(SPReg);
1641     else
1642       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1643         .addImm(FPOffset)
1644         .addReg(RBReg);
1645   }
1646 
1647   if (FI->usesPICBase())
1648     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1649       .addImm(PBPOffset)
1650       .addReg(RBReg);
1651 
1652   if (HasBP)
1653     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1654       .addImm(BPOffset)
1655       .addReg(RBReg);
1656 
1657   // There is nothing more to be loaded from the stack, so now we can
1658   // restore SP: SP = RBReg + SPAdd.
1659   if (RBReg != SPReg || SPAdd != 0) {
1660     assert(!HasRedZone && "This should not happen with red zone");
1661     // If SPAdd is 0, generate a copy.
1662     if (SPAdd == 0)
1663       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1664         .addReg(RBReg)
1665         .addReg(RBReg);
1666     else
1667       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1668         .addReg(RBReg)
1669         .addImm(SPAdd);
1670 
1671     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1672     if (RBReg == FPReg)
1673       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1674         .addReg(ScratchReg)
1675         .addReg(ScratchReg);
1676 
1677     // Now load the LR from the caller's stack frame.
1678     if (MustSaveLR && !LoadedLR)
1679       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1680         .addImm(LROffset)
1681         .addReg(SPReg);
1682   }
1683 
1684   if (MustSaveCR &&
1685       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1686     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1687       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1688         .addReg(TempReg, getKillRegState(i == e-1));
1689 
1690   if (MustSaveLR)
1691     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1692 
1693   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1694   // call optimization
1695   if (IsReturnBlock) {
1696     unsigned RetOpcode = MBBI->getOpcode();
1697     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1698         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1699         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1700       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1701       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1702 
1703       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1704         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1705           .addReg(SPReg).addImm(CallerAllocatedAmt);
1706       } else {
1707         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1708           .addImm(CallerAllocatedAmt >> 16);
1709         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1710           .addReg(ScratchReg, RegState::Kill)
1711           .addImm(CallerAllocatedAmt & 0xFFFF);
1712         BuildMI(MBB, MBBI, dl, AddInst)
1713           .addReg(SPReg)
1714           .addReg(FPReg)
1715           .addReg(ScratchReg);
1716       }
1717     } else {
1718       createTailCallBranchInstr(MBB);
1719     }
1720   }
1721 }
1722 
1723 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1724   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1725 
1726   // If we got this far a first terminator should exist.
1727   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1728 
1729   DebugLoc dl = MBBI->getDebugLoc();
1730   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1731 
1732   // Create branch instruction for pseudo tail call return instruction
1733   unsigned RetOpcode = MBBI->getOpcode();
1734   if (RetOpcode == PPC::TCRETURNdi) {
1735     MBBI = MBB.getLastNonDebugInstr();
1736     MachineOperand &JumpTarget = MBBI->getOperand(0);
1737     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1738       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1739   } else if (RetOpcode == PPC::TCRETURNri) {
1740     MBBI = MBB.getLastNonDebugInstr();
1741     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1742     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1743   } else if (RetOpcode == PPC::TCRETURNai) {
1744     MBBI = MBB.getLastNonDebugInstr();
1745     MachineOperand &JumpTarget = MBBI->getOperand(0);
1746     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1747   } else if (RetOpcode == PPC::TCRETURNdi8) {
1748     MBBI = MBB.getLastNonDebugInstr();
1749     MachineOperand &JumpTarget = MBBI->getOperand(0);
1750     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1751       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1752   } else if (RetOpcode == PPC::TCRETURNri8) {
1753     MBBI = MBB.getLastNonDebugInstr();
1754     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1755     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1756   } else if (RetOpcode == PPC::TCRETURNai8) {
1757     MBBI = MBB.getLastNonDebugInstr();
1758     MachineOperand &JumpTarget = MBBI->getOperand(0);
1759     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1760   }
1761 }
1762 
1763 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1764                                             BitVector &SavedRegs,
1765                                             RegScavenger *RS) const {
1766   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1767 
1768   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1769 
1770   //  Save and clear the LR state.
1771   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1772   unsigned LR = RegInfo->getRARegister();
1773   FI->setMustSaveLR(MustSaveLR(MF, LR));
1774   SavedRegs.reset(LR);
1775 
1776   //  Save R31 if necessary
1777   int FPSI = FI->getFramePointerSaveIndex();
1778   const bool isPPC64 = Subtarget.isPPC64();
1779   const bool IsDarwinABI  = Subtarget.isDarwinABI();
1780   MachineFrameInfo &MFI = MF.getFrameInfo();
1781 
1782   // If the frame pointer save index hasn't been defined yet.
1783   if (!FPSI && needsFP(MF)) {
1784     // Find out what the fix offset of the frame pointer save area.
1785     int FPOffset = getFramePointerSaveOffset();
1786     // Allocate the frame index for frame pointer save area.
1787     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1788     // Save the result.
1789     FI->setFramePointerSaveIndex(FPSI);
1790   }
1791 
1792   int BPSI = FI->getBasePointerSaveIndex();
1793   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1794     int BPOffset = getBasePointerSaveOffset();
1795     // Allocate the frame index for the base pointer save area.
1796     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1797     // Save the result.
1798     FI->setBasePointerSaveIndex(BPSI);
1799   }
1800 
1801   // Reserve stack space for the PIC Base register (R30).
1802   // Only used in SVR4 32-bit.
1803   if (FI->usesPICBase()) {
1804     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1805     FI->setPICBasePointerSaveIndex(PBPSI);
1806   }
1807 
1808   // Make sure we don't explicitly spill r31, because, for example, we have
1809   // some inline asm which explicitly clobbers it, when we otherwise have a
1810   // frame pointer and are using r31's spill slot for the prologue/epilogue
1811   // code. Same goes for the base pointer and the PIC base register.
1812   if (needsFP(MF))
1813     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1814   if (RegInfo->hasBasePointer(MF))
1815     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1816   if (FI->usesPICBase())
1817     SavedRegs.reset(PPC::R30);
1818 
1819   // Reserve stack space to move the linkage area to in case of a tail call.
1820   int TCSPDelta = 0;
1821   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1822       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1823     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1824   }
1825 
1826   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1827   // function uses CR 2, 3, or 4.
1828   if (!isPPC64 && !IsDarwinABI &&
1829       (SavedRegs.test(PPC::CR2) ||
1830        SavedRegs.test(PPC::CR3) ||
1831        SavedRegs.test(PPC::CR4))) {
1832     int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1833     FI->setCRSpillFrameIndex(FrameIdx);
1834   }
1835 }
1836 
1837 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1838                                                        RegScavenger *RS) const {
1839   // Early exit if not using the SVR4 ABI.
1840   if (!Subtarget.isSVR4ABI()) {
1841     addScavengingSpillSlot(MF, RS);
1842     return;
1843   }
1844 
1845   // Get callee saved register information.
1846   MachineFrameInfo &MFI = MF.getFrameInfo();
1847   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1848 
1849   // If the function is shrink-wrapped, and if the function has a tail call, the
1850   // tail call might not be in the new RestoreBlock, so real branch instruction
1851   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1852   // RestoreBlock. So we handle this case here.
1853   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1854     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1855     for (MachineBasicBlock &MBB : MF) {
1856       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1857         createTailCallBranchInstr(MBB);
1858     }
1859   }
1860 
1861   // Early exit if no callee saved registers are modified!
1862   if (CSI.empty() && !needsFP(MF)) {
1863     addScavengingSpillSlot(MF, RS);
1864     return;
1865   }
1866 
1867   unsigned MinGPR = PPC::R31;
1868   unsigned MinG8R = PPC::X31;
1869   unsigned MinFPR = PPC::F31;
1870   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1871 
1872   bool HasGPSaveArea = false;
1873   bool HasG8SaveArea = false;
1874   bool HasFPSaveArea = false;
1875   bool HasVRSAVESaveArea = false;
1876   bool HasVRSaveArea = false;
1877 
1878   SmallVector<CalleeSavedInfo, 18> GPRegs;
1879   SmallVector<CalleeSavedInfo, 18> G8Regs;
1880   SmallVector<CalleeSavedInfo, 18> FPRegs;
1881   SmallVector<CalleeSavedInfo, 18> VRegs;
1882 
1883   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1884     unsigned Reg = CSI[i].getReg();
1885     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1886             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1887            "Not expecting to try to spill R2 in a function that must save TOC");
1888     if (PPC::GPRCRegClass.contains(Reg)) {
1889       HasGPSaveArea = true;
1890 
1891       GPRegs.push_back(CSI[i]);
1892 
1893       if (Reg < MinGPR) {
1894         MinGPR = Reg;
1895       }
1896     } else if (PPC::G8RCRegClass.contains(Reg)) {
1897       HasG8SaveArea = true;
1898 
1899       G8Regs.push_back(CSI[i]);
1900 
1901       if (Reg < MinG8R) {
1902         MinG8R = Reg;
1903       }
1904     } else if (PPC::F8RCRegClass.contains(Reg)) {
1905       HasFPSaveArea = true;
1906 
1907       FPRegs.push_back(CSI[i]);
1908 
1909       if (Reg < MinFPR) {
1910         MinFPR = Reg;
1911       }
1912     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1913                PPC::CRRCRegClass.contains(Reg)) {
1914       ; // do nothing, as we already know whether CRs are spilled
1915     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1916       HasVRSAVESaveArea = true;
1917     } else if (PPC::VRRCRegClass.contains(Reg) ||
1918                PPC::SPERCRegClass.contains(Reg)) {
1919       // Altivec and SPE are mutually exclusive, but have the same stack
1920       // alignment requirements, so overload the save area for both cases.
1921       HasVRSaveArea = true;
1922 
1923       VRegs.push_back(CSI[i]);
1924 
1925       if (Reg < MinVR) {
1926         MinVR = Reg;
1927       }
1928     } else {
1929       llvm_unreachable("Unknown RegisterClass!");
1930     }
1931   }
1932 
1933   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1934   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1935 
1936   int64_t LowerBound = 0;
1937 
1938   // Take into account stack space reserved for tail calls.
1939   int TCSPDelta = 0;
1940   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1941       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1942     LowerBound = TCSPDelta;
1943   }
1944 
1945   // The Floating-point register save area is right below the back chain word
1946   // of the previous stack frame.
1947   if (HasFPSaveArea) {
1948     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1949       int FI = FPRegs[i].getFrameIdx();
1950 
1951       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1952     }
1953 
1954     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1955   }
1956 
1957   // Check whether the frame pointer register is allocated. If so, make sure it
1958   // is spilled to the correct offset.
1959   if (needsFP(MF)) {
1960     int FI = PFI->getFramePointerSaveIndex();
1961     assert(FI && "No Frame Pointer Save Slot!");
1962     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1963     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1964     HasGPSaveArea = true;
1965   }
1966 
1967   if (PFI->usesPICBase()) {
1968     int FI = PFI->getPICBasePointerSaveIndex();
1969     assert(FI && "No PIC Base Pointer Save Slot!");
1970     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1971 
1972     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1973     HasGPSaveArea = true;
1974   }
1975 
1976   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1977   if (RegInfo->hasBasePointer(MF)) {
1978     int FI = PFI->getBasePointerSaveIndex();
1979     assert(FI && "No Base Pointer Save Slot!");
1980     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1981 
1982     Register BP = RegInfo->getBaseRegister(MF);
1983     if (PPC::G8RCRegClass.contains(BP)) {
1984       MinG8R = std::min<unsigned>(MinG8R, BP);
1985       HasG8SaveArea = true;
1986     } else if (PPC::GPRCRegClass.contains(BP)) {
1987       MinGPR = std::min<unsigned>(MinGPR, BP);
1988       HasGPSaveArea = true;
1989     }
1990   }
1991 
1992   // General register save area starts right below the Floating-point
1993   // register save area.
1994   if (HasGPSaveArea || HasG8SaveArea) {
1995     // Move general register save area spill slots down, taking into account
1996     // the size of the Floating-point register save area.
1997     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1998       if (!GPRegs[i].isSpilledToReg()) {
1999         int FI = GPRegs[i].getFrameIdx();
2000         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2001       }
2002     }
2003 
2004     // Move general register save area spill slots down, taking into account
2005     // the size of the Floating-point register save area.
2006     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2007       if (!G8Regs[i].isSpilledToReg()) {
2008         int FI = G8Regs[i].getFrameIdx();
2009         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2010       }
2011     }
2012 
2013     unsigned MinReg =
2014       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2015                          TRI->getEncodingValue(MinG8R));
2016 
2017     if (Subtarget.isPPC64()) {
2018       LowerBound -= (31 - MinReg + 1) * 8;
2019     } else {
2020       LowerBound -= (31 - MinReg + 1) * 4;
2021     }
2022   }
2023 
2024   // For 32-bit only, the CR save area is below the general register
2025   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2026   // to the stack pointer and hence does not need an adjustment here.
2027   // Only CR2 (the first nonvolatile spilled) has an associated frame
2028   // index so that we have a single uniform save area.
2029   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2030     // Adjust the frame index of the CR spill slot.
2031     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2032       unsigned Reg = CSI[i].getReg();
2033 
2034       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2035           // Leave Darwin logic as-is.
2036           || (!Subtarget.isSVR4ABI() &&
2037               (PPC::CRBITRCRegClass.contains(Reg) ||
2038                PPC::CRRCRegClass.contains(Reg)))) {
2039         int FI = CSI[i].getFrameIdx();
2040 
2041         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2042       }
2043     }
2044 
2045     LowerBound -= 4; // The CR save area is always 4 bytes long.
2046   }
2047 
2048   if (HasVRSAVESaveArea) {
2049     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2050     //             which have the VRSAVE register class?
2051     // Adjust the frame index of the VRSAVE spill slot.
2052     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2053       unsigned Reg = CSI[i].getReg();
2054 
2055       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2056         int FI = CSI[i].getFrameIdx();
2057 
2058         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2059       }
2060     }
2061 
2062     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2063   }
2064 
2065   // Both Altivec and SPE have the same alignment and padding requirements
2066   // within the stack frame.
2067   if (HasVRSaveArea) {
2068     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2069     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2070     // we are using negative number here (the stack grows downward). We should
2071     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2072     // is the alignment size ( n = 16 here) and y is the size after aligning.
2073     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2074     LowerBound &= ~(15);
2075 
2076     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2077       int FI = VRegs[i].getFrameIdx();
2078 
2079       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2080     }
2081   }
2082 
2083   addScavengingSpillSlot(MF, RS);
2084 }
2085 
2086 void
2087 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2088                                          RegScavenger *RS) const {
2089   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2090   // a large stack, which will require scavenging a register to materialize a
2091   // large offset.
2092 
2093   // We need to have a scavenger spill slot for spills if the frame size is
2094   // large. In case there is no free register for large-offset addressing,
2095   // this slot is used for the necessary emergency spill. Also, we need the
2096   // slot for dynamic stack allocations.
2097 
2098   // The scavenger might be invoked if the frame offset does not fit into
2099   // the 16-bit immediate. We don't know the complete frame size here
2100   // because we've not yet computed callee-saved register spills or the
2101   // needed alignment padding.
2102   unsigned StackSize = determineFrameLayout(MF, true);
2103   MachineFrameInfo &MFI = MF.getFrameInfo();
2104   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2105       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2106     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2107     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2108     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2109     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2110     unsigned Size = TRI.getSpillSize(RC);
2111     unsigned Align = TRI.getSpillAlignment(RC);
2112     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2113 
2114     // Might we have over-aligned allocas?
2115     bool HasAlVars = MFI.hasVarSizedObjects() &&
2116                      MFI.getMaxAlignment() > getStackAlignment();
2117 
2118     // These kinds of spills might need two registers.
2119     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2120       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2121 
2122   }
2123 }
2124 
2125 // This function checks if a callee saved gpr can be spilled to a volatile
2126 // vector register. This occurs for leaf functions when the option
2127 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2128 // which were not spilled to vectors, return false so the target independent
2129 // code can handle them by assigning a FrameIdx to a stack slot.
2130 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2131     MachineFunction &MF, const TargetRegisterInfo *TRI,
2132     std::vector<CalleeSavedInfo> &CSI) const {
2133 
2134   if (CSI.empty())
2135     return true; // Early exit if no callee saved registers are modified!
2136 
2137   // Early exit if cannot spill gprs to volatile vector registers.
2138   MachineFrameInfo &MFI = MF.getFrameInfo();
2139   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2140     return false;
2141 
2142   // Build a BitVector of VSRs that can be used for spilling GPRs.
2143   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2144   BitVector BVCalleeSaved(TRI->getNumRegs());
2145   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2146   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2147   for (unsigned i = 0; CSRegs[i]; ++i)
2148     BVCalleeSaved.set(CSRegs[i]);
2149 
2150   for (unsigned Reg : BVAllocatable.set_bits()) {
2151     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2152     // used in the function.
2153     if (BVCalleeSaved[Reg] ||
2154         (!PPC::F8RCRegClass.contains(Reg) &&
2155          !PPC::VFRCRegClass.contains(Reg)) ||
2156         (MF.getRegInfo().isPhysRegUsed(Reg)))
2157       BVAllocatable.reset(Reg);
2158   }
2159 
2160   bool AllSpilledToReg = true;
2161   for (auto &CS : CSI) {
2162     if (BVAllocatable.none())
2163       return false;
2164 
2165     unsigned Reg = CS.getReg();
2166     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2167       AllSpilledToReg = false;
2168       continue;
2169     }
2170 
2171     unsigned VolatileVFReg = BVAllocatable.find_first();
2172     if (VolatileVFReg < BVAllocatable.size()) {
2173       CS.setDstReg(VolatileVFReg);
2174       BVAllocatable.reset(VolatileVFReg);
2175     } else {
2176       AllSpilledToReg = false;
2177     }
2178   }
2179   return AllSpilledToReg;
2180 }
2181 
2182 
2183 bool
2184 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2185                                      MachineBasicBlock::iterator MI,
2186                                      const std::vector<CalleeSavedInfo> &CSI,
2187                                      const TargetRegisterInfo *TRI) const {
2188 
2189   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2190   // Return false otherwise to maintain pre-existing behavior.
2191   if (!Subtarget.isSVR4ABI())
2192     return false;
2193 
2194   MachineFunction *MF = MBB.getParent();
2195   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2196   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2197   bool MustSaveTOC = FI->mustSaveTOC();
2198   DebugLoc DL;
2199   bool CRSpilled = false;
2200   MachineInstrBuilder CRMIB;
2201 
2202   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2203     unsigned Reg = CSI[i].getReg();
2204     // Only Darwin actually uses the VRSAVE register, but it can still appear
2205     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2206     // Darwin, ignore it.
2207     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2208       continue;
2209 
2210     // CR2 through CR4 are the nonvolatile CR fields.
2211     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2212 
2213     // Add the callee-saved register as live-in; it's killed at the spill.
2214     // Do not do this for callee-saved registers that are live-in to the
2215     // function because they will already be marked live-in and this will be
2216     // adding it for a second time. It is an error to add the same register
2217     // to the set more than once.
2218     const MachineRegisterInfo &MRI = MF->getRegInfo();
2219     bool IsLiveIn = MRI.isLiveIn(Reg);
2220     if (!IsLiveIn)
2221        MBB.addLiveIn(Reg);
2222 
2223     if (CRSpilled && IsCRField) {
2224       CRMIB.addReg(Reg, RegState::ImplicitKill);
2225       continue;
2226     }
2227 
2228     // The actual spill will happen in the prologue.
2229     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2230       continue;
2231 
2232     // Insert the spill to the stack frame.
2233     if (IsCRField) {
2234       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2235       if (Subtarget.isPPC64()) {
2236         // The actual spill will happen at the start of the prologue.
2237         FuncInfo->addMustSaveCR(Reg);
2238       } else {
2239         CRSpilled = true;
2240         FuncInfo->setSpillsCR();
2241 
2242         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2243         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2244         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2245                   .addReg(Reg, RegState::ImplicitKill);
2246 
2247         MBB.insert(MI, CRMIB);
2248         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2249                                          .addReg(PPC::R12,
2250                                                  getKillRegState(true)),
2251                                          CSI[i].getFrameIdx()));
2252       }
2253     } else {
2254       if (CSI[i].isSpilledToReg()) {
2255         NumPESpillVSR++;
2256         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2257           .addReg(Reg, getKillRegState(true));
2258       } else {
2259         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2260         // Use !IsLiveIn for the kill flag.
2261         // We do not want to kill registers that are live in this function
2262         // before their use because they will become undefined registers.
2263         TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2264                                 CSI[i].getFrameIdx(), RC, TRI);
2265       }
2266     }
2267   }
2268   return true;
2269 }
2270 
2271 static void
2272 restoreCRs(bool isPPC64, bool is31,
2273            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2274            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2275            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2276 
2277   MachineFunction *MF = MBB.getParent();
2278   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2279   DebugLoc DL;
2280   unsigned RestoreOp, MoveReg;
2281 
2282   if (isPPC64)
2283     // This is handled during epilogue generation.
2284     return;
2285   else {
2286     // 32-bit:  FP-relative
2287     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2288                                              PPC::R12),
2289                                      CSI[CSIIndex].getFrameIdx()));
2290     RestoreOp = PPC::MTOCRF;
2291     MoveReg = PPC::R12;
2292   }
2293 
2294   if (CR2Spilled)
2295     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2296                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2297 
2298   if (CR3Spilled)
2299     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2300                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2301 
2302   if (CR4Spilled)
2303     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2304                .addReg(MoveReg, getKillRegState(true)));
2305 }
2306 
2307 MachineBasicBlock::iterator PPCFrameLowering::
2308 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2309                               MachineBasicBlock::iterator I) const {
2310   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2311   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2312       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2313     // Add (actually subtract) back the amount the callee popped on return.
2314     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2315       bool is64Bit = Subtarget.isPPC64();
2316       CalleeAmt *= -1;
2317       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2318       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2319       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2320       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2321       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2322       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2323       const DebugLoc &dl = I->getDebugLoc();
2324 
2325       if (isInt<16>(CalleeAmt)) {
2326         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2327           .addReg(StackReg, RegState::Kill)
2328           .addImm(CalleeAmt);
2329       } else {
2330         MachineBasicBlock::iterator MBBI = I;
2331         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2332           .addImm(CalleeAmt >> 16);
2333         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2334           .addReg(TmpReg, RegState::Kill)
2335           .addImm(CalleeAmt & 0xFFFF);
2336         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2337           .addReg(StackReg, RegState::Kill)
2338           .addReg(TmpReg);
2339       }
2340     }
2341   }
2342   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2343   return MBB.erase(I);
2344 }
2345 
2346 bool
2347 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2348                                         MachineBasicBlock::iterator MI,
2349                                         std::vector<CalleeSavedInfo> &CSI,
2350                                         const TargetRegisterInfo *TRI) const {
2351 
2352   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2353   // Return false otherwise to maintain pre-existing behavior.
2354   if (!Subtarget.isSVR4ABI())
2355     return false;
2356 
2357   MachineFunction *MF = MBB.getParent();
2358   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2359   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2360   bool MustSaveTOC = FI->mustSaveTOC();
2361   bool CR2Spilled = false;
2362   bool CR3Spilled = false;
2363   bool CR4Spilled = false;
2364   unsigned CSIIndex = 0;
2365 
2366   // Initialize insertion-point logic; we will be restoring in reverse
2367   // order of spill.
2368   MachineBasicBlock::iterator I = MI, BeforeI = I;
2369   bool AtStart = I == MBB.begin();
2370 
2371   if (!AtStart)
2372     --BeforeI;
2373 
2374   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2375     unsigned Reg = CSI[i].getReg();
2376 
2377     // Only Darwin actually uses the VRSAVE register, but it can still appear
2378     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2379     // Darwin, ignore it.
2380     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2381       continue;
2382 
2383     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2384       continue;
2385 
2386     if (Reg == PPC::CR2) {
2387       CR2Spilled = true;
2388       // The spill slot is associated only with CR2, which is the
2389       // first nonvolatile spilled.  Save it here.
2390       CSIIndex = i;
2391       continue;
2392     } else if (Reg == PPC::CR3) {
2393       CR3Spilled = true;
2394       continue;
2395     } else if (Reg == PPC::CR4) {
2396       CR4Spilled = true;
2397       continue;
2398     } else {
2399       // When we first encounter a non-CR register after seeing at
2400       // least one CR register, restore all spilled CRs together.
2401       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2402           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2403         bool is31 = needsFP(*MF);
2404         restoreCRs(Subtarget.isPPC64(), is31,
2405                    CR2Spilled, CR3Spilled, CR4Spilled,
2406                    MBB, I, CSI, CSIIndex);
2407         CR2Spilled = CR3Spilled = CR4Spilled = false;
2408       }
2409 
2410       if (CSI[i].isSpilledToReg()) {
2411         DebugLoc DL;
2412         NumPEReloadVSR++;
2413         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2414             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2415       } else {
2416        // Default behavior for non-CR saves.
2417         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2418         TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2419         assert(I != MBB.begin() &&
2420                "loadRegFromStackSlot didn't insert any code!");
2421       }
2422     }
2423 
2424     // Insert in reverse order.
2425     if (AtStart)
2426       I = MBB.begin();
2427     else {
2428       I = BeforeI;
2429       ++I;
2430     }
2431   }
2432 
2433   // If we haven't yet spilled the CRs, do so now.
2434   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2435     bool is31 = needsFP(*MF);
2436     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2437                MBB, I, CSI, CSIIndex);
2438   }
2439 
2440   return true;
2441 }
2442 
2443 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2444   return TOCSaveOffset;
2445 }
2446 
2447 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2448   if (Subtarget.isAIXABI())
2449     report_fatal_error("FramePointer is not implemented on AIX yet.");
2450   return FramePointerSaveOffset;
2451 }
2452 
2453 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2454   if (Subtarget.isAIXABI())
2455     report_fatal_error("BasePointer is not implemented on AIX yet.");
2456   return BasePointerSaveOffset;
2457 }
2458 
2459 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2460   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2461     return false;
2462   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2463           MF.getSubtarget<PPCSubtarget>().isPPC64());
2464 }
2465