1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPCInstrBuilder.h"
16 #include "PPCInstrInfo.h"
17 #include "PPCMachineFunctionInfo.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineModuleInfo.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/RegisterScavenging.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/Target/TargetOptions.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "framelowering"
34 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
35 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
36 STATISTIC(NumPrologProbed, "Number of prologues probed");
37 
38 static cl::opt<bool>
39 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
40                      cl::desc("Enable spills in prologue to vector registers."),
41                      cl::init(false), cl::Hidden);
42 
43 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
44   if (STI.isAIXABI())
45     return STI.isPPC64() ? 16 : 8;
46   // SVR4 ABI:
47   return STI.isPPC64() ? 16 : 4;
48 }
49 
50 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
51   if (STI.isAIXABI())
52     return STI.isPPC64() ? 40 : 20;
53   return STI.isELFv2ABI() ? 24 : 40;
54 }
55 
56 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
57   // First slot in the general register save area.
58   return STI.isPPC64() ? -8U : -4U;
59 }
60 
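// The linkage area at the bottom of every frame is six (double)words on AIX
// and 64-bit ELFv1 (back chain, CR save, LR save, two reserved words, TOC
// save), four doublewords on ELFv2, and two words (back chain and LR save)
// on 32-bit SVR4.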
61 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
62   if (STI.isAIXABI() || STI.isPPC64())
63     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
64 
65   // 32-bit SVR4 ABI:
66   return 8;
67 }
68 
69 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
70   // Third slot in the general purpose register save area.
71   if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
72     return -12U;
73 
74   // Second slot in the general purpose register save area.
75   return STI.isPPC64() ? -16U : -8U;
76 }
77 
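// Offset of the CR save word within the linkage area: the second word, i.e.
// 4 bytes into the frame on 32-bit AIX and 8 bytes otherwise.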
78 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
79   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
80 }
81 
82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
83     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
84                           STI.getPlatformStackAlignment(), 0),
85       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
86       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
87       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
88       LinkageSize(computeLinkageSize(Subtarget)),
89       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
90       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
91 
92 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
93 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
94     unsigned &NumEntries) const {
95 
96 // Floating-point register save area offsets.
97 #define CALLEE_SAVED_FPRS \
98       {PPC::F31, -8},     \
99       {PPC::F30, -16},    \
100       {PPC::F29, -24},    \
101       {PPC::F28, -32},    \
102       {PPC::F27, -40},    \
103       {PPC::F26, -48},    \
104       {PPC::F25, -56},    \
105       {PPC::F24, -64},    \
106       {PPC::F23, -72},    \
107       {PPC::F22, -80},    \
108       {PPC::F21, -88},    \
109       {PPC::F20, -96},    \
110       {PPC::F19, -104},   \
111       {PPC::F18, -112},   \
112       {PPC::F17, -120},   \
113       {PPC::F16, -128},   \
114       {PPC::F15, -136},   \
115       {PPC::F14, -144}
116 
117 // 32-bit general purpose register save area offsets shared by ELF and
118 // AIX. AIX has an extra CSR with r13.
119 #define CALLEE_SAVED_GPRS32 \
120       {PPC::R31, -4},       \
121       {PPC::R30, -8},       \
122       {PPC::R29, -12},      \
123       {PPC::R28, -16},      \
124       {PPC::R27, -20},      \
125       {PPC::R26, -24},      \
126       {PPC::R25, -28},      \
127       {PPC::R24, -32},      \
128       {PPC::R23, -36},      \
129       {PPC::R22, -40},      \
130       {PPC::R21, -44},      \
131       {PPC::R20, -48},      \
132       {PPC::R19, -52},      \
133       {PPC::R18, -56},      \
134       {PPC::R17, -60},      \
135       {PPC::R16, -64},      \
136       {PPC::R15, -68},      \
137       {PPC::R14, -72}
138 
139 // 64-bit general purpose register save area offsets.
140 #define CALLEE_SAVED_GPRS64 \
141       {PPC::X31, -8},       \
142       {PPC::X30, -16},      \
143       {PPC::X29, -24},      \
144       {PPC::X28, -32},      \
145       {PPC::X27, -40},      \
146       {PPC::X26, -48},      \
147       {PPC::X25, -56},      \
148       {PPC::X24, -64},      \
149       {PPC::X23, -72},      \
150       {PPC::X22, -80},      \
151       {PPC::X21, -88},      \
152       {PPC::X20, -96},      \
153       {PPC::X19, -104},     \
154       {PPC::X18, -112},     \
155       {PPC::X17, -120},     \
156       {PPC::X16, -128},     \
157       {PPC::X15, -136},     \
158       {PPC::X14, -144}
159 
160 // Vector register save area offsets.
161 #define CALLEE_SAVED_VRS \
162       {PPC::V31, -16},   \
163       {PPC::V30, -32},   \
164       {PPC::V29, -48},   \
165       {PPC::V28, -64},   \
166       {PPC::V27, -80},   \
167       {PPC::V26, -96},   \
168       {PPC::V25, -112},  \
169       {PPC::V24, -128},  \
170       {PPC::V23, -144},  \
171       {PPC::V22, -160},  \
172       {PPC::V21, -176},  \
173       {PPC::V20, -192}
174 
175   // Note that the offsets here overlap, but this is fixed up in
176   // processFunctionBeforeFrameFinalized.
177 
178   static const SpillSlot ELFOffsets32[] = {
179       CALLEE_SAVED_FPRS,
180       CALLEE_SAVED_GPRS32,
181 
182       // CR save area offset.  We map each of the nonvolatile CR fields
183       // to the slot for CR2, which is the first of the nonvolatile CR
184       // fields to be assigned, so that we only allocate one save slot.
185       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
186       {PPC::CR2, -4},
187 
188       // VRSAVE save area offset.
189       {PPC::VRSAVE, -4},
190 
191       CALLEE_SAVED_VRS,
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot ELFOffsets64[] = {
214       CALLEE_SAVED_FPRS,
215       CALLEE_SAVED_GPRS64,
216 
217       // VRSAVE save area offset.
218       {PPC::VRSAVE, -4},
219       CALLEE_SAVED_VRS
220   };
221 
222   static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
223                                            CALLEE_SAVED_GPRS32,
224                                            // Add AIX's extra CSR.
225                                            {PPC::R13, -76},
226                                            CALLEE_SAVED_VRS};
227 
228   static const SpillSlot AIXOffsets64[] = {
229       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
230 
231   if (Subtarget.is64BitELFABI()) {
232     NumEntries = std::size(ELFOffsets64);
233     return ELFOffsets64;
234   }
235 
236   if (Subtarget.is32BitELFABI()) {
237     NumEntries = std::size(ELFOffsets32);
238     return ELFOffsets32;
239   }
240 
241   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
242 
243   if (Subtarget.isPPC64()) {
244     NumEntries = std::size(AIXOffsets64);
245     return AIXOffsets64;
246   }
247 
248   NumEntries = std::size(AIXOffsets32);
249   return AIXOffsets32;
250 }
251 
252 static bool spillsCR(const MachineFunction &MF) {
253   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
254   return FuncInfo->isCRSpilled();
255 }
256 
257 static bool hasSpills(const MachineFunction &MF) {
258   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
259   return FuncInfo->hasSpills();
260 }
261 
262 static bool hasNonRISpills(const MachineFunction &MF) {
263   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
264   return FuncInfo->hasNonRISpills();
265 }
266 
267 /// MustSaveLR - Return true if this function requires that we save the LR
268 /// register onto the stack in the prolog and restore it in the epilog of the
269 /// function.
270 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
271   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
272 
273   // We need a save/restore of LR if there is any def of LR (which is
274   // defined by calls, including the PIC setup sequence), or if there is
275   // some use of the LR stack slot (e.g. for builtin_return_address).
276   // (LR comes in 32 and 64 bit versions.)
277   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
278   return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
279 }
280 
281 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
282 /// call frame size. Update the MachineFunction object with the stack size.
283 uint64_t
284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
285                                                 bool UseEstimate) const {
286   unsigned NewMaxCallFrameSize = 0;
287   uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
288                                             &NewMaxCallFrameSize);
289   MF.getFrameInfo().setStackSize(FrameSize);
290   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
291   return FrameSize;
292 }
293 
294 /// determineFrameLayout - Determine the size of the frame and maximum call
295 /// frame size.
296 uint64_t
297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
298                                        bool UseEstimate,
299                                        unsigned *NewMaxCallFrameSize) const {
300   const MachineFrameInfo &MFI = MF.getFrameInfo();
301   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
302 
303   // Get the number of bytes to allocate from the FrameInfo
304   uint64_t FrameSize =
305     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
306 
307   // Get stack alignments. The frame must be aligned to the greatest of these:
308   Align TargetAlign = getStackAlign(); // alignment required per the ABI
309   Align MaxAlign = MFI.getMaxAlign();  // alignment required by data in frame
310   Align Alignment = std::max(TargetAlign, MaxAlign);
311 
312   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
313 
314   unsigned LR = RegInfo->getRARegister();
315   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
316   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
317                        !MFI.adjustsStack() &&       // No calls.
318                        !MustSaveLR(MF, LR) &&       // No need to save LR.
319                        !FI->mustSaveTOC() &&        // No need to save TOC.
320                        !RegInfo->hasBasePointer(MF) && // No special alignment.
321                        !MFI.isFrameAddressTaken();
322 
323   // Note: for PPC32 SVR4ABI, we can still generate stackless
324   // code if all local vars are reg-allocated.
325   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
326 
327   // Check whether we can skip adjusting the stack pointer (by using red zone)
328   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
329     // No need for frame
330     return 0;
331   }
332 
333   // Get the maximum call frame size of all the calls.
334   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
335 
336   // Maximum call frame needs to be at least big enough for linkage area.
337   unsigned minCallFrameSize = getLinkageSize();
338   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
339 
340   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
341   // that allocations will be aligned.
342   if (MFI.hasVarSizedObjects())
343     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
344 
345   // Update the new max call frame size if the caller passes in a valid pointer.
346   if (NewMaxCallFrameSize)
347     *NewMaxCallFrameSize = maxCallFrameSize;
348 
349   // Include call frame size in total.
350   FrameSize += maxCallFrameSize;
351 
352   // Make sure the frame is aligned.
353   FrameSize = alignTo(FrameSize, Alignment);
354 
355   return FrameSize;
356 }
357 
358 // hasFP - Return true if the specified function actually has a dedicated frame
359 // pointer register.
360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
361   const MachineFrameInfo &MFI = MF.getFrameInfo();
362   // FIXME: This is pretty much broken by design: hasFP() might be called very
363   // early, before the stack layout has been calculated, and can therefore
364   // return either true or false depending on when it is called.
365   return (MFI.getStackSize()) && needsFP(MF);
366 }
367 
368 // needsFP - Return true if the specified function should have a dedicated frame
369 // pointer register.  This is true if the function has variable sized allocas or
370 // if frame pointer elimination is disabled.
371 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
372   const MachineFrameInfo &MFI = MF.getFrameInfo();
373 
374   // Naked functions have no stack frame pushed, so we don't have a frame
375   // pointer.
376   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
377     return false;
378 
379   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
380          MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
381          MF.exposesReturnsTwice() ||
382          (MF.getTarget().Options.GuaranteedTailCallOpt &&
383           MF.getInfo<PPCFunctionInfo>()->hasFastCall());
384 }
385 
386 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
387   // When this function contains a dynamic alloca, we cannot use the frame
388   // pointer X31/R31 for the frameaddress lowering; in that case only X1/R1
389   // always points to the backchain.
390   bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects();
391   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
392   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
393 
394   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
395   bool HasBP = RegInfo->hasBasePointer(MF);
396   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
397   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
398 
399   for (MachineBasicBlock &MBB : MF)
400     for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
401       --MBBI;
402       for (MachineOperand &MO : MBBI->operands()) {
403         if (!MO.isReg())
404           continue;
405 
406         switch (MO.getReg()) {
407         case PPC::FP:
408           MO.setReg(FPReg);
409           break;
410         case PPC::FP8:
411           MO.setReg(FP8Reg);
412           break;
413         case PPC::BP:
414           MO.setReg(BPReg);
415           break;
416         case PPC::BP8:
417           MO.setReg(BP8Reg);
418           break;
419 
420         }
421       }
422     }
423 }
424 
425 /*  This function will do the following:
426     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
427       respectively (defaults recommended by the ABI) and return true
428     - If MBB is not an entry block, initialize the register scavenger and look
429       for available registers.
430     - If the defaults (R0/R12) are available, return true
431     - If TwoUniqueRegsRequired is set to true, it looks for two unique
432       registers. Otherwise, look for a single available register.
433       - If the required registers are found, set SR1 and SR2 and return true.
434       - If the required registers are not found, set SR2 or both SR1 and SR2 to
435         PPC::NoRegister and return false.
436 
437     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
438     is not set, this function will attempt to find two different registers, but
439     still return true if only one register is available (and set SR1 == SR2).
440 */
441 bool
442 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
443                                       bool UseAtEnd,
444                                       bool TwoUniqueRegsRequired,
445                                       Register *SR1,
446                                       Register *SR2) const {
447   RegScavenger RS;
448   Register R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
449   Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
450 
451   // Set the defaults for the two scratch registers.
452   if (SR1)
453     *SR1 = R0;
454 
455   if (SR2) {
456     assert (SR1 && "Asking for the second scratch register but not the first?");
457     *SR2 = R12;
458   }
459 
460   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
461   if ((UseAtEnd && MBB->isReturnBlock()) ||
462       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
463     return true;
464 
465   if (UseAtEnd) {
466     // The scratch register will be used before the first terminator (or at the
467     // end of the block if there are no terminators).
468     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
469     if (MBBI == MBB->begin()) {
470       RS.enterBasicBlock(*MBB);
471     } else {
472       RS.enterBasicBlockEnd(*MBB);
473       RS.backward(MBBI);
474     }
475   } else {
476     // The scratch register will be used at the start of the block.
477     RS.enterBasicBlock(*MBB);
478   }
479 
480   // If the two registers are available, we're all good.
481   // Note that we only return here if both R0 and R12 are available because
482   // although the function may not require two unique registers, it may benefit
483   // from having two so we should try to provide them.
484   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
485     return true;
486 
487   // Get the list of callee-saved registers for the target.
488   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
489   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
490 
491   // Get all the available registers in the block.
492   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
493                                      &PPC::GPRCRegClass);
494 
495   // We shouldn't use callee-saved registers as scratch registers as they may be
496   // available when looking for a candidate block for shrink wrapping but not
497   // available when the actual prologue/epilogue is being emitted because they
498   // were added as live-in to the prologue block by PrologueEpilogueInserter.
499   for (int i = 0; CSRegs[i]; ++i)
500     BV.reset(CSRegs[i]);
501 
502   // Set the first scratch register to the first available one.
503   if (SR1) {
504     int FirstScratchReg = BV.find_first();
505     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
506   }
507 
508   // If there is another one available, set the second scratch register to that.
509   // Otherwise, set it to either PPC::NoRegister if this function requires two
510   // or to whatever SR1 is set to if this function doesn't require two.
511   if (SR2) {
512     int SecondScratchReg = BV.find_next(*SR1);
513     if (SecondScratchReg != -1)
514       *SR2 = SecondScratchReg;
515     else
516       *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
517   }
518 
519   // Now that we've done our best to provide both registers, double check
520   // whether we were unable to provide enough.
521   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
522     return false;
523 
524   return true;
525 }
526 
527 // We need a scratch register for spilling LR and for spilling CR. By default,
528 // we use two scratch registers to hide latency. If only one scratch register
529 // is available, we can adjust for that by not overlapping the spill code.
530 // However, if we need to realign the stack (i.e. have a base pointer) and the
531 // stack frame is large, we need two scratch registers. Stack probing also
532 // requires two scratch registers: one to hold the old SP and one for large
533 // frames and large probe sizes.
534 bool
535 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
536   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
537   MachineFunction &MF = *(MBB->getParent());
538   bool HasBP = RegInfo->hasBasePointer(MF);
539   unsigned FrameSize = determineFrameLayout(MF);
540   int NegFrameSize = -FrameSize;
541   bool IsLargeFrame = !isInt<16>(NegFrameSize);
542   MachineFrameInfo &MFI = MF.getFrameInfo();
543   Align MaxAlign = MFI.getMaxAlign();
544   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
545   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
546 
547   return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
548          TLI.hasInlineStackProbe(MF);
549 }
550 
551 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
552   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
553 
554   return findScratchRegister(TmpMBB, false,
555                              twoUniqueScratchRegsRequired(TmpMBB));
556 }
557 
558 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
559   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
560 
561   return findScratchRegister(TmpMBB, true);
562 }
563 
564 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
565   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
566   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
567 
568   // Abort if there is no register info or function info.
569   if (!RegInfo || !FI)
570     return false;
571 
572   // Only move the stack update on ELFv2 ABI and PPC64.
573   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
574     return false;
575 
576   // Check the frame size first and return false if it does not fit the
577   // requirements.
578   // We need a non-zero frame size as well as a frame that will fit in the red
579   // zone. This is because by moving the stack pointer update we are now storing
580   // to the red zone until the stack pointer is updated. If we get an interrupt
581   // inside the prologue but before the stack update we now have a number of
582   // stores to the red zone and those stores must all fit.
583   MachineFrameInfo &MFI = MF.getFrameInfo();
584   unsigned FrameSize = MFI.getStackSize();
585   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
586     return false;
587 
588   // Frame pointers and base pointers complicate matters so don't do anything
589   // if we have them. For example having a frame pointer will sometimes require
590   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
591   // difficult. Similar situation exists with setjmp.
592   if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
593     return false;
594 
595   // Calls to fast_cc functions use different rules for passing parameters on
596   // the stack than the ABI does, and using a PIC base in the function imposes
597   // restrictions similar to those of using the base pointer. It is not
598   // generally safe to move the stack pointer update in these situations.
599   if (FI->hasFastCall() || FI->usesPICBase())
600     return false;
601 
602   // Finally we can move the stack update if we do not require register
603   // scavenging. Register scavenging can introduce more spills and so
604   // may make the frame size larger than we have computed.
605   return !RegInfo->requiresFrameIndexScavenging(MF);
606 }
607 
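// For orientation, the code below produces prologues that, for a small
// 64-bit ELFv2 frame, look roughly like (illustrative only; the scratch
// register, offsets and frame size vary per function):
//   mflr 0
//   std 0, 16(1)      # save LR into the caller's frame
//   stdu 1, -112(1)   # allocate the frame, storing the back chain at *SP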
608 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
609                                     MachineBasicBlock &MBB) const {
610   MachineBasicBlock::iterator MBBI = MBB.begin();
611   MachineFrameInfo &MFI = MF.getFrameInfo();
612   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
613   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
614   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
615 
616   const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
617   DebugLoc dl;
618   // AIX assembler does not support cfi directives.
619   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
620 
621   const bool HasFastMFLR = Subtarget.hasFastMFLR();
622 
623   // Get processor type.
624   bool isPPC64 = Subtarget.isPPC64();
625   // Get the ABI.
626   bool isSVR4ABI = Subtarget.isSVR4ABI();
627   bool isELFv2ABI = Subtarget.isELFv2ABI();
628   assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
629 
630   // Work out frame sizes.
631   uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
632   int64_t NegFrameSize = -FrameSize;
633   if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize)))
634     llvm_unreachable("Unhandled stack size!");
635 
636   if (MFI.isFrameAddressTaken())
637     replaceFPWithRealFP(MF);
638 
639   // Check if the link register (LR) must be saved.
640   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
641   bool MustSaveLR = FI->mustSaveLR();
642   bool MustSaveTOC = FI->mustSaveTOC();
643   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
644   bool MustSaveCR = !MustSaveCRs.empty();
645   // Do we have a frame pointer and/or base pointer for this function?
646   bool HasFP = hasFP(MF);
647   bool HasBP = RegInfo->hasBasePointer(MF);
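  // Only 32-bit SVR4 is treated as having no red zone; PPC64 and AIX frames
  // can rely on one.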
648   bool HasRedZone = isPPC64 || !isSVR4ABI;
649   bool HasROPProtect = Subtarget.hasROPProtect();
650   bool HasPrivileged = Subtarget.hasPrivileged();
651 
652   Register SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
653   Register BPReg = RegInfo->getBaseRegister(MF);
654   Register FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
655   Register LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
656   Register TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
657   Register ScratchReg;
658   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
659   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
660   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
661                                                 : PPC::MFLR );
662   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
663                                                  : PPC::STW );
664   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
665                                                      : PPC::STWU );
666   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
667                                                         : PPC::STWUX);
668   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
669                                               : PPC::OR );
670   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
671                                                             : PPC::SUBFC);
672   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
673                                                                : PPC::SUBFIC);
674   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
675                                                            : PPC::MFCR);
676   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
677   const MCInstrDesc &HashST =
678       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8)
679                       : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST));
680 
681   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
682   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
683   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
684   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
685   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
686          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
687 
688   // Using the same bool variable as below to suppress compiler warnings.
689   bool SingleScratchReg = findScratchRegister(
690       &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg);
691   assert(SingleScratchReg &&
692          "Required number of registers not available in this block");
693 
694   SingleScratchReg = ScratchReg == TempReg;
695 
696   int64_t LROffset = getReturnSaveOffset();
697 
698   int64_t FPOffset = 0;
699   if (HasFP) {
700     MachineFrameInfo &MFI = MF.getFrameInfo();
701     int FPIndex = FI->getFramePointerSaveIndex();
702     assert(FPIndex && "No Frame Pointer Save Slot!");
703     FPOffset = MFI.getObjectOffset(FPIndex);
704   }
705 
706   int64_t BPOffset = 0;
707   if (HasBP) {
708     MachineFrameInfo &MFI = MF.getFrameInfo();
709     int BPIndex = FI->getBasePointerSaveIndex();
710     assert(BPIndex && "No Base Pointer Save Slot!");
711     BPOffset = MFI.getObjectOffset(BPIndex);
712   }
713 
714   int64_t PBPOffset = 0;
715   if (FI->usesPICBase()) {
716     MachineFrameInfo &MFI = MF.getFrameInfo();
717     int PBPIndex = FI->getPICBasePointerSaveIndex();
718     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
719     PBPOffset = MFI.getObjectOffset(PBPIndex);
720   }
721 
722   // Get stack alignments.
723   Align MaxAlign = MFI.getMaxAlign();
724   if (HasBP && MaxAlign > 1)
725     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
726 
727   // Frames of 32KB & larger require special handling because they cannot be
728   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
729   bool isLargeFrame = !isInt<16>(NegFrameSize);
730 
731   // Check if we can move the stack update instruction (stdu) down the prologue
732   // past the callee saves. Hopefully this avoids the situation where the saves
733   // have to wait for the store-with-update that adjusts the stack pointer.
734   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
735   bool MovingStackUpdateDown = false;
736 
737   // Check if we can move the stack update.
738   if (stackUpdateCanBeMoved(MF)) {
739     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
740     for (CalleeSavedInfo CSI : Info) {
741       // If the callee saved register is spilled to a register instead of the
742       // stack then the spill no longer uses the stack pointer.
743       // This can lead to two consequences:
744       // 1) We no longer need to update the stack because the function does not
745       //    spill any callee saved registers to stack.
746       // 2) We have a situation where we still have to update the stack pointer
747       //    even though some registers are spilled to other registers. In
748       //    this case the current code moves the stack update to an incorrect
749       //    position.
750       // In either case we should abort moving the stack update operation.
751       if (CSI.isSpilledToReg()) {
752         StackUpdateLoc = MBBI;
753         MovingStackUpdateDown = false;
754         break;
755       }
756 
757       int FrIdx = CSI.getFrameIdx();
758       // If the frame index is not negative the callee saved info belongs to a
759       // stack object that is not a fixed stack object. We ignore non-fixed
760       // stack objects because we won't move the stack update pointer past them.
761       if (FrIdx >= 0)
762         continue;
763 
764       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
765         StackUpdateLoc++;
766         MovingStackUpdateDown = true;
767       } else {
768         // We need all of the Frame Indices to meet these conditions.
769         // If they do not, abort the whole operation.
770         StackUpdateLoc = MBBI;
771         MovingStackUpdateDown = false;
772         break;
773       }
774     }
775 
776     // If the operation was not aborted then update the object offset.
777     if (MovingStackUpdateDown) {
778       for (CalleeSavedInfo CSI : Info) {
779         int FrIdx = CSI.getFrameIdx();
780         if (FrIdx < 0)
781           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
782       }
783     }
784   }
785 
786   // Where in the prologue we move the CR fields depends on how many scratch
787   // registers we have, and if we need to save the link register or not. This
788   // lambda is to avoid duplicating the logic in 2 places.
789   auto BuildMoveFromCR = [&]() {
790     if (isELFv2ABI && MustSaveCRs.size() == 1) {
791       // In the ELFv2 ABI, we are not required to save all CR fields.
792       // If only one CR field is clobbered, it is more efficient to use
793       // mfocrf to selectively save just that field, because mfocrf has
794       // shorter latency compared to mfcr.
795       assert(isPPC64 && "V2 ABI is 64-bit only.");
796       MachineInstrBuilder MIB =
797           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
798       MIB.addReg(MustSaveCRs[0], RegState::Kill);
799     } else {
800       MachineInstrBuilder MIB =
801           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
802       for (unsigned CRfield : MustSaveCRs)
803         MIB.addReg(CRfield, RegState::ImplicitKill);
804     }
805   };
806 
807   // If we need to spill the CR and the LR but we don't have two separate
808   // registers available, we must spill them one at a time
809   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
810     BuildMoveFromCR();
811     BuildMI(MBB, MBBI, dl, StoreWordInst)
812         .addReg(TempReg, getKillRegState(true))
813         .addImm(CRSaveOffset)
814         .addReg(SPReg);
815   }
816 
817   if (MustSaveLR)
818     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
819 
820   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
821     BuildMoveFromCR();
822 
823   if (HasRedZone) {
824     if (HasFP)
825       BuildMI(MBB, MBBI, dl, StoreInst)
826         .addReg(FPReg)
827         .addImm(FPOffset)
828         .addReg(SPReg);
829     if (FI->usesPICBase())
830       BuildMI(MBB, MBBI, dl, StoreInst)
831         .addReg(PPC::R30)
832         .addImm(PBPOffset)
833         .addReg(SPReg);
834     if (HasBP)
835       BuildMI(MBB, MBBI, dl, StoreInst)
836         .addReg(BPReg)
837         .addImm(BPOffset)
838         .addReg(SPReg);
839   }
840 
841   // Generate the instruction to store the LR. In the case where ROP protection
842   // is required the register holding the LR should not be killed as it will be
843   // used by the hash store instruction.
844   auto SaveLR = [&](int64_t Offset) {
845     assert(MustSaveLR && "LR is not required to be saved!");
846     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
847         .addReg(ScratchReg, getKillRegState(!HasROPProtect))
848         .addImm(Offset)
849         .addReg(SPReg);
850 
851     // Add the ROP protection Hash Store instruction.
852     // NOTE: This is technically a violation of the ABI. The hash can be saved
853     // up to 512 bytes into the Protected Zone. This can be outside of the
854     // initial 288 byte volatile program storage region in the Protected Zone.
855     // However, this restriction will be removed in an upcoming revision of the
856     // ABI.
857     if (HasROPProtect) {
858       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
859       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
860       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
861              "ROP hash save offset out of range.");
862       assert(((ImmOffset & 0x7) == 0) &&
863              "ROP hash save offset must be 8 byte aligned.");
864       BuildMI(MBB, StackUpdateLoc, dl, HashST)
865           .addReg(ScratchReg, getKillRegState(true))
866           .addImm(ImmOffset)
867           .addReg(SPReg);
868     }
869   };
870 
871   if (MustSaveLR && HasFastMFLR)
872       SaveLR(LROffset);
873 
874   if (MustSaveCR &&
875       !(SingleScratchReg && MustSaveLR)) {
876     assert(HasRedZone && "A red zone is always available on PPC64");
877     BuildMI(MBB, MBBI, dl, StoreWordInst)
878       .addReg(TempReg, getKillRegState(true))
879       .addImm(CRSaveOffset)
880       .addReg(SPReg);
881   }
882 
883   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
884   if (!FrameSize) {
885     if (MustSaveLR && !HasFastMFLR)
886       SaveLR(LROffset);
887     return;
888   }
889 
890   // Adjust stack pointer: r1 += NegFrameSize.
891   // If there is a preferred stack alignment, align R1 now
892 
893   if (HasBP && HasRedZone) {
894     // Save a copy of r1 as the base pointer.
895     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
896       .addReg(SPReg)
897       .addReg(SPReg);
898   }
899 
900   // Have we generated a STUX instruction to claim the stack frame? If so,
901   // the negated frame size will be placed in ScratchReg.
902   bool HasSTUX =
903       (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) ||
904       (HasBP && MaxAlign > 1) || isLargeFrame;
905 
906   // If we use STUX to update the stack pointer, we need both scratch registers
907   // (TempReg and ScratchReg), so LR, which is held in ScratchReg, has to be
908   // saved here.
909   // If the offset cannot be encoded into the store instruction, we also have
910   // to save LR here.
911   if (MustSaveLR && !HasFastMFLR &&
912       (HasSTUX || !isInt<16>(FrameSize + LROffset)))
913     SaveLR(LROffset);
914 
915   // If FrameSize <= TLI.getStackProbeSize(MF), we get a free probe from the
916   // mandatory STU(X) instruction, since the POWER ABI requires the back-chain
917   // pointer to always be stored at *SP.
918   if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) {
919     // To be consistent with other targets, a pseudo instruction is emitted and
920     // will be later expanded in `inlineStackProbe`.
921     BuildMI(MBB, MBBI, dl,
922             TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
923                             : PPC::PROBED_STACKALLOC_32))
924         .addDef(TempReg)
925         .addDef(ScratchReg) // ScratchReg stores the old sp.
926         .addImm(NegFrameSize);
927     // FIXME: HasSTUX is only read if HasRedZone is not set; in that case, we
928     // update the ScratchReg to meet the assumption that ScratchReg contains
929     // the NegFrameSize. This solution is rather tricky.
930     if (!HasRedZone) {
931       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
932           .addReg(ScratchReg)
933           .addReg(SPReg);
934     }
935   } else {
936     // This condition must be kept in sync with canUseAsPrologue.
937     if (HasBP && MaxAlign > 1) {
938       if (isPPC64)
939         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
940             .addReg(SPReg)
941             .addImm(0)
942             .addImm(64 - Log2(MaxAlign));
943       else // PPC32...
944         BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
945             .addReg(SPReg)
946             .addImm(0)
947             .addImm(32 - Log2(MaxAlign))
948             .addImm(31);
949       if (!isLargeFrame) {
950         BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
951             .addReg(ScratchReg, RegState::Kill)
952             .addImm(NegFrameSize);
953       } else {
954         assert(!SingleScratchReg && "Only a single scratch reg available");
955         TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize);
956         BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
957             .addReg(ScratchReg, RegState::Kill)
958             .addReg(TempReg, RegState::Kill);
959       }
960 
961       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
962           .addReg(SPReg, RegState::Kill)
963           .addReg(SPReg)
964           .addReg(ScratchReg);
965     } else if (!isLargeFrame) {
966       BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
967           .addReg(SPReg)
968           .addImm(NegFrameSize)
969           .addReg(SPReg);
970     } else {
971       TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize);
972       BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
973           .addReg(SPReg, RegState::Kill)
974           .addReg(SPReg)
975           .addReg(ScratchReg);
976     }
977   }
978 
979   // Save the TOC register after the stack pointer update if a prologue TOC
980   // save is required for the function.
981   if (MustSaveTOC) {
982     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
983     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
984       .addReg(TOCReg, getKillRegState(true))
985       .addImm(TOCSaveOffset)
986       .addReg(SPReg);
987   }
988 
989   if (!HasRedZone) {
990     assert(!isPPC64 && "A red zone is always available on PPC64");
991     if (HasSTUX) {
992       // The negated frame size is in ScratchReg, and the SPReg has been
993       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
994       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
995       // the stack frame (i.e. the old SP), ideally, we would put the old
996       // SP into a register and use it as the base for the stores. The
997       // problem is that the only available register may be ScratchReg,
998       // which could be R0, and R0 cannot be used as a base address.
999 
1000       // First, set ScratchReg to the old SP. This may need to be modified
1001       // later.
1002       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1003         .addReg(ScratchReg, RegState::Kill)
1004         .addReg(SPReg);
1005 
1006       if (ScratchReg == PPC::R0) {
1007         // R0 cannot be used as a base register, but it can be used as an
1008         // index in a store-indexed.
1009         int LastOffset = 0;
1010         if (HasFP) {
1011           // R0 += (FPOffset-LastOffset).
1012           // Need addic, since addi treats R0 as 0.
1013           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1014             .addReg(ScratchReg)
1015             .addImm(FPOffset-LastOffset);
1016           LastOffset = FPOffset;
1017           // Store FP into *R0.
1018           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1019             .addReg(FPReg, RegState::Kill)  // Save FP.
1020             .addReg(PPC::ZERO)
1021             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1022         }
1023         if (FI->usesPICBase()) {
1024           // R0 += (PBPOffset-LastOffset).
1025           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1026             .addReg(ScratchReg)
1027             .addImm(PBPOffset-LastOffset);
1028           LastOffset = PBPOffset;
1029           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1030             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1031             .addReg(PPC::ZERO)
1032             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1033         }
1034         if (HasBP) {
1035           // R0 += (BPOffset-LastOffset).
1036           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1037             .addReg(ScratchReg)
1038             .addImm(BPOffset-LastOffset);
1039           LastOffset = BPOffset;
1040           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1041             .addReg(BPReg, RegState::Kill)  // Save BP.
1042             .addReg(PPC::ZERO)
1043             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1044           // BP = R0-LastOffset
1045           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1046             .addReg(ScratchReg, RegState::Kill)
1047             .addImm(-LastOffset);
1048         }
1049       } else {
1050         // ScratchReg is not R0, so use it as the base register. It is
1051         // already set to the old SP, so we can use the offsets directly.
1052 
1053         // Now that the stack frame has been allocated, save all the necessary
1054         // registers using ScratchReg as the base address.
1055         if (HasFP)
1056           BuildMI(MBB, MBBI, dl, StoreInst)
1057             .addReg(FPReg)
1058             .addImm(FPOffset)
1059             .addReg(ScratchReg);
1060         if (FI->usesPICBase())
1061           BuildMI(MBB, MBBI, dl, StoreInst)
1062             .addReg(PPC::R30)
1063             .addImm(PBPOffset)
1064             .addReg(ScratchReg);
1065         if (HasBP) {
1066           BuildMI(MBB, MBBI, dl, StoreInst)
1067             .addReg(BPReg)
1068             .addImm(BPOffset)
1069             .addReg(ScratchReg);
1070           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1071             .addReg(ScratchReg, RegState::Kill)
1072             .addReg(ScratchReg);
1073         }
1074       }
1075     } else {
1076       // The frame size is a known 16-bit constant (fitting in the immediate
1077       // field of STWU). To be here we have to be compiling for PPC32.
1078       // Since the SPReg has been decreased by FrameSize, add it back to each
1079       // offset.
1080       if (HasFP)
1081         BuildMI(MBB, MBBI, dl, StoreInst)
1082           .addReg(FPReg)
1083           .addImm(FrameSize + FPOffset)
1084           .addReg(SPReg);
1085       if (FI->usesPICBase())
1086         BuildMI(MBB, MBBI, dl, StoreInst)
1087           .addReg(PPC::R30)
1088           .addImm(FrameSize + PBPOffset)
1089           .addReg(SPReg);
1090       if (HasBP) {
1091         BuildMI(MBB, MBBI, dl, StoreInst)
1092           .addReg(BPReg)
1093           .addImm(FrameSize + BPOffset)
1094           .addReg(SPReg);
1095         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1096           .addReg(SPReg)
1097           .addImm(FrameSize);
1098       }
1099     }
1100   }
1101 
1102   // Save the LR now.
1103   if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset))
1104     SaveLR(LROffset + FrameSize);
1105 
1106   // Add Call Frame Information for the instructions we generated above.
1107   if (needsCFI) {
1108     unsigned CFIIndex;
1109 
1110     if (HasBP) {
1111       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1112       // because if the stack needed aligning then CFA won't be at a fixed
1113       // offset from FP/SP.
1114       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1115       CFIIndex = MF.addFrameInst(
1116           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1117     } else {
1118       // Adjust the definition of CFA to account for the change in SP.
1119       assert(NegFrameSize);
1120       CFIIndex = MF.addFrameInst(
1121           MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize));
1122     }
1123     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1124         .addCFIIndex(CFIIndex);
1125 
1126     if (HasFP) {
1127       // Describe where FP was saved, at a fixed offset from CFA.
1128       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1129       CFIIndex = MF.addFrameInst(
1130           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1131       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1132           .addCFIIndex(CFIIndex);
1133     }
1134 
1135     if (FI->usesPICBase()) {
1136       // Describe where FP was saved, at a fixed offset from CFA.
1137       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1138       CFIIndex = MF.addFrameInst(
1139           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1140       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1141           .addCFIIndex(CFIIndex);
1142     }
1143 
1144     if (HasBP) {
1145       // Describe where BP was saved, at a fixed offset from CFA.
1146       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1147       CFIIndex = MF.addFrameInst(
1148           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1149       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1150           .addCFIIndex(CFIIndex);
1151     }
1152 
1153     if (MustSaveLR) {
1154       // Describe where LR was saved, at a fixed offset from CFA.
1155       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1156       CFIIndex = MF.addFrameInst(
1157           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1158       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1159           .addCFIIndex(CFIIndex);
1160     }
1161   }
1162 
1163   // If there is a frame pointer, copy R1 into R31
1164   if (HasFP) {
1165     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1166       .addReg(SPReg)
1167       .addReg(SPReg);
1168 
1169     if (!HasBP && needsCFI) {
1170       // Change the definition of CFA from SP+offset to FP+offset, because SP
1171       // will change at every alloca.
1172       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1173       unsigned CFIIndex = MF.addFrameInst(
1174           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1175 
1176       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1177           .addCFIIndex(CFIIndex);
1178     }
1179   }
1180 
1181   if (needsCFI) {
1182     // Describe where callee saved registers were saved, at fixed offsets from
1183     // CFA.
1184     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1185     for (const CalleeSavedInfo &I : CSI) {
1186       Register Reg = I.getReg();
1187       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1188 
1189       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1190       // subregisters of CR2. We just need to emit a move of CR2.
1191       if (PPC::CRBITRCRegClass.contains(Reg))
1192         continue;
1193 
1194       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1195         continue;
1196 
1197       // For 64-bit SVR4 when we have spilled CRs, the spill location
1198       // is SP+8, not a frame-relative slot.
1199       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1200         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1201         // the whole CR word.  In the ELFv2 ABI, every CR that was
1202         // actually saved gets its own CFI record.
1203         Register CRReg = isELFv2ABI? Reg : PPC::CR2;
1204         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1205             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1206         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1207             .addCFIIndex(CFIIndex);
1208         continue;
1209       }
1210 
1211       if (I.isSpilledToReg()) {
1212         unsigned SpilledReg = I.getDstReg();
1213         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1214             nullptr, MRI->getDwarfRegNum(Reg, true),
1215             MRI->getDwarfRegNum(SpilledReg, true)));
1216         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1217           .addCFIIndex(CFIRegister);
1218       } else {
1219         int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
1220         // We have changed the object offset above but we do not want to change
1221         // the actual offsets in the CFI instruction so we have to undo the
1222         // offset change here.
1223         if (MovingStackUpdateDown)
1224           Offset -= NegFrameSize;
1225 
1226         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1227             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1228         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1229             .addCFIIndex(CFIIndex);
1230       }
1231     }
1232   }
1233 }
1234 
1235 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
1236                                         MachineBasicBlock &PrologMBB) const {
1237   bool isPPC64 = Subtarget.isPPC64();
1238   const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
1239   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1240   MachineFrameInfo &MFI = MF.getFrameInfo();
1241   const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
1242   // AIX assembler does not support cfi directives.
1243   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
1244   auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
1245     int Opc = MI.getOpcode();
1246     return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
1247   });
1248   if (StackAllocMIPos == PrologMBB.end())
1249     return;
1250   const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
1251   MachineBasicBlock *CurrentMBB = &PrologMBB;
1252   DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
1253   MachineInstr &MI = *StackAllocMIPos;
1254   int64_t NegFrameSize = MI.getOperand(2).getImm();
1255   unsigned ProbeSize = TLI.getStackProbeSize(MF);
1256   int64_t NegProbeSize = -(int64_t)ProbeSize;
1257   assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
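  // NegFrameSize and NegProbeSize are both negative, so NumBlocks is the
  // (positive) number of full probe-sized blocks and NegResidualSize is the
  // non-positive remainder.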
1258   int64_t NumBlocks = NegFrameSize / NegProbeSize;
1259   int64_t NegResidualSize = NegFrameSize % NegProbeSize;
1260   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1261   Register ScratchReg = MI.getOperand(0).getReg();
1262   Register FPReg = MI.getOperand(1).getReg();
1263   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1264   bool HasBP = RegInfo->hasBasePointer(MF);
1265   Register BPReg = RegInfo->getBaseRegister(MF);
1266   Align MaxAlign = MFI.getMaxAlign();
1267   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1268   const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
1269   // Subroutines to generate .cfi_* directives.
1270   auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
1271                             MachineBasicBlock::iterator MBBI, Register Reg) {
1272     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1273     unsigned CFIIndex = MF.addFrameInst(
1274         MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
1275     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1276         .addCFIIndex(CFIIndex);
1277   };
1278   auto buildDefCFA = [&](MachineBasicBlock &MBB,
1279                          MachineBasicBlock::iterator MBBI, Register Reg,
1280                          int Offset) {
1281     unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
1282     unsigned CFIIndex = MBB.getParent()->addFrameInst(
1283         MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
1284     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
1285         .addCFIIndex(CFIIndex);
1286   };
1287   // Subroutine to determine if we can use the Imm as part of d-form.
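  // (STD/STDU use the DS instruction form, whose 16-bit signed displacement
  // must have its low two bits clear, hence the multiple-of-4 requirement.)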
1288   auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
1289   // Subroutine to materialize the Imm into TempReg.
1290   auto MaterializeImm = [&](MachineBasicBlock &MBB,
1291                             MachineBasicBlock::iterator MBBI, int64_t Imm,
1292                             Register &TempReg) {
1293     assert(isInt<32>(Imm) && "Unhandled imm");
1294     if (isInt<16>(Imm))
1295       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
1296           .addImm(Imm);
1297     else {
1298       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
1299           .addImm(Imm >> 16);
1300       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
1301           .addReg(TempReg)
1302           .addImm(Imm & 0xFFFF);
1303     }
1304   };
1305   // Subroutine to store frame pointer and decrease stack pointer by probe size.
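  // (st(w|d)u(x) writes the back chain through the pre-decremented address and
  // updates r1 in the same instruction, so the ABI back-chain invariant holds
  // at every point during the probing sequence.)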
1306   auto allocateAndProbe = [&](MachineBasicBlock &MBB,
1307                               MachineBasicBlock::iterator MBBI, int64_t NegSize,
1308                               Register NegSizeReg, bool UseDForm,
1309                               Register StoreReg) {
1310     if (UseDForm)
1311       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
1312           .addReg(StoreReg)
1313           .addImm(NegSize)
1314           .addReg(SPReg);
1315     else
1316       BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
1317           .addReg(StoreReg)
1318           .addReg(SPReg)
1319           .addReg(NegSizeReg);
1320   };
1321   // Used to probe the stack when realignment is required.
1322   // Note that, per the ABI requirement, *sp must always equal the value of
1323   // the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
1324   // Following is pseudo code:
1325   // final_sp = (sp & align) + negframesize;
1326   // neg_gap = final_sp - sp;
1327   // while (neg_gap < negprobesize) {
1328   //   stdu fp, negprobesize(sp);
1329   //   neg_gap -= negprobesize;
1330   // }
1331   // stdux fp, sp, neg_gap
1332   //
1333   // When HasBP && HasRedzone, the back-chain pointer has already been saved
1334   // in BPReg before the probe code, so we do not need to save it again. That
1335   // frees one additional register that can be used to materialize the probe
1336   // size when the x-form is needed. Otherwise we cannot materialize the probe
1337   // size, so only the d-form can be used for now.
1338   //
1339   // The allocations are:
1340   // if (HasBP && HasRedzone) {
1341   //   r0: materialize the probesize if needed so that we can use xform.
1342   //   r12: `neg_gap`
1343   // } else {
1344   //   r0: back-chain pointer
1345   //   r12: `neg_gap`.
1346   // }
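  //
  // For example (illustrative numbers only): with ProbeSize = 4096 and
  // neg_gap = -10000, the loop emits two `stdu fp, -4096(sp)` probes
  // (neg_gap becomes -5904, then -1808) and the final `stdux` applies the
  // remaining -1808, so every page of the new frame is touched.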
1347   auto probeRealignedStack = [&](MachineBasicBlock &MBB,
1348                                  MachineBasicBlock::iterator MBBI,
1349                                  Register ScratchReg, Register TempReg) {
1350     assert(HasBP && "The function is supposed to have a base pointer when its "
1351                     "stack is realigned.");
1352     assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
1353 
1354     // FIXME: We could eliminate this limitation if we had more information
1355     // about which parts of the red zone are already used. Used red zone can be
1356     // treated as probed, but there might be `holes' in the probed red zone,
1357     // which could complicate the implementation.
1358     assert(ProbeSize >= Subtarget.getRedZoneSize() &&
1359            "Probe size should be larger than or equal to the size of the "
1360            "red zone so that the red zone is not clobbered by probing.");
1361 
1362     Register &FinalStackPtr = TempReg;
1363     // FIXME: We only support NegProbeSize materializable by DForm currently.
1364     // When HasBP && HasRedzone, we can use xform if we have an additional idle
1365     // register.
1366     NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
1367     assert(isInt<16>(NegProbeSize) &&
1368            "NegProbeSize should be materializable by DForm");
1369     Register CRReg = PPC::CR0;
1370     // The layout of the output assembly looks roughly like:
1371     // bb.0:
1372     //   ...
1373     //   sub $scratchreg, $finalsp, r1
1374     //   cmpdi $scratchreg, <negprobesize>
1375     //   bge bb.2
1376     // bb.1:
1377     //   stdu <backchain>, <negprobesize>(r1)
1378     //   sub $scratchreg, $scratchreg, negprobesize
1379     //   cmpdi $scratchreg, <negprobesize>
1380     //   blt bb.1
1381     // bb.2:
1382     //   stdux <backchain>, r1, $scratchreg
1383     MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
1384     MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
1385     MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
1386     MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1387     MF.insert(MBBInsertPoint, ProbeExitMBB);
1388     // bb.2
1389     {
1390       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1391       allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
1392                        BackChainPointer);
1393       if (HasRedZone)
1394         // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
1395         // to TempReg to satisfy it.
1396         BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
1397             .addReg(BPReg)
1398             .addReg(BPReg);
1399       ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
1400       ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
1401     }
1402     // bb.0
1403     {
1404       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
1405           .addReg(SPReg)
1406           .addReg(FinalStackPtr);
1407       if (!HasRedZone)
1408         BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
1409       BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
1410           .addReg(ScratchReg)
1411           .addImm(NegProbeSize);
1412       BuildMI(&MBB, DL, TII.get(PPC::BCC))
1413           .addImm(PPC::PRED_GE)
1414           .addReg(CRReg)
1415           .addMBB(ProbeExitMBB);
1416       MBB.addSuccessor(ProbeLoopBodyMBB);
1417       MBB.addSuccessor(ProbeExitMBB);
1418     }
1419     // bb.1
1420     {
1421       Register BackChainPointer = HasRedZone ? BPReg : TempReg;
1422       allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
1423                        0, true /*UseDForm*/, BackChainPointer);
1424       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
1425               ScratchReg)
1426           .addReg(ScratchReg)
1427           .addImm(-NegProbeSize);
1428       BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
1429               CRReg)
1430           .addReg(ScratchReg)
1431           .addImm(NegProbeSize);
1432       BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
1433           .addImm(PPC::PRED_LT)
1434           .addReg(CRReg)
1435           .addMBB(ProbeLoopBodyMBB);
1436       ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
1437       ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
1438     }
1439     // Update liveins.
1440     fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
1441     return ProbeExitMBB;
1442   };
1443   // In the case HasBP && MaxAlign > 1, we have to realign the SP by performing
1444   // SP = SP - SP % MaxAlign, which makes the probe more like a dynamic probe
1445   // since the offset subtracted from SP is determined by SP's runtime value.
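  // For example (assuming MaxAlign = 64): ScratchReg = SP & 63, then
  // FPReg = SP - ScratchReg (the realigned SP), and adding NegFrameSize
  // (materialized into ScratchReg) yields the final SP the probe walks down to.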
1446   if (HasBP && MaxAlign > 1) {
1447     // Calculate final stack pointer.
1448     if (isPPC64)
1449       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
1450           .addReg(SPReg)
1451           .addImm(0)
1452           .addImm(64 - Log2(MaxAlign));
1453     else
1454       BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
1455           .addReg(SPReg)
1456           .addImm(0)
1457           .addImm(32 - Log2(MaxAlign))
1458           .addImm(31);
1459     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
1460             FPReg)
1461         .addReg(ScratchReg)
1462         .addReg(SPReg);
1463     MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
1464     BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
1465             FPReg)
1466         .addReg(ScratchReg)
1467         .addReg(FPReg);
1468     CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
1469     if (needsCFI)
1470       buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
1471   } else {
1472     // Initialize current frame pointer.
1473     BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
1474     // Use FPReg to calculate CFA.
1475     if (needsCFI)
1476       buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
1477     // Probe residual part.
1478     if (NegResidualSize) {
1479       bool ResidualUseDForm = CanUseDForm(NegResidualSize);
1480       if (!ResidualUseDForm)
1481         MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
1482       allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
1483                        ResidualUseDForm, FPReg);
1484     }
1485     bool UseDForm = CanUseDForm(NegProbeSize);
1486     // If number of blocks is small, just probe them directly.
1487     if (NumBlocks < 3) {
1488       if (!UseDForm)
1489         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1490       for (int i = 0; i < NumBlocks; ++i)
1491         allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
1492                          FPReg);
1493       if (needsCFI) {
1494         // Restore using SPReg to calculate CFA.
1495         buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
1496       }
1497     } else {
1498       // Since CTR is a volatile register and the current shrink-wrapping
1499       // implementation won't choose an MBB inside a loop as the PrologMBB, it's
1500       // safe to synthesize a CTR loop to probe.
1501       // Calculate the trip count and store it in the CTR register.
1502       MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
1503       BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
1504           .addReg(ScratchReg, RegState::Kill);
1505       if (!UseDForm)
1506         MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
1507       // Create MBBs of the loop.
1508       MachineFunction::iterator MBBInsertPoint =
1509           std::next(CurrentMBB->getIterator());
1510       MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
1511       MF.insert(MBBInsertPoint, LoopMBB);
1512       MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
1513       MF.insert(MBBInsertPoint, ExitMBB);
1514       // Synthesize the loop body.
1515       allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
1516                        UseDForm, FPReg);
1517       BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
1518           .addMBB(LoopMBB);
1519       LoopMBB->addSuccessor(ExitMBB);
1520       LoopMBB->addSuccessor(LoopMBB);
1521       // Synthesize the exit MBB.
1522       ExitMBB->splice(ExitMBB->end(), CurrentMBB,
1523                       std::next(MachineBasicBlock::iterator(MI)),
1524                       CurrentMBB->end());
1525       ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
1526       CurrentMBB->addSuccessor(LoopMBB);
1527       if (needsCFI) {
1528         // Restore using SPReg to calculate CFA.
1529         buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
1530       }
1531       // Update liveins.
1532       fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1533     }
1534   }
1535   ++NumPrologProbed;
1536   MI.eraseFromParent();
1537 }
1538 
1539 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1540                                     MachineBasicBlock &MBB) const {
1541   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1542   DebugLoc dl;
1543 
1544   if (MBBI != MBB.end())
1545     dl = MBBI->getDebugLoc();
1546 
1547   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1548   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1549 
1550   // Get alignment info so we know how to restore the SP.
1551   const MachineFrameInfo &MFI = MF.getFrameInfo();
1552 
1553   // Get the number of bytes allocated from the FrameInfo.
1554   int64_t FrameSize = MFI.getStackSize();
1555 
1556   // Get processor type.
1557   bool isPPC64 = Subtarget.isPPC64();
1558 
1559   // Check if the link register (LR) has been saved.
1560   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1561   bool MustSaveLR = FI->mustSaveLR();
1562   const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
1563   bool MustSaveCR = !MustSaveCRs.empty();
1564   // Do we have a frame pointer and/or base pointer for this function?
1565   bool HasFP = hasFP(MF);
1566   bool HasBP = RegInfo->hasBasePointer(MF);
1567   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1568   bool HasROPProtect = Subtarget.hasROPProtect();
1569   bool HasPrivileged = Subtarget.hasPrivileged();
1570 
1571   Register SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1572   Register BPReg = RegInfo->getBaseRegister(MF);
1573   Register FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1574   Register ScratchReg;
1575   Register TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1576   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1577                                                  : PPC::MTLR );
1578   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1579                                                  : PPC::LWZ );
1580   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1581                                                            : PPC::LIS );
1582   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1583                                               : PPC::OR );
1584   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1585                                                   : PPC::ORI );
1586   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1587                                                    : PPC::ADDI );
1588   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1589                                                 : PPC::ADD4 );
1590   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1591                                                      : PPC::LWZ);
1592   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1593                                                      : PPC::MTOCRF);
1594   const MCInstrDesc &HashChk =
1595       TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8)
1596                       : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK));
1597   int64_t LROffset = getReturnSaveOffset();
1598 
1599   int64_t FPOffset = 0;
1600 
1601   // Use the same bool variable as below to suppress unused-variable warnings.
1602   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1603                                               &TempReg);
1604   assert(SingleScratchReg &&
1605          "Could not find an available scratch register");
1606 
1607   SingleScratchReg = ScratchReg == TempReg;
1608 
1609   if (HasFP) {
1610     int FPIndex = FI->getFramePointerSaveIndex();
1611     assert(FPIndex && "No Frame Pointer Save Slot!");
1612     FPOffset = MFI.getObjectOffset(FPIndex);
1613   }
1614 
1615   int64_t BPOffset = 0;
1616   if (HasBP) {
1617       int BPIndex = FI->getBasePointerSaveIndex();
1618       assert(BPIndex && "No Base Pointer Save Slot!");
1619       BPOffset = MFI.getObjectOffset(BPIndex);
1620   }
1621 
1622   int64_t PBPOffset = 0;
1623   if (FI->usesPICBase()) {
1624     int PBPIndex = FI->getPICBasePointerSaveIndex();
1625     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1626     PBPOffset = MFI.getObjectOffset(PBPIndex);
1627   }
1628 
1629   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1630 
1631   if (IsReturnBlock) {
1632     unsigned RetOpcode = MBBI->getOpcode();
1633     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1634                       RetOpcode == PPC::TCRETURNdi ||
1635                       RetOpcode == PPC::TCRETURNai ||
1636                       RetOpcode == PPC::TCRETURNri8 ||
1637                       RetOpcode == PPC::TCRETURNdi8 ||
1638                       RetOpcode == PPC::TCRETURNai8;
1639 
1640     if (UsesTCRet) {
1641       int MaxTCRetDelta = FI->getTailCallSPDelta();
1642       MachineOperand &StackAdjust = MBBI->getOperand(1);
1643       assert(StackAdjust.isImm() && "Expecting immediate value.");
1644       // Adjust stack pointer.
1645       int StackAdj = StackAdjust.getImm();
1646       int Delta = StackAdj - MaxTCRetDelta;
1647       assert((Delta >= 0) && "Delta must be non-negative");
1648       if (MaxTCRetDelta>0)
1649         FrameSize += (StackAdj +Delta);
1650       else
1651         FrameSize += StackAdj;
1652     }
1653   }
1654 
1655   // Frames of 32KB & larger require special handling because they cannot be
1656   // indexed into with a simple LD/LWZ immediate offset operand.
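  // (D-form loads and stores encode a signed 16-bit displacement, so only
  // offsets in the range [-32768, 32767] are directly reachable.)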
1657   bool isLargeFrame = !isInt<16>(FrameSize);
1658 
1659   // On targets without red zone, the SP needs to be restored last, so that
1660   // all live contents of the stack frame are upwards of the SP. This means
1661   // that we cannot restore SP just now, since there may be more registers
1662   // to restore from the stack frame (e.g. R31). If the frame size is not
1663   // a simple immediate value, we will need a spare register to hold the
1664   // restored SP. If the frame size is known and small, we can simply adjust
1665   // the offsets of the registers to be restored, and still use SP to restore
1666   // them. In such case, the final update of SP will be to add the frame
1667   // size to it.
1668   // To simplify the code, set RBReg to the base register used to restore
1669   // values from the stack, and set SPAdd to the value that needs to be added
1670   // to the SP at the end. The default values are as if red zone was present.
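  // For instance, in the no-red-zone, small-frame case handled below, RBReg
  // stays SP, SPAdd becomes FrameSize, and the individual restore offsets are
  // biased by FrameSize.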
1671   unsigned RBReg = SPReg;
1672   uint64_t SPAdd = 0;
1673 
1674   // Check if we can move the stack update instruction up the epilogue
1675   // past the callee saves. This will allow the move-to-LR instruction
1676   // to be executed before the restores of the callee saves, which means
1677   // that the callee-save restores can hide the latency of the MTLR instruction.
1678   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1679   if (stackUpdateCanBeMoved(MF)) {
1680     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1681     for (CalleeSavedInfo CSI : Info) {
1682       // If the callee saved register is spilled to another register abort the
1683       // stack update movement.
1684       if (CSI.isSpilledToReg()) {
1685         StackUpdateLoc = MBBI;
1686         break;
1687       }
1688       int FrIdx = CSI.getFrameIdx();
1689       // If the frame index is not negative the callee saved info belongs to a
1690       // stack object that is not a fixed stack object. We ignore non-fixed
1691       // stack objects because we won't move the update of the stack pointer
1692       // past them.
1693       if (FrIdx >= 0)
1694         continue;
1695 
1696       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1697         StackUpdateLoc--;
1698       else {
1699         // Abort the operation as we can't update all CSR restores.
1700         StackUpdateLoc = MBBI;
1701         break;
1702       }
1703     }
1704   }
1705 
1706   if (FrameSize) {
1707     // In the prologue, the loaded (or persistent) stack pointer value is
1708     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with a red
1709     // zone, add this offset back now.
1710 
1711     // If the function has a base pointer, the stack pointer has been copied
1712     // to it so we can restore it by copying in the other direction.
1713     if (HasRedZone && HasBP) {
1714       BuildMI(MBB, MBBI, dl, OrInst, RBReg).
1715         addReg(BPReg).
1716         addReg(BPReg);
1717     }
1718     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1719     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1720     // call which invalidates the stack pointer value in SP(0). So we use the
1721     // value of R31 in this case. Similar situation exists with setjmp.
1722     else if (FI->hasFastCall() || MF.exposesReturnsTwice()) {
1723       assert(HasFP && "Expecting a valid frame pointer.");
1724       if (!HasRedZone)
1725         RBReg = FPReg;
1726       if (!isLargeFrame) {
1727         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1728           .addReg(FPReg).addImm(FrameSize);
1729       } else {
1730         TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize);
1731         BuildMI(MBB, MBBI, dl, AddInst)
1732           .addReg(RBReg)
1733           .addReg(FPReg)
1734           .addReg(ScratchReg);
1735       }
1736     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1737       if (HasRedZone) {
1738         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1739           .addReg(SPReg)
1740           .addImm(FrameSize);
1741       } else {
1742         // Make sure that adding FrameSize will not overflow the max offset
1743         // size.
1744         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1745                "Local offsets should be negative");
1746         SPAdd = FrameSize;
1747         FPOffset += FrameSize;
1748         BPOffset += FrameSize;
1749         PBPOffset += FrameSize;
1750       }
1751     } else {
1752       // We don't want to use ScratchReg as a base register, because it
1753       // could happen to be R0. Use FP instead, but make sure to preserve it.
1754       if (!HasRedZone) {
1755         // If FP is not saved, copy it to ScratchReg.
1756         if (!HasFP)
1757           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1758             .addReg(FPReg)
1759             .addReg(FPReg);
1760         RBReg = FPReg;
1761       }
1762       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1763         .addImm(0)
1764         .addReg(SPReg);
1765     }
1766   }
1767   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1768   // If there is no red zone, ScratchReg may be needed for holding a useful
1769   // value (although not the base register). Make sure it is not overwritten
1770   // too early.
1771 
1772   // If we need to restore both the LR and the CR and we only have one
1773   // available scratch register, we must do them one at a time.
1774   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1775     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1776     // is live here.
1777     assert(HasRedZone && "Expecting red zone");
1778     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1779       .addImm(CRSaveOffset)
1780       .addReg(SPReg);
1781     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1782       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1783         .addReg(TempReg, getKillRegState(i == e-1));
1784   }
1785 
1786   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1787   // LR is stored in the caller's stack frame. ScratchReg will be needed
1788   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1789   // a base register anyway, because it may happen to be R0.
1790   bool LoadedLR = false;
1791   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1792     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1793       .addImm(LROffset+SPAdd)
1794       .addReg(RBReg);
1795     LoadedLR = true;
1796   }
1797 
1798   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1799     assert(RBReg == SPReg && "Should be using SP as a base register");
1800     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1801       .addImm(CRSaveOffset)
1802       .addReg(RBReg);
1803   }
1804 
1805   if (HasFP) {
1806     // If there is red zone, restore FP directly, since SP has already been
1807     // restored. Otherwise, restore the value of FP into ScratchReg.
1808     if (HasRedZone || RBReg == SPReg)
1809       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1810         .addImm(FPOffset)
1811         .addReg(SPReg);
1812     else
1813       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1814         .addImm(FPOffset)
1815         .addReg(RBReg);
1816   }
1817 
1818   if (FI->usesPICBase())
1819     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1820       .addImm(PBPOffset)
1821       .addReg(RBReg);
1822 
1823   if (HasBP)
1824     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1825       .addImm(BPOffset)
1826       .addReg(RBReg);
1827 
1828   // There is nothing more to be loaded from the stack, so now we can
1829   // restore SP: SP = RBReg + SPAdd.
1830   if (RBReg != SPReg || SPAdd != 0) {
1831     assert(!HasRedZone && "This should not happen with red zone");
1832     // If SPAdd is 0, generate a copy.
1833     if (SPAdd == 0)
1834       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1835         .addReg(RBReg)
1836         .addReg(RBReg);
1837     else
1838       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1839         .addReg(RBReg)
1840         .addImm(SPAdd);
1841 
1842     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1843     if (RBReg == FPReg)
1844       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1845         .addReg(ScratchReg)
1846         .addReg(ScratchReg);
1847 
1848     // Now load the LR from the caller's stack frame.
1849     if (MustSaveLR && !LoadedLR)
1850       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1851         .addImm(LROffset)
1852         .addReg(SPReg);
1853   }
1854 
1855   if (MustSaveCR &&
1856       !(SingleScratchReg && MustSaveLR))
1857     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1858       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1859         .addReg(TempReg, getKillRegState(i == e-1));
1860 
1861   if (MustSaveLR) {
1862     // If ROP protection is required, an extra instruction is added to compute a
1863     // hash and then compare it to the hash stored in the prologue.
1864     if (HasROPProtect) {
1865       const int SaveIndex = FI->getROPProtectionHashSaveIndex();
1866       const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
1867       assert((ImmOffset <= -8 && ImmOffset >= -512) &&
1868              "ROP hash check location offset out of range.");
1869       assert(((ImmOffset & 0x7) == 0) &&
1870              "ROP hash check location offset must be 8 byte aligned.");
1871       BuildMI(MBB, StackUpdateLoc, dl, HashChk)
1872           .addReg(ScratchReg)
1873           .addImm(ImmOffset)
1874           .addReg(SPReg);
1875     }
1876     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1877   }
1878 
1879   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1880   // call optimization.
1881   if (IsReturnBlock) {
1882     unsigned RetOpcode = MBBI->getOpcode();
1883     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1884         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1885         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1886       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1887       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1888 
1889       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1890         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1891           .addReg(SPReg).addImm(CallerAllocatedAmt);
1892       } else {
1893         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1894           .addImm(CallerAllocatedAmt >> 16);
1895         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1896           .addReg(ScratchReg, RegState::Kill)
1897           .addImm(CallerAllocatedAmt & 0xFFFF);
1898         BuildMI(MBB, MBBI, dl, AddInst)
1899           .addReg(SPReg)
1900           .addReg(FPReg)
1901           .addReg(ScratchReg);
1902       }
1903     } else {
1904       createTailCallBranchInstr(MBB);
1905     }
1906   }
1907 }
1908 
1909 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1910   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1911 
1912   // If we got this far a first terminator should exist.
1913   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1914 
1915   DebugLoc dl = MBBI->getDebugLoc();
1916   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1917 
1918   // Create branch instruction for pseudo tail call return instruction.
1919   // The TCRETURNdi variants are direct calls. Valid targets for those are
1920   // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
1921   // since we can tail call external functions with PC-Rel (i.e. we don't need
1922   // to worry about different TOC pointers). Some of the external functions will
1923   // be MO_GlobalAddress while others like memcpy for example, are going to
1924   // be MO_ExternalSymbol.
1925   unsigned RetOpcode = MBBI->getOpcode();
1926   if (RetOpcode == PPC::TCRETURNdi) {
1927     MBBI = MBB.getLastNonDebugInstr();
1928     MachineOperand &JumpTarget = MBBI->getOperand(0);
1929     if (JumpTarget.isGlobal())
1930       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1931         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1932     else if (JumpTarget.isSymbol())
1933       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1934         addExternalSymbol(JumpTarget.getSymbolName());
1935     else
1936       llvm_unreachable("Expecting Global or External Symbol");
1937   } else if (RetOpcode == PPC::TCRETURNri) {
1938     MBBI = MBB.getLastNonDebugInstr();
1939     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1940     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1941   } else if (RetOpcode == PPC::TCRETURNai) {
1942     MBBI = MBB.getLastNonDebugInstr();
1943     MachineOperand &JumpTarget = MBBI->getOperand(0);
1944     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1945   } else if (RetOpcode == PPC::TCRETURNdi8) {
1946     MBBI = MBB.getLastNonDebugInstr();
1947     MachineOperand &JumpTarget = MBBI->getOperand(0);
1948     if (JumpTarget.isGlobal())
1949       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1950         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1951     else if (JumpTarget.isSymbol())
1952       BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1953         addExternalSymbol(JumpTarget.getSymbolName());
1954     else
1955       llvm_unreachable("Expecting Global or External Symbol");
1956   } else if (RetOpcode == PPC::TCRETURNri8) {
1957     MBBI = MBB.getLastNonDebugInstr();
1958     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1959     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1960   } else if (RetOpcode == PPC::TCRETURNai8) {
1961     MBBI = MBB.getLastNonDebugInstr();
1962     MachineOperand &JumpTarget = MBBI->getOperand(0);
1963     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1964   }
1965 }
1966 
1967 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1968                                             BitVector &SavedRegs,
1969                                             RegScavenger *RS) const {
1970   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1971   if (Subtarget.isAIXABI())
1972     updateCalleeSaves(MF, SavedRegs);
1973 
1974   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1975 
1976   // Do not explicitly save the callee saved VSRp registers.
1977   // The individual VSR subregisters will be saved instead.
1978   SavedRegs.reset(PPC::VSRp26);
1979   SavedRegs.reset(PPC::VSRp27);
1980   SavedRegs.reset(PPC::VSRp28);
1981   SavedRegs.reset(PPC::VSRp29);
1982   SavedRegs.reset(PPC::VSRp30);
1983   SavedRegs.reset(PPC::VSRp31);
1984 
1985   //  Save and clear the LR state.
1986   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1987   unsigned LR = RegInfo->getRARegister();
1988   FI->setMustSaveLR(MustSaveLR(MF, LR));
1989   SavedRegs.reset(LR);
1990 
1991   //  Save R31 if necessary
1992   int FPSI = FI->getFramePointerSaveIndex();
1993   const bool isPPC64 = Subtarget.isPPC64();
1994   MachineFrameInfo &MFI = MF.getFrameInfo();
1995 
1996   // If the frame pointer save index hasn't been defined yet.
1997   if (!FPSI && needsFP(MF)) {
1998     // Find out the fixed offset of the frame pointer save area.
1999     int FPOffset = getFramePointerSaveOffset();
2000     // Allocate the frame index for frame pointer save area.
2001     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
2002     // Save the result.
2003     FI->setFramePointerSaveIndex(FPSI);
2004   }
2005 
2006   int BPSI = FI->getBasePointerSaveIndex();
2007   if (!BPSI && RegInfo->hasBasePointer(MF)) {
2008     int BPOffset = getBasePointerSaveOffset();
2009     // Allocate the frame index for the base pointer save area.
2010     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
2011     // Save the result.
2012     FI->setBasePointerSaveIndex(BPSI);
2013   }
2014 
2015   // Reserve stack space for the PIC Base register (R30).
2016   // Only used in SVR4 32-bit.
2017   if (FI->usesPICBase()) {
2018     int PBPSI = MFI.CreateFixedObject(4, -8, true);
2019     FI->setPICBasePointerSaveIndex(PBPSI);
2020   }
2021 
2022   // Make sure we don't explicitly spill r31, because, for example, we have
2023   // some inline asm which explicitly clobbers it, when we otherwise have a
2024   // frame pointer and are using r31's spill slot for the prologue/epilogue
2025   // code. Same goes for the base pointer and the PIC base register.
2026   if (needsFP(MF))
2027     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
2028   if (RegInfo->hasBasePointer(MF)) {
2029     SavedRegs.reset(RegInfo->getBaseRegister(MF));
2030     // On AIX, when the base register (R30) is used, we need to spill R31 too
2031     // to satisfy the AIX traceback table requirement.
2032     if (!needsFP(MF) && !SavedRegs.test(isPPC64 ? PPC::X31 : PPC::R31) &&
2033         Subtarget.isAIXABI()) {
2034       assert(
2035           (RegInfo->getBaseRegister(MF) == (isPPC64 ? PPC::X30 : PPC::R30)) &&
2036           "Invalid base register on AIX!");
2037       SavedRegs.set(isPPC64 ? PPC::X31 : PPC::R31);
2038     }
2039   }
2040   if (FI->usesPICBase())
2041     SavedRegs.reset(PPC::R30);
2042 
2043   // Reserve stack space to move the linkage area to in case of a tail call.
2044   int TCSPDelta = 0;
2045   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2046       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
2047     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
2048   }
2049 
2050   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
2051   // For 64-bit SVR4 and all flavors of AIX, we create a FixedStack
2052   // object at the offset of the CR-save slot in the linkage area. The actual
2053   // save and restore of the condition register will be created as part of the
2054   // prologue and epilogue insertion, but the FixedStack object is needed to
2055   // keep the CalleeSavedInfo valid.
2056   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
2057        SavedRegs.test(PPC::CR4))) {
2058     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
2059     const int64_t SpillOffset =
2060         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
2061     int FrameIdx =
2062         MFI.CreateFixedObject(SpillSize, SpillOffset,
2063                               /* IsImmutable */ true, /* IsAliased */ false);
2064     FI->setCRSpillFrameIndex(FrameIdx);
2065   }
2066 }
2067 
2068 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
2069                                                        RegScavenger *RS) const {
2070   // Get callee saved register information.
2071   MachineFrameInfo &MFI = MF.getFrameInfo();
2072   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
2073 
2074   // If the function is shrink-wrapped and has a tail call, the tail call might
2075   // not be in the new RestoreBlock, so the real branch instruction won't be
2076   // generated by emitEpilogue() because shrink-wrapping has chosen a new
2077   // RestoreBlock. Handle this case here.
2078   if (MFI.getSavePoint() && MFI.hasTailCall()) {
2079     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
2080     for (MachineBasicBlock &MBB : MF) {
2081       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
2082         createTailCallBranchInstr(MBB);
2083     }
2084   }
2085 
2086   // Early exit if no callee saved registers are modified!
2087   if (CSI.empty() && !needsFP(MF)) {
2088     addScavengingSpillSlot(MF, RS);
2089     return;
2090   }
2091 
2092   unsigned MinGPR = PPC::R31;
2093   unsigned MinG8R = PPC::X31;
2094   unsigned MinFPR = PPC::F31;
2095   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
2096 
2097   bool HasGPSaveArea = false;
2098   bool HasG8SaveArea = false;
2099   bool HasFPSaveArea = false;
2100   bool HasVRSaveArea = false;
2101 
2102   SmallVector<CalleeSavedInfo, 18> GPRegs;
2103   SmallVector<CalleeSavedInfo, 18> G8Regs;
2104   SmallVector<CalleeSavedInfo, 18> FPRegs;
2105   SmallVector<CalleeSavedInfo, 18> VRegs;
2106 
2107   for (const CalleeSavedInfo &I : CSI) {
2108     Register Reg = I.getReg();
2109     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
2110             (Reg != PPC::X2 && Reg != PPC::R2)) &&
2111            "Not expecting to try to spill R2 in a function that must save TOC");
2112     if (PPC::GPRCRegClass.contains(Reg)) {
2113       HasGPSaveArea = true;
2114 
2115       GPRegs.push_back(I);
2116 
2117       if (Reg < MinGPR) {
2118         MinGPR = Reg;
2119       }
2120     } else if (PPC::G8RCRegClass.contains(Reg)) {
2121       HasG8SaveArea = true;
2122 
2123       G8Regs.push_back(I);
2124 
2125       if (Reg < MinG8R) {
2126         MinG8R = Reg;
2127       }
2128     } else if (PPC::F8RCRegClass.contains(Reg)) {
2129       HasFPSaveArea = true;
2130 
2131       FPRegs.push_back(I);
2132 
2133       if (Reg < MinFPR) {
2134         MinFPR = Reg;
2135       }
2136     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
2137                PPC::CRRCRegClass.contains(Reg)) {
2138       ; // do nothing, as we already know whether CRs are spilled
2139     } else if (PPC::VRRCRegClass.contains(Reg) ||
2140                PPC::SPERCRegClass.contains(Reg)) {
2141       // Altivec and SPE are mutually exclusive, but have the same stack
2142       // alignment requirements, so overload the save area for both cases.
2143       HasVRSaveArea = true;
2144 
2145       VRegs.push_back(I);
2146 
2147       if (Reg < MinVR) {
2148         MinVR = Reg;
2149       }
2150     } else {
2151       llvm_unreachable("Unknown RegisterClass!");
2152     }
2153   }
2154 
2155   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
2156   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2157 
2158   int64_t LowerBound = 0;
2159 
2160   // Take into account stack space reserved for tail calls.
2161   int TCSPDelta = 0;
2162   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2163       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
2164     LowerBound = TCSPDelta;
2165   }
2166 
2167   // The Floating-point register save area is right below the back chain word
2168   // of the previous stack frame.
2169   if (HasFPSaveArea) {
2170     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
2171       int FI = FPRegs[i].getFrameIdx();
2172 
2173       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2174     }
2175 
2176     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
2177   }
2178 
2179   // Check whether the frame pointer register is allocated. If so, make sure it
2180   // is spilled to the correct offset.
2181   if (needsFP(MF)) {
2182     int FI = PFI->getFramePointerSaveIndex();
2183     assert(FI && "No Frame Pointer Save Slot!");
2184     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2185     // FP is R31/X31, so no need to update MinGPR/MinG8R.
2186     HasGPSaveArea = true;
2187   }
2188 
2189   if (PFI->usesPICBase()) {
2190     int FI = PFI->getPICBasePointerSaveIndex();
2191     assert(FI && "No PIC Base Pointer Save Slot!");
2192     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2193 
2194     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
2195     HasGPSaveArea = true;
2196   }
2197 
2198   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2199   if (RegInfo->hasBasePointer(MF)) {
2200     int FI = PFI->getBasePointerSaveIndex();
2201     assert(FI && "No Base Pointer Save Slot!");
2202     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2203 
2204     Register BP = RegInfo->getBaseRegister(MF);
2205     if (PPC::G8RCRegClass.contains(BP)) {
2206       MinG8R = std::min<unsigned>(MinG8R, BP);
2207       HasG8SaveArea = true;
2208     } else if (PPC::GPRCRegClass.contains(BP)) {
2209       MinGPR = std::min<unsigned>(MinGPR, BP);
2210       HasGPSaveArea = true;
2211     }
2212   }
2213 
2214   // General register save area starts right below the Floating-point
2215   // register save area.
2216   if (HasGPSaveArea || HasG8SaveArea) {
2217     // Move general register save area spill slots down, taking into account
2218     // the size of the Floating-point register save area.
2219     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
2220       if (!GPRegs[i].isSpilledToReg()) {
2221         int FI = GPRegs[i].getFrameIdx();
2222         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2223       }
2224     }
2225 
2226     // Likewise move the 64-bit register save area spill slots down, taking
2227     // into account the size of the Floating-point register save area.
2228     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2229       if (!G8Regs[i].isSpilledToReg()) {
2230         int FI = G8Regs[i].getFrameIdx();
2231         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2232       }
2233     }
2234 
2235     unsigned MinReg =
2236       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2237                          TRI->getEncodingValue(MinG8R));
2238 
2239     const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
2240     LowerBound -= (31 - MinReg + 1) * GPRegSize;
2241   }
2242 
2243   // For 32-bit only, the CR save area is below the general register
2244   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2245   // to the stack pointer and hence does not need an adjustment here.
2246   // Only CR2 (the first nonvolatile spilled) has an associated frame
2247   // index so that we have a single uniform save area.
2248   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
2249     // Adjust the frame index of the CR spill slot.
2250     for (const auto &CSInfo : CSI) {
2251       if (CSInfo.getReg() == PPC::CR2) {
2252         int FI = CSInfo.getFrameIdx();
2253         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2254         break;
2255       }
2256     }
2257 
2258     LowerBound -= 4; // The CR save area is always 4 bytes long.
2259   }
2260 
2261   // Both Altivec and SPE have the same alignment and padding requirements
2262   // within the stack frame.
2263   if (HasVRSaveArea) {
2264     // Insert alignment padding; we need 16-byte alignment. Note: for a positive
2265     // number the alignment formula is y = (x + (n-1)) & ~(n-1), but since we are
2266     // using a negative number here (the stack grows downward), the formula is
2267     // y = x & ~(n-1), where x is the value before aligning, n is the alignment
2268     // size (n = 16 here), and y is the value after aligning.
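    // For example, LowerBound = -100 aligns down to -112 (-100 & ~15), keeping
    // the save area 16-byte aligned.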
2269     assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!");
2270     LowerBound &= ~(15);
2271 
2272     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2273       int FI = VRegs[i].getFrameIdx();
2274 
2275       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2276     }
2277   }
2278 
2279   addScavengingSpillSlot(MF, RS);
2280 }
2281 
2282 void
2283 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2284                                          RegScavenger *RS) const {
2285   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2286   // a large stack, which will require scavenging a register to materialize a
2287   // large offset.
2288 
2289   // We need to have a scavenger spill slot for spills if the frame size is
2290   // large. In case there is no free register for large-offset addressing,
2291   // this slot is used for the necessary emergency spill. Also, we need the
2292   // slot for dynamic stack allocations.
2293 
2294   // The scavenger might be invoked if the frame offset does not fit into
2295   // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
2296   // We don't know the complete frame size here because we've not yet computed
2297   // callee-saved register spills or the needed alignment padding.
2298   unsigned StackSize = determineFrameLayout(MF, true);
2299   MachineFrameInfo &MFI = MF.getFrameInfo();
2300   bool NeedSpills = Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
2301 
2302   if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
2303       (hasSpills(MF) && NeedSpills)) {
2304     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2305     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2306     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2307     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2308     unsigned Size = TRI.getSpillSize(RC);
2309     Align Alignment = TRI.getSpillAlign(RC);
2310     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));
2311 
2312     // Might we have over-aligned allocas?
2313     bool HasAlVars =
2314         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2315 
2316     // These kinds of spills might need two registers.
2317     if (spillsCR(MF) || HasAlVars)
2318       RS->addScavengingFrameIndex(
2319           MFI.CreateStackObject(Size, Alignment, false));
2320   }
2321 }
2322 
2323 // This function checks if a callee-saved GPR can be spilled to a volatile
2324 // vector register. This occurs for leaf functions when the option
2325 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2326 // which were not spilled to vectors, return false so the target independent
2327 // code can handle them by assigning a FrameIdx to a stack slot.
2328 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2329     MachineFunction &MF, const TargetRegisterInfo *TRI,
2330     std::vector<CalleeSavedInfo> &CSI) const {
2331 
2332   if (CSI.empty())
2333     return true; // Early exit if no callee saved registers are modified!
2334 
2335   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2336   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2337   const MachineRegisterInfo &MRI = MF.getRegInfo();
2338 
2339   if (Subtarget.hasSPE()) {
2340     // In the case of SPE, we only have SuperRegs and CRs
2341     // in our CalleeSavedInfo vector.
2342 
2343     for (auto &CalleeSaveReg : CSI) {
2344       MCPhysReg Reg = CalleeSaveReg.getReg();
2345       MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
2346       MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);
2347 
2348       if ( // Check only for SuperRegs.
2349           Lower &&
2350           // Replace Reg if only lower-32 bits modified
2351           !MRI.isPhysRegModified(Higher))
2352         CalleeSaveReg = CalleeSavedInfo(Lower);
2353     }
2354   }
2355 
2356   // Early exit if we cannot spill GPRs to volatile vector registers.
2357   MachineFrameInfo &MFI = MF.getFrameInfo();
2358   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2359     return false;
2360 
2361   // Build a BitVector of VSRs that can be used for spilling GPRs.
2362   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2363   BitVector BVCalleeSaved(TRI->getNumRegs());
2364   for (unsigned i = 0; CSRegs[i]; ++i)
2365     BVCalleeSaved.set(CSRegs[i]);
2366 
2367   for (unsigned Reg : BVAllocatable.set_bits()) {
2368     // Set to 0 if the register is not a volatile VSX register, or if it is
2369     // used in the function.
2370     if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
2371         MRI.isPhysRegUsed(Reg))
2372       BVAllocatable.reset(Reg);
2373   }
2374 
2375   bool AllSpilledToReg = true;
2376   unsigned LastVSRUsedForSpill = 0;
2377   for (auto &CS : CSI) {
2378     if (BVAllocatable.none())
2379       return false;
2380 
2381     Register Reg = CS.getReg();
2382 
2383     if (!PPC::G8RCRegClass.contains(Reg)) {
2384       AllSpilledToReg = false;
2385       continue;
2386     }
2387 
2388     // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
2389     // into one VSR using the mtvsrdd instruction.
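    // (Each VSR is 128 bits wide, so it can hold the values of two 64-bit GPRs.)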
2390     if (LastVSRUsedForSpill != 0) {
2391       CS.setDstReg(LastVSRUsedForSpill);
2392       BVAllocatable.reset(LastVSRUsedForSpill);
2393       LastVSRUsedForSpill = 0;
2394       continue;
2395     }
2396 
2397     unsigned VolatileVFReg = BVAllocatable.find_first();
2398     if (VolatileVFReg < BVAllocatable.size()) {
2399       CS.setDstReg(VolatileVFReg);
2400       LastVSRUsedForSpill = VolatileVFReg;
2401     } else {
2402       AllSpilledToReg = false;
2403     }
2404   }
2405   return AllSpilledToReg;
2406 }
2407 
2408 bool PPCFrameLowering::spillCalleeSavedRegisters(
2409     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2410     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2411 
2412   MachineFunction *MF = MBB.getParent();
2413   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2414   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2415   bool MustSaveTOC = FI->mustSaveTOC();
2416   DebugLoc DL;
2417   bool CRSpilled = false;
2418   MachineInstrBuilder CRMIB;
2419   BitVector Spilled(TRI->getNumRegs());
2420 
2421   VSRContainingGPRs.clear();
2422 
2423   // Map each VSR to the GPRs to be spilled into it. A single VSR can hold one
2424   // or two GPRs, so we need a table to record information for later save/restore.
2425   for (const CalleeSavedInfo &Info : CSI) {
2426     if (Info.isSpilledToReg()) {
2427       auto &SpilledVSR =
2428           VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
2429       assert(SpilledVSR.second == 0 &&
2430              "Can't spill more than two GPRs into VSR!");
2431       if (SpilledVSR.first == 0)
2432         SpilledVSR.first = Info.getReg();
2433       else
2434         SpilledVSR.second = Info.getReg();
2435     }
2436   }
2437 
2438   for (const CalleeSavedInfo &I : CSI) {
2439     Register Reg = I.getReg();
2440 
2441     // CR2 through CR4 are the nonvolatile CR fields.
2442     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2443 
2444     // Add the callee-saved register as live-in; it's killed at the spill.
2445     // Do not do this for callee-saved registers that are live-in to the
2446     // function because they will already be marked live-in and this will be
2447     // adding it for a second time. It is an error to add the same register
2448     // to the set more than once.
2449     const MachineRegisterInfo &MRI = MF->getRegInfo();
2450     bool IsLiveIn = MRI.isLiveIn(Reg);
2451     if (!IsLiveIn)
2452        MBB.addLiveIn(Reg);
2453 
2454     if (CRSpilled && IsCRField) {
2455       CRMIB.addReg(Reg, RegState::ImplicitKill);
2456       continue;
2457     }
2458 
2459     // The actual spill will happen in the prologue.
2460     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2461       continue;
2462 
2463     // Insert the spill to the stack frame.
2464     if (IsCRField) {
2465       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2466       if (!Subtarget.is32BitELFABI()) {
2467         // The actual spill will happen at the start of the prologue.
2468         FuncInfo->addMustSaveCR(Reg);
2469       } else {
2470         CRSpilled = true;
2471         FuncInfo->setSpillsCR();
2472 
2473         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2474         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2475         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2476                   .addReg(Reg, RegState::ImplicitKill);
2477 
2478         MBB.insert(MI, CRMIB);
2479         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2480                                          .addReg(PPC::R12,
2481                                                  getKillRegState(true)),
2482                                          I.getFrameIdx()));
2483       }
2484     } else {
2485       if (I.isSpilledToReg()) {
2486         unsigned Dst = I.getDstReg();
2487 
2488         if (Spilled[Dst])
2489           continue;
2490 
2491         if (VSRContainingGPRs[Dst].second != 0) {
2492           assert(Subtarget.hasP9Vector() &&
2493                  "mtvsrdd is unavailable on pre-P9 targets.");
2494 
2495           NumPESpillVSR += 2;
2496           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
2497               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
2498               .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
2499         } else if (VSRContainingGPRs[Dst].second == 0) {
2500           assert(Subtarget.hasP8Vector() &&
2501                  "Can't move GPR to VSR on pre-P8 targets.");
2502 
2503           ++NumPESpillVSR;
2504           BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
2505                   TRI->getSubReg(Dst, PPC::sub_64))
2506               .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
2507         } else {
2508           llvm_unreachable("More than two GPRs spilled to a VSR!");
2509         }
2510         Spilled.set(Dst);
2511       } else {
2512         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2513         // Use !IsLiveIn for the kill flag.
2514         // We do not want to kill registers that are live in this function
2515         // before their use because they will become undefined registers.
2516         // Functions without NoUnwind need to preserve the order of elements in
2517         // saved vector registers.
2518         if (Subtarget.needsSwapsForVSXMemOps() &&
2519             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2520           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2521                                        I.getFrameIdx(), RC, TRI);
2522         else
2523           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
2524                                   TRI, Register());
2525       }
2526     }
2527   }
2528   return true;
2529 }
2530 
2531 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2532                        bool CR4Spilled, MachineBasicBlock &MBB,
2533                        MachineBasicBlock::iterator MI,
2534                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2535 
2536   MachineFunction *MF = MBB.getParent();
2537   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2538   DebugLoc DL;
2539   unsigned MoveReg = PPC::R12;
2540 
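  // A rough sketch of what is emitted below:
  //   lwz    r12, <CR save slot>(r1|r31)
  //   mtocrf <field mask for crN>, r12   ; one per spilled field (CR2-CR4)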
2541   // 32-bit:  FP-relative
2542   MBB.insert(MI,
2543              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2544                                CSI[CSIIndex].getFrameIdx()));
2545 
2546   unsigned RestoreOp = PPC::MTOCRF;
2547   if (CR2Spilled)
2548     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2549                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2550 
2551   if (CR3Spilled)
2552     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2553                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2554 
2555   if (CR4Spilled)
2556     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2557                .addReg(MoveReg, getKillRegState(true)));
2558 }
2559 
2560 MachineBasicBlock::iterator PPCFrameLowering::
2561 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2562                               MachineBasicBlock::iterator I) const {
2563   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2564   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2565       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2566     // Add (actually subtract) back the amount the callee popped on return.
2567     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2568       bool is64Bit = Subtarget.isPPC64();
2569       CalleeAmt *= -1;
2570       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2571       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2572       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2573       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2574       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2575       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2576       const DebugLoc &dl = I->getDebugLoc();
2577 
2578       if (isInt<16>(CalleeAmt)) {
2579         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2580           .addReg(StackReg, RegState::Kill)
2581           .addImm(CalleeAmt);
2582       } else {
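        // CalleeAmt does not fit in a signed 16-bit immediate, so build it in
        // TmpReg first.  For a hypothetical amount of 0x12345 this emits:
        //   lis  r0, 0x1
        //   ori  r0, r0, 0x2345
        //   add  r1, r1, r0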
2583         MachineBasicBlock::iterator MBBI = I;
2584         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2585           .addImm(CalleeAmt >> 16);
2586         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2587           .addReg(TmpReg, RegState::Kill)
2588           .addImm(CalleeAmt & 0xFFFF);
2589         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2590           .addReg(StackReg, RegState::Kill)
2591           .addReg(TmpReg);
2592       }
2593     }
2594   }
2595   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2596   return MBB.erase(I);
2597 }
2598 
2599 static bool isCalleeSavedCR(unsigned Reg) {
2600   return Reg == PPC::CR2 || Reg == PPC::CR3 || Reg == PPC::CR4;
2601 }
2602 
2603 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2604     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2605     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2606   MachineFunction *MF = MBB.getParent();
2607   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2608   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2609   bool MustSaveTOC = FI->mustSaveTOC();
2610   bool CR2Spilled = false;
2611   bool CR3Spilled = false;
2612   bool CR4Spilled = false;
2613   unsigned CSIIndex = 0;
2614   BitVector Restored(TRI->getNumRegs());
2615 
2616   // Initialize insertion-point logic; we will be restoring in reverse
2617   // order of spill.
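  // Each iteration below inserts its restore code before I and then resets I
  // to the first instruction it inserted, so restores emitted by later
  // iterations end up ahead of those emitted by earlier ones.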
2618   MachineBasicBlock::iterator I = MI, BeforeI = I;
2619   bool AtStart = I == MBB.begin();
2620 
2621   if (!AtStart)
2622     --BeforeI;
2623 
2624   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2625     Register Reg = CSI[i].getReg();
2626 
2627     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2628       continue;
2629 
2630     // Restore of callee saved condition register field is handled during
2631     // epilogue insertion.
2632     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2633       continue;
2634 
2635     if (Reg == PPC::CR2) {
2636       CR2Spilled = true;
2637       // The spill slot is associated only with CR2, which is the first
2638       // nonvolatile CR field spilled.  Record its CSI index here.
2639       CSIIndex = i;
2640       continue;
2641     } else if (Reg == PPC::CR3) {
2642       CR3Spilled = true;
2643       continue;
2644     } else if (Reg == PPC::CR4) {
2645       CR4Spilled = true;
2646       continue;
2647     } else {
2648       // On 32-bit ELF, when we first encounter a non-CR register after seeing
2649       // at least one CR register, restore all spilled CRs together.
2650       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2651         bool is31 = needsFP(*MF);
2652         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2653                    CSIIndex);
2654         CR2Spilled = CR3Spilled = CR4Spilled = false;
2655       }
2656 
2657       if (CSI[i].isSpilledToReg()) {
2658         DebugLoc DL;
2659         unsigned Dst = CSI[i].getDstReg();
2660 
2661         if (Restored[Dst])
2662           continue;
2663 
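        // Unpack the GPR(s) previously packed into this VSR: mfvsrld recovers
        // the lower doubleword and mfvsrd the upper, mirroring the mtvsrdd /
        // mtvsrd sequence used in the prologue.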
2664         if (VSRContainingGPRs[Dst].second != 0) {
2665           assert(Subtarget.hasP9Vector());
2666           NumPEReloadVSR += 2;
2667           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD),
2668                   VSRContainingGPRs[Dst].second)
2669               .addReg(Dst);
2670           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2671                   VSRContainingGPRs[Dst].first)
2672               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2673         } else if (VSRContainingGPRs[Dst].second == 0) {
2674           assert(Subtarget.hasP8Vector());
2675           ++NumPEReloadVSR;
2676           BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD),
2677                   VSRContainingGPRs[Dst].first)
2678               .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true));
2679         } else {
2680           llvm_unreachable("More than two GPRs spilled to a VSR!");
2681         }
2682 
2683         Restored.set(Dst);
2684 
2685       } else {
2686         // Default behavior for non-CR saves.
2687         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2688 
2689         // Functions without NoUnwind need to preserve the order of elements in
2690         // saved vector registers.
2691         if (Subtarget.needsSwapsForVSXMemOps() &&
2692             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2693           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2694                                         TRI);
2695         else
2696           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI,
2697                                    Register());
2698 
2699         assert(I != MBB.begin() &&
2700                "loadRegFromStackSlot didn't insert any code!");
2701       }
2702     }
2703 
2704     // Insert in reverse order.
2705     if (AtStart)
2706       I = MBB.begin();
2707     else {
2708       I = BeforeI;
2709       ++I;
2710     }
2711   }
2712 
2713   // If we haven't yet restored the spilled CRs, do so now.
2714   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2715     assert(Subtarget.is32BitELFABI() &&
2716            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2717     bool is31 = needsFP(*MF);
2718     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2719   }
2720 
2721   return true;
2722 }
2723 
2724 uint64_t PPCFrameLowering::getTOCSaveOffset() const {
2725   return TOCSaveOffset;
2726 }
2727 
2728 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
2729   return FramePointerSaveOffset;
2730 }
2731 
2732 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
2733   return BasePointerSaveOffset;
2734 }
2735 
2736 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2737   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2738     return false;
2739   return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI();
2740 }
2741 
2742 void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF,
2743                                          BitVector &SavedRegs) const {
2744   // The AIX ABI uses traceback tables for EH, which require that if callee-saved
2745   // register N is used, all registers from N through 31 must be saved/restored.
2746   // NOTE: The check for AIX is not actually what is relevant. Traceback tables
2747   // on Linux have the same requirements. It is just that AIX is the only ABI
2748   // for which we actually use traceback tables. If another ABI needs to be
2749   // supported that also uses them, we can add a check such as
2750   // Subtarget.usesTraceBackTables().
2751   assert(Subtarget.isAIXABI() &&
2752          "Function updateCalleeSaves should only be called for AIX.");
2753 
2754   // If there are no callee saves then there is nothing to do.
2755   if (SavedRegs.none())
2756     return;
2757 
2758   const MCPhysReg *CSRegs =
2759       Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF);
2760   MCPhysReg LowestGPR = PPC::R31;
2761   MCPhysReg LowestG8R = PPC::X31;
2762   MCPhysReg LowestFPR = PPC::F31;
2763   MCPhysReg LowestVR = PPC::V31;
2764 
2765   // Traverse the CSRs twice so as not to rely on ascending ordering of
2766   // registers in the array. The first pass finds the lowest numbered
2767   // register and the second pass marks all higher numbered registers
2768   // for spilling.
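  // For example, if R29 is the lowest-numbered GPR marked for saving, the
  // second pass also marks R30 and R31.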
2769   for (int i = 0; CSRegs[i]; i++) {
2770     // Get the lowest numbered register for each class that actually needs
2771     // to be saved.
2772     MCPhysReg Cand = CSRegs[i];
2773     if (!SavedRegs.test(Cand))
2774       continue;
2775     if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR)
2776       LowestGPR = Cand;
2777     else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R)
2778       LowestG8R = Cand;
2779     else if ((PPC::F4RCRegClass.contains(Cand) ||
2780               PPC::F8RCRegClass.contains(Cand)) &&
2781              Cand < LowestFPR)
2782       LowestFPR = Cand;
2783     else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR)
2784       LowestVR = Cand;
2785   }
2786 
2787   for (int i = 0; CSRegs[i]; i++) {
2788     MCPhysReg Cand = CSRegs[i];
2789     if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) ||
2790         (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) ||
2791         ((PPC::F4RCRegClass.contains(Cand) ||
2792           PPC::F8RCRegClass.contains(Cand)) &&
2793          Cand > LowestFPR) ||
2794         (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR))
2795       SavedRegs.set(Cand);
2796   }
2797 }
2798 
2799 uint64_t PPCFrameLowering::getStackThreshold() const {
2800   // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack and
2801   // `add r1, r1, <scratch_reg>` to release the stack frame.
2802   // The scratch register holds a signed 64-bit number, which is negative when
2803   // extending the stack and positive when releasing the stack frame.
2804   // For the `stux` and `add` to pair up, the absolute value held in the
2805   // scratch register must be the same in both cases, so the maximum stack size
2806   // is (2^63)-1, i.e., LONG_MAX.
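  // For example, for a hypothetical frame of size N:
  //   stux r1, r1, <scratch_reg holding -N>    ; extend
  //   ...
  //   add  r1, r1, <scratch_reg holding +N>    ; release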
2807   if (Subtarget.isPPC64())
2808     return LONG_MAX;
2809 
2810   return TargetFrameLowering::getStackThreshold();
2811 }
2812