1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that lowers homogeneous prolog/epilog instructions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "MCTargetDesc/AArch64InstPrinter.h"
16 #include "Utils/AArch64BaseInfo.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstr.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineModuleInfo.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <optional>
31 #include <sstream>
32 
33 using namespace llvm;
34 
35 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
36   "AArch64 homogeneous prolog/epilog lowering pass"
37 
/// Command-line threshold consulted by shouldUseFrameHelper(): a frame helper
/// is used only when the number of instructions it would outline is at least
/// this value.
cl::opt<int> FrameHelperSizeThreshold(
    "frame-helper-size-threshold", cl::init(2), cl::Hidden,
    cl::desc("The minimum number of instructions that are outlined in a frame "
             "helper (default = 2)"));
42 
43 namespace {
44 
45 class AArch64LowerHomogeneousPE {
46 public:
47   const AArch64InstrInfo *TII;
48 
AArch64LowerHomogeneousPE(Module * M,MachineModuleInfo * MMI)49   AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
50       : M(M), MMI(MMI) {}
51 
52   bool run();
53   bool runOnMachineFunction(MachineFunction &Fn);
54 
55 private:
56   Module *M;
57   MachineModuleInfo *MMI;
58 
59   bool runOnMBB(MachineBasicBlock &MBB);
60   bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
61                MachineBasicBlock::iterator &NextMBBI);
62 
63   /// Lower a HOM_Prolog pseudo instruction into a helper call
64   /// or a sequence of homogeneous stores.
65   /// When a fp setup follows, it can be optimized.
66   bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
67                    MachineBasicBlock::iterator &NextMBBI);
68   /// Lower a HOM_Epilog pseudo instruction into a helper call
69   /// or a sequence of homogeneous loads.
70   /// When a return follow, it can be optimized.
71   bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
72                    MachineBasicBlock::iterator &NextMBBI);
73 };
74 
/// Module pass wrapper around AArch64LowerHomogeneousPE: runOnModule() fetches
/// the MachineModuleInfo and delegates the actual lowering to that class.
class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
public:
  /// Pass identification, handed to the ModulePass base constructor.
  static char ID;

  /// Construct the pass and make sure it is initialized in the global
  /// PassRegistry.
  AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
    initializeAArch64LowerHomogeneousPrologEpilogPass(
        *PassRegistry::getPassRegistry());
  }
  /// Require and preserve MachineModuleInfoWrapperPass, declare all analyses
  /// preserved, then defer to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineModuleInfoWrapperPass>();
    AU.addPreserved<MachineModuleInfoWrapperPass>();
    AU.setPreservesAll();
    ModulePass::getAnalysisUsage(AU);
  }
  /// Run the lowering over the whole module (defined out of line).
  bool runOnModule(Module &M) override;

  /// Human-readable pass name.
  StringRef getPassName() const override {
    return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
  }
};
95 
96 } // end anonymous namespace
97 
// Storage for the pass identifier passed to the ModulePass constructor.
char AArch64LowerHomogeneousPrologEpilog::ID = 0;

// Generate the pass initialization routine that the pass constructor calls.
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
                "aarch64-lower-homogeneous-prolog-epilog",
                AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
103 
runOnModule(Module & M)104 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
105   if (skipModule(M))
106     return false;
107 
108   MachineModuleInfo *MMI =
109       &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
110   return AArch64LowerHomogeneousPE(&M, MMI).run();
111 }
112 
run()113 bool AArch64LowerHomogeneousPE::run() {
114   bool Changed = false;
115   for (auto &F : *M) {
116     if (F.empty())
117       continue;
118 
119     MachineFunction *MF = MMI->getMachineFunction(F);
120     if (!MF)
121       continue;
122     Changed |= runOnMachineFunction(*MF);
123   }
124 
125   return Changed;
126 }
/// Kind of frame helper to create or call:
/// - Prolog:      stores callee-saved registers.
/// - PrologFrame: like Prolog, and additionally sets FP from SP.
/// - Epilog:      restores callee-saved registers and returns through X16.
/// - EpilogTail:  restores callee-saved registers and returns through LR.
enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
128 
129 /// Return a frame helper name with the given CSRs and the helper type.
130 /// For instance, a prolog helper that saves x19 and x20 is named as
131 /// OUTLINED_FUNCTION_PROLOG_x19x20.
getFrameHelperName(SmallVectorImpl<unsigned> & Regs,FrameHelperType Type,unsigned FpOffset)132 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
133                                       FrameHelperType Type, unsigned FpOffset) {
134   std::ostringstream RegStream;
135   switch (Type) {
136   case FrameHelperType::Prolog:
137     RegStream << "OUTLINED_FUNCTION_PROLOG_";
138     break;
139   case FrameHelperType::PrologFrame:
140     RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
141     break;
142   case FrameHelperType::Epilog:
143     RegStream << "OUTLINED_FUNCTION_EPILOG_";
144     break;
145   case FrameHelperType::EpilogTail:
146     RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
147     break;
148   }
149 
150   for (auto Reg : Regs) {
151     if (Reg == AArch64::NoRegister)
152       continue;
153     RegStream << AArch64InstPrinter::getRegisterName(Reg);
154   }
155 
156   return RegStream.str();
157 }
158 
159 /// Create a Function for the unique frame helper with the given name.
160 /// Return a newly created MachineFunction with an empty MachineBasicBlock.
createFrameHelperMachineFunction(Module * M,MachineModuleInfo * MMI,StringRef Name)161 static MachineFunction &createFrameHelperMachineFunction(Module *M,
162                                                          MachineModuleInfo *MMI,
163                                                          StringRef Name) {
164   LLVMContext &C = M->getContext();
165   Function *F = M->getFunction(Name);
166   assert(F == nullptr && "Function has been created before");
167   F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
168                        Function::ExternalLinkage, Name, M);
169   assert(F && "Function was null!");
170 
171   // Use ODR linkage to avoid duplication.
172   F->setLinkage(GlobalValue::LinkOnceODRLinkage);
173   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
174 
175   // Set no-opt/minsize, so we don't insert padding between outlined
176   // functions.
177   F->addFnAttr(Attribute::OptimizeNone);
178   F->addFnAttr(Attribute::NoInline);
179   F->addFnAttr(Attribute::MinSize);
180   F->addFnAttr(Attribute::Naked);
181 
182   MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
183   // Remove unnecessary register liveness and set NoVRegs.
184   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
185   MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
186   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
187   MF.getRegInfo().freezeReservedRegs();
188 
189   // Create entry block.
190   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
191   IRBuilder<> Builder(EntryBB);
192   Builder.CreateRetVoid();
193 
194   // Insert the new block into the function.
195   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
196   MF.insert(MF.begin(), MBB);
197 
198   return MF;
199 }
200 
201 /// Emit a store-pair instruction for frame-setup.
202 /// If Reg2 is AArch64::NoRegister, emit STR instead.
emitStore(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator Pos,const TargetInstrInfo & TII,unsigned Reg1,unsigned Reg2,int Offset,bool IsPreDec)203 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
204                       MachineBasicBlock::iterator Pos,
205                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
206                       int Offset, bool IsPreDec) {
207   assert(Reg1 != AArch64::NoRegister);
208   const bool IsPaired = Reg2 != AArch64::NoRegister;
209   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
210   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
211   unsigned Opc;
212   if (IsPreDec) {
213     if (IsFloat)
214       Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
215     else
216       Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
217   } else {
218     if (IsFloat)
219       Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
220     else
221       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
222   }
223   // The implicit scale for Offset is 8.
224   TypeSize Scale(0U, false), Width(0U, false);
225   int64_t MinOffset, MaxOffset;
226   [[maybe_unused]] bool Success =
227       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
228   assert(Success && "Invalid Opcode");
229   Offset *= (8 / (int)Scale);
230 
231   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
232   if (IsPreDec)
233     MIB.addDef(AArch64::SP);
234   if (IsPaired)
235     MIB.addReg(Reg2);
236   MIB.addReg(Reg1)
237       .addReg(AArch64::SP)
238       .addImm(Offset)
239       .setMIFlag(MachineInstr::FrameSetup);
240 }
241 
242 /// Emit a load-pair instruction for frame-destroy.
243 /// If Reg2 is AArch64::NoRegister, emit LDR instead.
emitLoad(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator Pos,const TargetInstrInfo & TII,unsigned Reg1,unsigned Reg2,int Offset,bool IsPostDec)244 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
245                      MachineBasicBlock::iterator Pos,
246                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
247                      int Offset, bool IsPostDec) {
248   assert(Reg1 != AArch64::NoRegister);
249   const bool IsPaired = Reg2 != AArch64::NoRegister;
250   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
251   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
252   unsigned Opc;
253   if (IsPostDec) {
254     if (IsFloat)
255       Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
256     else
257       Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
258   } else {
259     if (IsFloat)
260       Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
261     else
262       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
263   }
264   // The implicit scale for Offset is 8.
265   TypeSize Scale(0U, false), Width(0U, false);
266   int64_t MinOffset, MaxOffset;
267   [[maybe_unused]] bool Success =
268       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
269   assert(Success && "Invalid Opcode");
270   Offset *= (8 / (int)Scale);
271 
272   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
273   if (IsPostDec)
274     MIB.addDef(AArch64::SP);
275   if (IsPaired)
276     MIB.addReg(Reg2, getDefRegState(true));
277   MIB.addReg(Reg1, getDefRegState(true))
278       .addReg(AArch64::SP)
279       .addImm(Offset)
280       .setMIFlag(MachineInstr::FrameDestroy);
281 }
282 
283 /// Return a unique function if a helper can be formed with the given Regs
284 /// and frame type.
285 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
286 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
287 ///    stp x20, x19, [sp, #16]
288 ///    ret
289 ///
290 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
291 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
292 ///    stp x20, x19, [sp, #16]
293 ///    add fp, sp, #32
294 ///    ret
295 ///
296 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
297 ///    mov x16, x30
298 ///    ldp x29, x30, [sp, #32]
299 ///    ldp x20, x19, [sp, #16]
300 ///    ldp x22, x21, [sp], #48
301 ///    ret x16
302 ///
303 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
304 ///    ldp x29, x30, [sp, #32]
305 ///    ldp x20, x19, [sp, #16]
306 ///    ldp x22, x21, [sp], #48
307 ///    ret
308 /// @param M module
309 /// @param MMI machine module info
310 /// @param Regs callee save regs that the helper will handle
311 /// @param Type frame helper type
312 /// @return a helper function
getOrCreateFrameHelper(Module * M,MachineModuleInfo * MMI,SmallVectorImpl<unsigned> & Regs,FrameHelperType Type,unsigned FpOffset=0)313 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
314                                         SmallVectorImpl<unsigned> &Regs,
315                                         FrameHelperType Type,
316                                         unsigned FpOffset = 0) {
317   assert(Regs.size() >= 2);
318   auto Name = getFrameHelperName(Regs, Type, FpOffset);
319   auto *F = M->getFunction(Name);
320   if (F)
321     return F;
322 
323   auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
324   MachineBasicBlock &MBB = *MF.begin();
325   const TargetSubtargetInfo &STI = MF.getSubtarget();
326   const TargetInstrInfo &TII = *STI.getInstrInfo();
327 
328   int Size = (int)Regs.size();
329   switch (Type) {
330   case FrameHelperType::Prolog:
331   case FrameHelperType::PrologFrame: {
332     // Compute the remaining SP adjust beyond FP/LR.
333     auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
334 
335     // If the register stored to the lowest address is not LR, we must subtract
336     // more from SP here.
337     if (LRIdx != Size - 2) {
338       assert(Regs[Size - 2] != AArch64::LR);
339       emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
340                 LRIdx - Size + 2, true);
341     }
342 
343     // Store CSRs in the reverse order.
344     for (int I = Size - 3; I >= 0; I -= 2) {
345       // FP/LR has been stored at call-site.
346       if (Regs[I - 1] == AArch64::LR)
347         continue;
348       emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
349                 false);
350     }
351     if (Type == FrameHelperType::PrologFrame)
352       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
353           .addDef(AArch64::FP)
354           .addUse(AArch64::SP)
355           .addImm(FpOffset)
356           .addImm(0)
357           .setMIFlag(MachineInstr::FrameSetup);
358 
359     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
360         .addReg(AArch64::LR);
361     break;
362   }
363   case FrameHelperType::Epilog:
364   case FrameHelperType::EpilogTail:
365     if (Type == FrameHelperType::Epilog)
366       // Stash LR to X16
367       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
368           .addDef(AArch64::X16)
369           .addReg(AArch64::XZR)
370           .addUse(AArch64::LR)
371           .addImm(0);
372 
373     for (int I = 0; I < Size - 2; I += 2)
374       emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
375                false);
376     // Restore the last CSR with post-increment of SP.
377     emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
378              true);
379 
380     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
381         .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
382     break;
383   }
384 
385   return M->getFunction(Name);
386 }
387 
388 /// This function checks if a frame helper should be used for
389 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
390 /// @param MBB machine basic block
391 /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
392 /// @param Regs callee save registers that are saved or restored.
393 /// @param Type frame helper type
394 /// @return True if a use of helper is qualified.
shouldUseFrameHelper(MachineBasicBlock & MBB,MachineBasicBlock::iterator & NextMBBI,SmallVectorImpl<unsigned> & Regs,FrameHelperType Type)395 static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
396                                  MachineBasicBlock::iterator &NextMBBI,
397                                  SmallVectorImpl<unsigned> &Regs,
398                                  FrameHelperType Type) {
399   const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
400   auto RegCount = Regs.size();
401   assert(RegCount > 0 && (RegCount % 2 == 0));
402   // # of instructions that will be outlined.
403   int InstCount = RegCount / 2;
404 
405   // Do not use a helper call when not saving LR.
406   if (!llvm::is_contained(Regs, AArch64::LR))
407     return false;
408 
409   switch (Type) {
410   case FrameHelperType::Prolog:
411     // Prolog helper cannot save FP/LR.
412     InstCount--;
413     break;
414   case FrameHelperType::PrologFrame: {
415     // Effecitvely no change in InstCount since FpAdjusment is included.
416     break;
417   }
418   case FrameHelperType::Epilog:
419     // Bail-out if X16 is live across the epilog helper because it is used in
420     // the helper to handle X30.
421     for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
422       if (NextMI->readsRegister(AArch64::W16, TRI))
423         return false;
424     }
425     // Epilog may not be in the last block. Check the liveness in successors.
426     for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
427       if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
428         return false;
429     }
430     // No change in InstCount for the regular epilog case.
431     break;
432   case FrameHelperType::EpilogTail: {
433     // EpilogTail helper includes the caller's return.
434     if (NextMBBI == MBB.end())
435       return false;
436     if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
437       return false;
438     InstCount++;
439     break;
440   }
441   }
442 
443   return InstCount >= FrameHelperSizeThreshold;
444 }
445 
446 /// Lower a HOM_Epilog pseudo instruction into a helper call while
447 /// creating the helper on demand. Or emit a sequence of loads in place when not
448 /// using a helper call.
449 ///
450 /// 1. With a helper including ret
451 ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
452 ///    ret                                                  ; NextMBBI
453 ///    =>
454 ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
455 ///    ...                                                  ; NextMBBI
456 ///
457 /// 2. With a helper
458 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
459 ///    =>
460 ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
461 ///
462 /// 3. Without a helper
463 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
464 ///    =>
465 ///    ldp x29, x30, [sp, #32]
466 ///    ldp x20, x19, [sp, #16]
467 ///    ldp x22, x21, [sp], #48
lowerEpilog(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)468 bool AArch64LowerHomogeneousPE::lowerEpilog(
469     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
470     MachineBasicBlock::iterator &NextMBBI) {
471   auto &MF = *MBB.getParent();
472   MachineInstr &MI = *MBBI;
473 
474   DebugLoc DL = MI.getDebugLoc();
475   SmallVector<unsigned, 8> Regs;
476   bool HasUnpairedReg = false;
477   for (auto &MO : MI.operands())
478     if (MO.isReg()) {
479       if (!MO.getReg().isValid()) {
480         // For now we are only expecting unpaired GP registers which should
481         // occur exactly once.
482         assert(!HasUnpairedReg);
483         HasUnpairedReg = true;
484       }
485       Regs.push_back(MO.getReg());
486     }
487   (void)HasUnpairedReg;
488   int Size = (int)Regs.size();
489   if (Size == 0)
490     return false;
491   // Registers are in pair.
492   assert(Size % 2 == 0);
493   assert(MI.getOpcode() == AArch64::HOM_Epilog);
494 
495   auto Return = NextMBBI;
496   if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
497     // When MBB ends with a return, emit a tail-call to the epilog helper
498     auto *EpilogTailHelper =
499         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
500     BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
501         .addGlobalAddress(EpilogTailHelper)
502         .addImm(0)
503         .setMIFlag(MachineInstr::FrameDestroy)
504         .copyImplicitOps(MI)
505         .copyImplicitOps(*Return);
506     NextMBBI = std::next(Return);
507     Return->removeFromParent();
508   } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
509                                   FrameHelperType::Epilog)) {
510     // The default epilog helper case.
511     auto *EpilogHelper =
512         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
513     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
514         .addGlobalAddress(EpilogHelper)
515         .setMIFlag(MachineInstr::FrameDestroy)
516         .copyImplicitOps(MI);
517   } else {
518     // Fall back to no-helper.
519     for (int I = 0; I < Size - 2; I += 2)
520       emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
521     // Restore the last CSR with post-increment of SP.
522     emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
523   }
524 
525   MBBI->removeFromParent();
526   return true;
527 }
528 
529 /// Lower a HOM_Prolog pseudo instruction into a helper call while
530 /// creating the helper on demand. Or emit a sequence of stores in place when
531 /// not using a helper call.
532 ///
533 /// 1. With a helper including frame-setup
534 ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
535 ///    =>
536 ///    stp x29, x30, [sp, #-16]!
537 ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
538 ///
539 /// 2. With a helper
540 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
541 ///    =>
542 ///    stp x29, x30, [sp, #-16]!
543 ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
544 ///
545 /// 3. Without a helper
546 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
547 ///    =>
548 ///    stp	x22, x21, [sp, #-48]!
549 ///    stp	x20, x19, [sp, #16]
550 ///    stp	x29, x30, [sp, #32]
lowerProlog(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)551 bool AArch64LowerHomogeneousPE::lowerProlog(
552     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
553     MachineBasicBlock::iterator &NextMBBI) {
554   auto &MF = *MBB.getParent();
555   MachineInstr &MI = *MBBI;
556 
557   DebugLoc DL = MI.getDebugLoc();
558   SmallVector<unsigned, 8> Regs;
559   bool HasUnpairedReg = false;
560   int LRIdx = 0;
561   std::optional<int> FpOffset;
562   for (auto &MO : MI.operands()) {
563     if (MO.isReg()) {
564       if (MO.getReg().isValid()) {
565         if (MO.getReg() == AArch64::LR)
566           LRIdx = Regs.size();
567       } else {
568         // For now we are only expecting unpaired GP registers which should
569         // occur exactly once.
570         assert(!HasUnpairedReg);
571         HasUnpairedReg = true;
572       }
573       Regs.push_back(MO.getReg());
574     } else if (MO.isImm()) {
575       FpOffset = MO.getImm();
576     }
577   }
578   (void)HasUnpairedReg;
579   int Size = (int)Regs.size();
580   if (Size == 0)
581     return false;
582   // Allow compact unwind case only for oww.
583   assert(Size % 2 == 0);
584   assert(MI.getOpcode() == AArch64::HOM_Prolog);
585 
586   if (FpOffset &&
587       shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
588     // FP/LR is stored at the top of stack before the prolog helper call.
589     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
590     auto *PrologFrameHelper = getOrCreateFrameHelper(
591         M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
592     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
593         .addGlobalAddress(PrologFrameHelper)
594         .setMIFlag(MachineInstr::FrameSetup)
595         .copyImplicitOps(MI)
596         .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
597         .addReg(AArch64::SP, RegState::Implicit);
598   } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
599                                                FrameHelperType::Prolog)) {
600     // FP/LR is stored at the top of stack before the prolog helper call.
601     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
602     auto *PrologHelper =
603         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
604     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
605         .addGlobalAddress(PrologHelper)
606         .setMIFlag(MachineInstr::FrameSetup)
607         .copyImplicitOps(MI);
608   } else {
609     // Fall back to no-helper.
610     emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
611     for (int I = Size - 3; I >= 0; I -= 2)
612       emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
613     if (FpOffset) {
614       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
615           .addDef(AArch64::FP)
616           .addUse(AArch64::SP)
617           .addImm(*FpOffset)
618           .addImm(0)
619           .setMIFlag(MachineInstr::FrameSetup);
620     }
621   }
622 
623   MBBI->removeFromParent();
624   return true;
625 }
626 
627 /// Process each machine instruction
628 /// @param MBB machine basic block
629 /// @param MBBI current instruction iterator
630 /// @param NextMBBI next instruction iterator which can be updated
631 /// @return True when IR is changed.
runOnMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)632 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
633                                         MachineBasicBlock::iterator MBBI,
634                                         MachineBasicBlock::iterator &NextMBBI) {
635   MachineInstr &MI = *MBBI;
636   unsigned Opcode = MI.getOpcode();
637   switch (Opcode) {
638   default:
639     break;
640   case AArch64::HOM_Prolog:
641     return lowerProlog(MBB, MBBI, NextMBBI);
642   case AArch64::HOM_Epilog:
643     return lowerEpilog(MBB, MBBI, NextMBBI);
644   }
645   return false;
646 }
647 
runOnMBB(MachineBasicBlock & MBB)648 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
649   bool Modified = false;
650 
651   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
652   while (MBBI != E) {
653     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
654     Modified |= runOnMI(MBB, MBBI, NMBBI);
655     MBBI = NMBBI;
656   }
657 
658   return Modified;
659 }
660 
runOnMachineFunction(MachineFunction & MF)661 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
662   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
663 
664   bool Modified = false;
665   for (auto &MBB : MF)
666     Modified |= runOnMBB(MBB);
667   return Modified;
668 }
669 
/// Factory for the homogeneous prolog/epilog lowering module pass.
/// @return a newly allocated pass instance.
ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  return new AArch64LowerHomogeneousPrologEpilog();
}
673