xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (revision 1f1e2261e341e6ca6862f82261066ef1705f0a7a)
1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that lowers homogeneous prolog/epilog instructions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "MCTargetDesc/AArch64InstPrinter.h"
16 #include "Utils/AArch64BaseInfo.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstr.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineModuleInfo.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <sstream>
30 
31 using namespace llvm;
32 
33 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
34   "AArch64 homogeneous prolog/epilog lowering pass"
35 
36 cl::opt<int> FrameHelperSizeThreshold(
37     "frame-helper-size-threshold", cl::init(2), cl::Hidden,
38     cl::desc("The minimum number of instructions that are outlined in a frame "
39              "helper (default = 2)"));
40 
41 namespace {
42 
43 class AArch64LowerHomogeneousPE {
44 public:
45   const AArch64InstrInfo *TII;
46 
47   AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
48       : M(M), MMI(MMI) {}
49 
50   bool run();
51   bool runOnMachineFunction(MachineFunction &Fn);
52 
53 private:
54   Module *M;
55   MachineModuleInfo *MMI;
56 
57   bool runOnMBB(MachineBasicBlock &MBB);
58   bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
59                MachineBasicBlock::iterator &NextMBBI);
60 
61   /// Lower a HOM_Prolog pseudo instruction into a helper call
62   /// or a sequence of homogeneous stores.
63   /// When a a fp setup follows, it can be optimized.
64   bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
65                    MachineBasicBlock::iterator &NextMBBI);
66   /// Lower a HOM_Epilog pseudo instruction into a helper call
67   /// or a sequence of homogeneous loads.
68   /// When a return follow, it can be optimized.
69   bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                    MachineBasicBlock::iterator &NextMBBI);
71 };
72 
73 class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
74 public:
75   static char ID;
76 
77   AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
78     initializeAArch64LowerHomogeneousPrologEpilogPass(
79         *PassRegistry::getPassRegistry());
80   }
81   void getAnalysisUsage(AnalysisUsage &AU) const override {
82     AU.addRequired<MachineModuleInfoWrapperPass>();
83     AU.addPreserved<MachineModuleInfoWrapperPass>();
84     AU.setPreservesAll();
85     ModulePass::getAnalysisUsage(AU);
86   }
87   bool runOnModule(Module &M) override;
88 
89   StringRef getPassName() const override {
90     return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
91   }
92 };
93 
94 } // end anonymous namespace
95 
// Definition of the pass identifier declared in the class; the constructor
// hands it to ModulePass.
char AArch64LowerHomogeneousPrologEpilog::ID = 0;

// Register the pass under its command-line name and its descriptive name.
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
                "aarch64-lower-homogeneous-prolog-epilog",
                AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
101 
102 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
103   if (skipModule(M))
104     return false;
105 
106   MachineModuleInfo *MMI =
107       &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
108   return AArch64LowerHomogeneousPE(&M, MMI).run();
109 }
110 
111 bool AArch64LowerHomogeneousPE::run() {
112   bool Changed = false;
113   for (auto &F : *M) {
114     if (F.empty())
115       continue;
116 
117     MachineFunction *MF = MMI->getMachineFunction(F);
118     if (!MF)
119       continue;
120     Changed |= runOnMachineFunction(*MF);
121   }
122 
123   return Changed;
124 }
/// Kinds of outlined frame helpers.
enum FrameHelperType {
  Prolog,      ///< Saves callee-saved registers.
  PrologFrame, ///< Saves callee-saved registers and sets up FP.
  Epilog,      ///< Restores callee-saved registers and returns to the caller.
  EpilogTail   ///< Restores callee-saved registers and performs the return.
};
126 
127 /// Return a frame helper name with the given CSRs and the helper type.
128 /// For instance, a prolog helper that saves x19 and x20 is named as
129 /// OUTLINED_FUNCTION_PROLOG_x19x20.
130 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
131                                       FrameHelperType Type, unsigned FpOffset) {
132   std::ostringstream RegStream;
133   switch (Type) {
134   case FrameHelperType::Prolog:
135     RegStream << "OUTLINED_FUNCTION_PROLOG_";
136     break;
137   case FrameHelperType::PrologFrame:
138     RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
139     break;
140   case FrameHelperType::Epilog:
141     RegStream << "OUTLINED_FUNCTION_EPILOG_";
142     break;
143   case FrameHelperType::EpilogTail:
144     RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
145     break;
146   }
147 
148   for (auto Reg : Regs)
149     RegStream << AArch64InstPrinter::getRegisterName(Reg);
150 
151   return RegStream.str();
152 }
153 
154 /// Create a Function for the unique frame helper with the given name.
155 /// Return a newly created MachineFunction with an empty MachineBasicBlock.
156 static MachineFunction &createFrameHelperMachineFunction(Module *M,
157                                                          MachineModuleInfo *MMI,
158                                                          StringRef Name) {
159   LLVMContext &C = M->getContext();
160   Function *F = M->getFunction(Name);
161   assert(F == nullptr && "Function has been created before");
162   F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
163                        Function::ExternalLinkage, Name, M);
164   assert(F && "Function was null!");
165 
166   // Use ODR linkage to avoid duplication.
167   F->setLinkage(GlobalValue::LinkOnceODRLinkage);
168   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
169 
170   // Set no-opt/minsize, so we don't insert padding between outlined
171   // functions.
172   F->addFnAttr(Attribute::OptimizeNone);
173   F->addFnAttr(Attribute::NoInline);
174   F->addFnAttr(Attribute::MinSize);
175   F->addFnAttr(Attribute::Naked);
176 
177   MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
178   // Remove unnecessary register liveness and set NoVRegs.
179   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
180   MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
181   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
182   MF.getRegInfo().freezeReservedRegs(MF);
183 
184   // Create entry block.
185   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
186   IRBuilder<> Builder(EntryBB);
187   Builder.CreateRetVoid();
188 
189   // Insert the new block into the function.
190   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
191   MF.insert(MF.begin(), MBB);
192 
193   return MF;
194 }
195 
196 /// Emit a store-pair instruction for frame-setup.
197 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
198                       MachineBasicBlock::iterator Pos,
199                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
200                       int Offset, bool IsPreDec) {
201   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
202   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
203   unsigned Opc;
204   if (IsPreDec)
205     Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre;
206   else
207     Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi;
208 
209   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
210   if (IsPreDec)
211     MIB.addDef(AArch64::SP);
212   MIB.addReg(Reg2)
213       .addReg(Reg1)
214       .addReg(AArch64::SP)
215       .addImm(Offset)
216       .setMIFlag(MachineInstr::FrameSetup);
217 }
218 
219 /// Emit a load-pair instruction for frame-destroy.
220 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
221                      MachineBasicBlock::iterator Pos,
222                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
223                      int Offset, bool IsPostDec) {
224   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
225   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
226   unsigned Opc;
227   if (IsPostDec)
228     Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost;
229   else
230     Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi;
231 
232   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
233   if (IsPostDec)
234     MIB.addDef(AArch64::SP);
235   MIB.addReg(Reg2, getDefRegState(true))
236       .addReg(Reg1, getDefRegState(true))
237       .addReg(AArch64::SP)
238       .addImm(Offset)
239       .setMIFlag(MachineInstr::FrameDestroy);
240 }
241 
242 /// Return a unique function if a helper can be formed with the given Regs
243 /// and frame type.
244 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
245 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
246 ///    stp x20, x19, [sp, #16]
247 ///    ret
248 ///
249 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
250 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
251 ///    stp x20, x19, [sp, #16]
252 ///    add fp, sp, #32
253 ///    ret
254 ///
255 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
256 ///    mov x16, x30
257 ///    ldp x29, x30, [sp, #32]
258 ///    ldp x20, x19, [sp, #16]
259 ///    ldp x22, x21, [sp], #48
260 ///    ret x16
261 ///
262 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
263 ///    ldp x29, x30, [sp, #32]
264 ///    ldp x20, x19, [sp, #16]
265 ///    ldp x22, x21, [sp], #48
266 ///    ret
267 /// @param M module
268 /// @param MMI machine module info
269 /// @param Regs callee save regs that the helper will handle
270 /// @param Type frame helper type
271 /// @return a helper function
272 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
273                                         SmallVectorImpl<unsigned> &Regs,
274                                         FrameHelperType Type,
275                                         unsigned FpOffset = 0) {
276   assert(Regs.size() >= 2);
277   auto Name = getFrameHelperName(Regs, Type, FpOffset);
278   auto *F = M->getFunction(Name);
279   if (F)
280     return F;
281 
282   auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
283   MachineBasicBlock &MBB = *MF.begin();
284   const TargetSubtargetInfo &STI = MF.getSubtarget();
285   const TargetInstrInfo &TII = *STI.getInstrInfo();
286 
287   int Size = (int)Regs.size();
288   switch (Type) {
289   case FrameHelperType::Prolog:
290   case FrameHelperType::PrologFrame: {
291     // Compute the remaining SP adjust beyond FP/LR.
292     auto LRIdx = std::distance(
293         Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR));
294 
295     // If the register stored to the lowest address is not LR, we must subtract
296     // more from SP here.
297     if (LRIdx != Size - 2) {
298       assert(Regs[Size - 2] != AArch64::LR);
299       emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
300                 LRIdx - Size + 2, true);
301     }
302 
303     // Store CSRs in the reverse order.
304     for (int I = Size - 3; I >= 0; I -= 2) {
305       // FP/LR has been stored at call-site.
306       if (Regs[I - 1] == AArch64::LR)
307         continue;
308       emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
309                 false);
310     }
311     if (Type == FrameHelperType::PrologFrame)
312       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
313           .addDef(AArch64::FP)
314           .addUse(AArch64::SP)
315           .addImm(FpOffset)
316           .addImm(0)
317           .setMIFlag(MachineInstr::FrameSetup);
318 
319     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
320         .addReg(AArch64::LR);
321     break;
322   }
323   case FrameHelperType::Epilog:
324   case FrameHelperType::EpilogTail:
325     if (Type == FrameHelperType::Epilog)
326       // Stash LR to X16
327       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
328           .addDef(AArch64::X16)
329           .addReg(AArch64::XZR)
330           .addUse(AArch64::LR)
331           .addImm(0);
332 
333     for (int I = 0; I < Size - 2; I += 2)
334       emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
335                false);
336     // Restore the last CSR with post-increment of SP.
337     emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
338              true);
339 
340     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
341         .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
342     break;
343   }
344 
345   return M->getFunction(Name);
346 }
347 
348 /// This function checks if a frame helper should be used for
349 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
350 /// @param MBB machine basic block
351 /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
352 /// @param Regs callee save registers that are saved or restored.
353 /// @param Type frame helper type
354 /// @return True if a use of helper is qualified.
355 static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
356                                  MachineBasicBlock::iterator &NextMBBI,
357                                  SmallVectorImpl<unsigned> &Regs,
358                                  FrameHelperType Type) {
359   const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
360   auto RegCount = Regs.size();
361   assert(RegCount > 0 && (RegCount % 2 == 0));
362   // # of instructions that will be outlined.
363   int InstCount = RegCount / 2;
364 
365   // Do not use a helper call when not saving LR.
366   if (!llvm::is_contained(Regs, AArch64::LR))
367     return false;
368 
369   switch (Type) {
370   case FrameHelperType::Prolog:
371     // Prolog helper cannot save FP/LR.
372     InstCount--;
373     break;
374   case FrameHelperType::PrologFrame: {
375     // Effecitvely no change in InstCount since FpAdjusment is included.
376     break;
377   }
378   case FrameHelperType::Epilog:
379     // Bail-out if X16 is live across the epilog helper because it is used in
380     // the helper to handle X30.
381     for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
382       if (NextMI->readsRegister(AArch64::W16, TRI))
383         return false;
384     }
385     // Epilog may not be in the last block. Check the liveness in successors.
386     for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
387       if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
388         return false;
389     }
390     // No change in InstCount for the regular epilog case.
391     break;
392   case FrameHelperType::EpilogTail: {
393     // EpilogTail helper includes the caller's return.
394     if (NextMBBI == MBB.end())
395       return false;
396     if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
397       return false;
398     InstCount++;
399     break;
400   }
401   }
402 
403   return InstCount >= FrameHelperSizeThreshold;
404 }
405 
406 /// Lower a HOM_Epilog pseudo instruction into a helper call while
407 /// creating the helper on demand. Or emit a sequence of loads in place when not
408 /// using a helper call.
409 ///
410 /// 1. With a helper including ret
411 ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
412 ///    ret                                                  ; NextMBBI
413 ///    =>
414 ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
415 ///    ...                                                  ; NextMBBI
416 ///
417 /// 2. With a helper
418 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
419 ///    =>
420 ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
421 ///
422 /// 3. Without a helper
423 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
424 ///    =>
425 ///    ldp x29, x30, [sp, #32]
426 ///    ldp x20, x19, [sp, #16]
427 ///    ldp x22, x21, [sp], #48
428 bool AArch64LowerHomogeneousPE::lowerEpilog(
429     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
430     MachineBasicBlock::iterator &NextMBBI) {
431   auto &MF = *MBB.getParent();
432   MachineInstr &MI = *MBBI;
433 
434   DebugLoc DL = MI.getDebugLoc();
435   SmallVector<unsigned, 8> Regs;
436   for (auto &MO : MI.operands())
437     if (MO.isReg())
438       Regs.push_back(MO.getReg());
439   int Size = (int)Regs.size();
440   if (Size == 0)
441     return false;
442   // Registers are in pair.
443   assert(Size % 2 == 0);
444   assert(MI.getOpcode() == AArch64::HOM_Epilog);
445 
446   auto Return = NextMBBI;
447   if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
448     // When MBB ends with a return, emit a tail-call to the epilog helper
449     auto *EpilogTailHelper =
450         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
451     BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
452         .addGlobalAddress(EpilogTailHelper)
453         .addImm(0)
454         .setMIFlag(MachineInstr::FrameDestroy)
455         .copyImplicitOps(MI)
456         .copyImplicitOps(*Return);
457     NextMBBI = std::next(Return);
458     Return->removeFromParent();
459   } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
460                                   FrameHelperType::Epilog)) {
461     // The default epilog helper case.
462     auto *EpilogHelper =
463         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
464     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
465         .addGlobalAddress(EpilogHelper)
466         .setMIFlag(MachineInstr::FrameDestroy)
467         .copyImplicitOps(MI);
468   } else {
469     // Fall back to no-helper.
470     for (int I = 0; I < Size - 2; I += 2)
471       emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
472     // Restore the last CSR with post-increment of SP.
473     emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
474   }
475 
476   MBBI->removeFromParent();
477   return true;
478 }
479 
480 /// Lower a HOM_Prolog pseudo instruction into a helper call while
481 /// creating the helper on demand. Or emit a sequence of stores in place when
482 /// not using a helper call.
483 ///
484 /// 1. With a helper including frame-setup
485 ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
486 ///    =>
487 ///    stp x29, x30, [sp, #-16]!
488 ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
489 ///
490 /// 2. With a helper
491 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
492 ///    =>
493 ///    stp x29, x30, [sp, #-16]!
494 ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
495 ///
496 /// 3. Without a helper
497 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
498 ///    =>
499 ///    stp	x22, x21, [sp, #-48]!
500 ///    stp	x20, x19, [sp, #16]
501 ///    stp	x29, x30, [sp, #32]
502 bool AArch64LowerHomogeneousPE::lowerProlog(
503     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
504     MachineBasicBlock::iterator &NextMBBI) {
505   auto &MF = *MBB.getParent();
506   MachineInstr &MI = *MBBI;
507 
508   DebugLoc DL = MI.getDebugLoc();
509   SmallVector<unsigned, 8> Regs;
510   int LRIdx = 0;
511   Optional<int> FpOffset;
512   for (auto &MO : MI.operands()) {
513     if (MO.isReg()) {
514       if (MO.getReg() == AArch64::LR)
515         LRIdx = Regs.size();
516       Regs.push_back(MO.getReg());
517     } else if (MO.isImm()) {
518       FpOffset = MO.getImm();
519     }
520   }
521   int Size = (int)Regs.size();
522   if (Size == 0)
523     return false;
524   // Allow compact unwind case only for oww.
525   assert(Size % 2 == 0);
526   assert(MI.getOpcode() == AArch64::HOM_Prolog);
527 
528   if (FpOffset &&
529       shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
530     // FP/LR is stored at the top of stack before the prolog helper call.
531     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
532     auto *PrologFrameHelper = getOrCreateFrameHelper(
533         M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
534     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
535         .addGlobalAddress(PrologFrameHelper)
536         .setMIFlag(MachineInstr::FrameSetup)
537         .copyImplicitOps(MI)
538         .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
539         .addReg(AArch64::SP, RegState::Implicit);
540   } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
541                                                FrameHelperType::Prolog)) {
542     // FP/LR is stored at the top of stack before the prolog helper call.
543     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
544     auto *PrologHelper =
545         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
546     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
547         .addGlobalAddress(PrologHelper)
548         .setMIFlag(MachineInstr::FrameSetup)
549         .copyImplicitOps(MI);
550   } else {
551     // Fall back to no-helper.
552     emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
553     for (int I = Size - 3; I >= 0; I -= 2)
554       emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
555     if (FpOffset) {
556       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
557           .addDef(AArch64::FP)
558           .addUse(AArch64::SP)
559           .addImm(*FpOffset)
560           .addImm(0)
561           .setMIFlag(MachineInstr::FrameSetup);
562     }
563   }
564 
565   MBBI->removeFromParent();
566   return true;
567 }
568 
569 /// Process each machine instruction
570 /// @param MBB machine basic block
571 /// @param MBBI current instruction iterator
572 /// @param NextMBBI next instruction iterator which can be updated
573 /// @return True when IR is changed.
574 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
575                                         MachineBasicBlock::iterator MBBI,
576                                         MachineBasicBlock::iterator &NextMBBI) {
577   MachineInstr &MI = *MBBI;
578   unsigned Opcode = MI.getOpcode();
579   switch (Opcode) {
580   default:
581     break;
582   case AArch64::HOM_Prolog:
583     return lowerProlog(MBB, MBBI, NextMBBI);
584   case AArch64::HOM_Epilog:
585     return lowerEpilog(MBB, MBBI, NextMBBI);
586   }
587   return false;
588 }
589 
590 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
591   bool Modified = false;
592 
593   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
594   while (MBBI != E) {
595     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
596     Modified |= runOnMI(MBB, MBBI, NMBBI);
597     MBBI = NMBBI;
598   }
599 
600   return Modified;
601 }
602 
603 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
604   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
605 
606   bool Modified = false;
607   for (auto &MBB : MF)
608     Modified |= runOnMBB(MBB);
609   return Modified;
610 }
611 
612 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
613   return new AArch64LowerHomogeneousPrologEpilog();
614 }
615