xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (revision 2c2ec6bbc9cc7762a250ffe903bda6c2e44d25ff)
1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that lowers homogeneous prolog/epilog instructions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "MCTargetDesc/AArch64InstPrinter.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineModuleInfo.h"
21 #include "llvm/CodeGen/MachineOperand.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DebugLoc.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Pass.h"
27 #include <optional>
28 #include <sstream>
29 
30 using namespace llvm;
31 
// Descriptive pass name; used when registering the pass and returned by
// getPassName().
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
  "AArch64 homogeneous prolog/epilog lowering pass"
34 
// Hidden command-line option: a frame helper is only used when the number of
// instructions it would outline is at least this value (initial value 2).
static cl::opt<int> FrameHelperSizeThreshold(
    "frame-helper-size-threshold", cl::init(2), cl::Hidden,
    cl::desc("The minimum number of instructions that are outlined in a frame "
             "helper (default = 2)"));
39 
40 namespace {
41 
42 class AArch64LowerHomogeneousPE {
43 public:
44   const AArch64InstrInfo *TII;
45 
46   AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
47       : M(M), MMI(MMI) {}
48 
49   bool run();
50   bool runOnMachineFunction(MachineFunction &Fn);
51 
52 private:
53   Module *M;
54   MachineModuleInfo *MMI;
55 
56   bool runOnMBB(MachineBasicBlock &MBB);
57   bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
58                MachineBasicBlock::iterator &NextMBBI);
59 
60   /// Lower a HOM_Prolog pseudo instruction into a helper call
61   /// or a sequence of homogeneous stores.
62   /// When a fp setup follows, it can be optimized.
63   bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
64                    MachineBasicBlock::iterator &NextMBBI);
65   /// Lower a HOM_Epilog pseudo instruction into a helper call
66   /// or a sequence of homogeneous loads.
67   /// When a return follow, it can be optimized.
68   bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
69                    MachineBasicBlock::iterator &NextMBBI);
70 };
71 
72 class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
73 public:
74   static char ID;
75 
76   AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {}
77   void getAnalysisUsage(AnalysisUsage &AU) const override {
78     AU.addRequired<MachineModuleInfoWrapperPass>();
79     AU.addPreserved<MachineModuleInfoWrapperPass>();
80     AU.setPreservesAll();
81     ModulePass::getAnalysisUsage(AU);
82   }
83   bool runOnModule(Module &M) override;
84 
85   StringRef getPassName() const override {
86     return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
87   }
88 };
89 
90 } // end anonymous namespace
91 
// Pass identifier passed to the ModulePass base-class constructor.
char AArch64LowerHomogeneousPrologEpilog::ID = 0;

// Register the pass under the "aarch64-lower-homogeneous-prolog-epilog"
// argument string together with its descriptive name.
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
                "aarch64-lower-homogeneous-prolog-epilog",
                AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
97 
98 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
99   if (skipModule(M))
100     return false;
101 
102   MachineModuleInfo *MMI =
103       &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
104   return AArch64LowerHomogeneousPE(&M, MMI).run();
105 }
106 
107 bool AArch64LowerHomogeneousPE::run() {
108   bool Changed = false;
109   for (auto &F : *M) {
110     if (F.empty())
111       continue;
112 
113     MachineFunction *MF = MMI->getMachineFunction(F);
114     if (!MF)
115       continue;
116     Changed |= runOnMachineFunction(*MF);
117   }
118 
119   return Changed;
120 }
/// Kinds of frame helper; the kind selects both the helper's name prefix and
/// the instructions placed in its body.
enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
122 
123 /// Return a frame helper name with the given CSRs and the helper type.
124 /// For instance, a prolog helper that saves x19 and x20 is named as
125 /// OUTLINED_FUNCTION_PROLOG_x19x20.
126 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
127                                       FrameHelperType Type, unsigned FpOffset) {
128   std::ostringstream RegStream;
129   switch (Type) {
130   case FrameHelperType::Prolog:
131     RegStream << "OUTLINED_FUNCTION_PROLOG_";
132     break;
133   case FrameHelperType::PrologFrame:
134     RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
135     break;
136   case FrameHelperType::Epilog:
137     RegStream << "OUTLINED_FUNCTION_EPILOG_";
138     break;
139   case FrameHelperType::EpilogTail:
140     RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
141     break;
142   }
143 
144   for (auto Reg : Regs) {
145     if (Reg == AArch64::NoRegister)
146       continue;
147     RegStream << AArch64InstPrinter::getRegisterName(Reg);
148   }
149 
150   return RegStream.str();
151 }
152 
153 /// Create a Function for the unique frame helper with the given name.
154 /// Return a newly created MachineFunction with an empty MachineBasicBlock.
155 static MachineFunction &createFrameHelperMachineFunction(Module *M,
156                                                          MachineModuleInfo *MMI,
157                                                          StringRef Name) {
158   LLVMContext &C = M->getContext();
159   Function *F = M->getFunction(Name);
160   assert(F == nullptr && "Function has been created before");
161   F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
162                        Function::ExternalLinkage, Name, M);
163   assert(F && "Function was null!");
164 
165   // Use ODR linkage to avoid duplication.
166   F->setLinkage(GlobalValue::LinkOnceODRLinkage);
167   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
168 
169   // Set minsize, so we don't insert padding between outlined functions.
170   F->addFnAttr(Attribute::NoInline);
171   F->addFnAttr(Attribute::MinSize);
172   F->addFnAttr(Attribute::Naked);
173 
174   MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
175   // Remove unnecessary register liveness and set NoVRegs.
176   MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs();
177   MF.getRegInfo().freezeReservedRegs();
178 
179   // Create entry block.
180   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
181   IRBuilder<> Builder(EntryBB);
182   Builder.CreateRetVoid();
183 
184   // Insert the new block into the function.
185   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
186   MF.insert(MF.begin(), MBB);
187 
188   return MF;
189 }
190 
191 /// Emit a store-pair instruction for frame-setup.
192 /// If Reg2 is AArch64::NoRegister, emit STR instead.
193 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
194                       MachineBasicBlock::iterator Pos,
195                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
196                       int Offset, bool IsPreDec) {
197   assert(Reg1 != AArch64::NoRegister);
198   const bool IsPaired = Reg2 != AArch64::NoRegister;
199   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
200   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
201   unsigned Opc;
202   if (IsPreDec) {
203     if (IsFloat)
204       Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
205     else
206       Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
207   } else {
208     if (IsFloat)
209       Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
210     else
211       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
212   }
213   // The implicit scale for Offset is 8.
214   TypeSize Scale(0U, false), Width(0U, false);
215   int64_t MinOffset, MaxOffset;
216   [[maybe_unused]] bool Success =
217       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
218   assert(Success && "Invalid Opcode");
219   Offset *= (8 / (int)Scale);
220 
221   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
222   if (IsPreDec)
223     MIB.addDef(AArch64::SP);
224   if (IsPaired)
225     MIB.addReg(Reg2);
226   MIB.addReg(Reg1)
227       .addReg(AArch64::SP)
228       .addImm(Offset)
229       .setMIFlag(MachineInstr::FrameSetup);
230 }
231 
232 /// Emit a load-pair instruction for frame-destroy.
233 /// If Reg2 is AArch64::NoRegister, emit LDR instead.
234 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
235                      MachineBasicBlock::iterator Pos,
236                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
237                      int Offset, bool IsPostDec) {
238   assert(Reg1 != AArch64::NoRegister);
239   const bool IsPaired = Reg2 != AArch64::NoRegister;
240   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
241   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
242   unsigned Opc;
243   if (IsPostDec) {
244     if (IsFloat)
245       Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
246     else
247       Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
248   } else {
249     if (IsFloat)
250       Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
251     else
252       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
253   }
254   // The implicit scale for Offset is 8.
255   TypeSize Scale(0U, false), Width(0U, false);
256   int64_t MinOffset, MaxOffset;
257   [[maybe_unused]] bool Success =
258       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
259   assert(Success && "Invalid Opcode");
260   Offset *= (8 / (int)Scale);
261 
262   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
263   if (IsPostDec)
264     MIB.addDef(AArch64::SP);
265   if (IsPaired)
266     MIB.addReg(Reg2, getDefRegState(true));
267   MIB.addReg(Reg1, getDefRegState(true))
268       .addReg(AArch64::SP)
269       .addImm(Offset)
270       .setMIFlag(MachineInstr::FrameDestroy);
271 }
272 
273 /// Return a unique function if a helper can be formed with the given Regs
274 /// and frame type.
275 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
276 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
277 ///    stp x20, x19, [sp, #16]
278 ///    ret
279 ///
280 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
281 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
282 ///    stp x20, x19, [sp, #16]
283 ///    add fp, sp, #32
284 ///    ret
285 ///
286 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
287 ///    mov x16, x30
288 ///    ldp x29, x30, [sp, #32]
289 ///    ldp x20, x19, [sp, #16]
290 ///    ldp x22, x21, [sp], #48
291 ///    ret x16
292 ///
293 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
294 ///    ldp x29, x30, [sp, #32]
295 ///    ldp x20, x19, [sp, #16]
296 ///    ldp x22, x21, [sp], #48
297 ///    ret
298 /// @param M module
299 /// @param MMI machine module info
300 /// @param Regs callee save regs that the helper will handle
301 /// @param Type frame helper type
302 /// @return a helper function
303 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
304                                         SmallVectorImpl<unsigned> &Regs,
305                                         FrameHelperType Type,
306                                         unsigned FpOffset = 0) {
307   assert(Regs.size() >= 2);
308   auto Name = getFrameHelperName(Regs, Type, FpOffset);
309   auto *F = M->getFunction(Name);
310   if (F)
311     return F;
312 
313   auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
314   MachineBasicBlock &MBB = *MF.begin();
315   const TargetSubtargetInfo &STI = MF.getSubtarget();
316   const TargetInstrInfo &TII = *STI.getInstrInfo();
317 
318   int Size = (int)Regs.size();
319   switch (Type) {
320   case FrameHelperType::Prolog:
321   case FrameHelperType::PrologFrame: {
322     // Compute the remaining SP adjust beyond FP/LR.
323     auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
324 
325     // If the register stored to the lowest address is not LR, we must subtract
326     // more from SP here.
327     if (LRIdx != Size - 2) {
328       assert(Regs[Size - 2] != AArch64::LR);
329       emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
330                 LRIdx - Size + 2, true);
331     }
332 
333     // Store CSRs in the reverse order.
334     for (int I = Size - 3; I >= 0; I -= 2) {
335       // FP/LR has been stored at call-site.
336       if (Regs[I - 1] == AArch64::LR)
337         continue;
338       emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
339                 false);
340     }
341     if (Type == FrameHelperType::PrologFrame)
342       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
343           .addDef(AArch64::FP)
344           .addUse(AArch64::SP)
345           .addImm(FpOffset)
346           .addImm(0)
347           .setMIFlag(MachineInstr::FrameSetup);
348 
349     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
350         .addReg(AArch64::LR);
351     break;
352   }
353   case FrameHelperType::Epilog:
354   case FrameHelperType::EpilogTail:
355     if (Type == FrameHelperType::Epilog)
356       // Stash LR to X16
357       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
358           .addDef(AArch64::X16)
359           .addReg(AArch64::XZR)
360           .addUse(AArch64::LR)
361           .addImm(0);
362 
363     for (int I = 0; I < Size - 2; I += 2)
364       emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
365                false);
366     // Restore the last CSR with post-increment of SP.
367     emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
368              true);
369 
370     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
371         .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
372     break;
373   }
374 
375   return M->getFunction(Name);
376 }
377 
378 /// This function checks if a frame helper should be used for
379 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
380 /// @param MBB machine basic block
381 /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
382 /// @param Regs callee save registers that are saved or restored.
383 /// @param Type frame helper type
384 /// @return True if a use of helper is qualified.
385 static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
386                                  MachineBasicBlock::iterator &NextMBBI,
387                                  SmallVectorImpl<unsigned> &Regs,
388                                  FrameHelperType Type) {
389   const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
390   auto RegCount = Regs.size();
391   assert(RegCount > 0 && (RegCount % 2 == 0));
392   // # of instructions that will be outlined.
393   int InstCount = RegCount / 2;
394 
395   // Do not use a helper call when not saving LR.
396   if (!llvm::is_contained(Regs, AArch64::LR))
397     return false;
398 
399   switch (Type) {
400   case FrameHelperType::Prolog:
401     // Prolog helper cannot save FP/LR.
402     InstCount--;
403     break;
404   case FrameHelperType::PrologFrame: {
405     // Effectively no change in InstCount since FpAdjustment is included.
406     break;
407   }
408   case FrameHelperType::Epilog:
409     // Bail-out if X16 is live across the epilog helper because it is used in
410     // the helper to handle X30.
411     for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
412       if (NextMI->readsRegister(AArch64::W16, TRI))
413         return false;
414     }
415     // Epilog may not be in the last block. Check the liveness in successors.
416     for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
417       if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
418         return false;
419     }
420     // No change in InstCount for the regular epilog case.
421     break;
422   case FrameHelperType::EpilogTail: {
423     // EpilogTail helper includes the caller's return.
424     if (NextMBBI == MBB.end())
425       return false;
426     if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
427       return false;
428     InstCount++;
429     break;
430   }
431   }
432 
433   return InstCount >= FrameHelperSizeThreshold;
434 }
435 
436 /// Lower a HOM_Epilog pseudo instruction into a helper call while
437 /// creating the helper on demand. Or emit a sequence of loads in place when not
438 /// using a helper call.
439 ///
440 /// 1. With a helper including ret
441 ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
442 ///    ret                                                  ; NextMBBI
443 ///    =>
444 ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
445 ///    ...                                                  ; NextMBBI
446 ///
447 /// 2. With a helper
448 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
449 ///    =>
450 ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
451 ///
452 /// 3. Without a helper
453 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
454 ///    =>
455 ///    ldp x29, x30, [sp, #32]
456 ///    ldp x20, x19, [sp, #16]
457 ///    ldp x22, x21, [sp], #48
458 bool AArch64LowerHomogeneousPE::lowerEpilog(
459     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
460     MachineBasicBlock::iterator &NextMBBI) {
461   auto &MF = *MBB.getParent();
462   MachineInstr &MI = *MBBI;
463 
464   DebugLoc DL = MI.getDebugLoc();
465   SmallVector<unsigned, 8> Regs;
466   bool HasUnpairedReg = false;
467   for (auto &MO : MI.operands())
468     if (MO.isReg()) {
469       if (!MO.getReg().isValid()) {
470         // For now we are only expecting unpaired GP registers which should
471         // occur exactly once.
472         assert(!HasUnpairedReg);
473         HasUnpairedReg = true;
474       }
475       Regs.push_back(MO.getReg());
476     }
477   (void)HasUnpairedReg;
478   int Size = (int)Regs.size();
479   if (Size == 0)
480     return false;
481   // Registers are in pair.
482   assert(Size % 2 == 0);
483   assert(MI.getOpcode() == AArch64::HOM_Epilog);
484 
485   auto Return = NextMBBI;
486   if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
487     // When MBB ends with a return, emit a tail-call to the epilog helper
488     auto *EpilogTailHelper =
489         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
490     BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
491         .addGlobalAddress(EpilogTailHelper)
492         .addImm(0)
493         .setMIFlag(MachineInstr::FrameDestroy)
494         .copyImplicitOps(MI)
495         .copyImplicitOps(*Return);
496     NextMBBI = std::next(Return);
497     Return->removeFromParent();
498   } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
499                                   FrameHelperType::Epilog)) {
500     // The default epilog helper case.
501     auto *EpilogHelper =
502         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
503     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
504         .addGlobalAddress(EpilogHelper)
505         .setMIFlag(MachineInstr::FrameDestroy)
506         .copyImplicitOps(MI);
507   } else {
508     // Fall back to no-helper.
509     for (int I = 0; I < Size - 2; I += 2)
510       emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
511     // Restore the last CSR with post-increment of SP.
512     emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
513   }
514 
515   MBBI->removeFromParent();
516   return true;
517 }
518 
519 /// Lower a HOM_Prolog pseudo instruction into a helper call while
520 /// creating the helper on demand. Or emit a sequence of stores in place when
521 /// not using a helper call.
522 ///
523 /// 1. With a helper including frame-setup
524 ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
525 ///    =>
526 ///    stp x29, x30, [sp, #-16]!
527 ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
528 ///
529 /// 2. With a helper
530 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
531 ///    =>
532 ///    stp x29, x30, [sp, #-16]!
533 ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
534 ///
535 /// 3. Without a helper
536 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
537 ///    =>
538 ///    stp	x22, x21, [sp, #-48]!
539 ///    stp	x20, x19, [sp, #16]
540 ///    stp	x29, x30, [sp, #32]
541 bool AArch64LowerHomogeneousPE::lowerProlog(
542     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
543     MachineBasicBlock::iterator &NextMBBI) {
544   auto &MF = *MBB.getParent();
545   MachineInstr &MI = *MBBI;
546 
547   DebugLoc DL = MI.getDebugLoc();
548   SmallVector<unsigned, 8> Regs;
549   bool HasUnpairedReg = false;
550   int LRIdx = 0;
551   std::optional<int> FpOffset;
552   for (auto &MO : MI.operands()) {
553     if (MO.isReg()) {
554       if (MO.getReg().isValid()) {
555         if (MO.getReg() == AArch64::LR)
556           LRIdx = Regs.size();
557       } else {
558         // For now we are only expecting unpaired GP registers which should
559         // occur exactly once.
560         assert(!HasUnpairedReg);
561         HasUnpairedReg = true;
562       }
563       Regs.push_back(MO.getReg());
564     } else if (MO.isImm()) {
565       FpOffset = MO.getImm();
566     }
567   }
568   (void)HasUnpairedReg;
569   int Size = (int)Regs.size();
570   if (Size == 0)
571     return false;
572   // Allow compact unwind case only for oww.
573   assert(Size % 2 == 0);
574   assert(MI.getOpcode() == AArch64::HOM_Prolog);
575 
576   if (FpOffset &&
577       shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
578     // FP/LR is stored at the top of stack before the prolog helper call.
579     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
580     auto *PrologFrameHelper = getOrCreateFrameHelper(
581         M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
582     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
583         .addGlobalAddress(PrologFrameHelper)
584         .setMIFlag(MachineInstr::FrameSetup)
585         .copyImplicitOps(MI)
586         .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
587         .addReg(AArch64::SP, RegState::Implicit);
588   } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
589                                                FrameHelperType::Prolog)) {
590     // FP/LR is stored at the top of stack before the prolog helper call.
591     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
592     auto *PrologHelper =
593         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
594     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
595         .addGlobalAddress(PrologHelper)
596         .setMIFlag(MachineInstr::FrameSetup)
597         .copyImplicitOps(MI);
598   } else {
599     // Fall back to no-helper.
600     emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
601     for (int I = Size - 3; I >= 0; I -= 2)
602       emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
603     if (FpOffset) {
604       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
605           .addDef(AArch64::FP)
606           .addUse(AArch64::SP)
607           .addImm(*FpOffset)
608           .addImm(0)
609           .setMIFlag(MachineInstr::FrameSetup);
610     }
611   }
612 
613   MBBI->removeFromParent();
614   return true;
615 }
616 
617 /// Process each machine instruction
618 /// @param MBB machine basic block
619 /// @param MBBI current instruction iterator
620 /// @param NextMBBI next instruction iterator which can be updated
621 /// @return True when IR is changed.
622 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
623                                         MachineBasicBlock::iterator MBBI,
624                                         MachineBasicBlock::iterator &NextMBBI) {
625   MachineInstr &MI = *MBBI;
626   unsigned Opcode = MI.getOpcode();
627   switch (Opcode) {
628   default:
629     break;
630   case AArch64::HOM_Prolog:
631     return lowerProlog(MBB, MBBI, NextMBBI);
632   case AArch64::HOM_Epilog:
633     return lowerEpilog(MBB, MBBI, NextMBBI);
634   }
635   return false;
636 }
637 
638 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
639   bool Modified = false;
640 
641   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
642   while (MBBI != E) {
643     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
644     Modified |= runOnMI(MBB, MBBI, NMBBI);
645     MBBI = NMBBI;
646   }
647 
648   return Modified;
649 }
650 
651 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
652   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
653 
654   bool Modified = false;
655   for (auto &MBB : MF)
656     Modified |= runOnMBB(MBB);
657   return Modified;
658 }
659 
/// Factory for the pass: returns a newly allocated
/// AArch64LowerHomogeneousPrologEpilog instance.
ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  return new AArch64LowerHomogeneousPrologEpilog();
}
663