xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (revision 725a9f47324d42037db93c27ceb40d4956872f3e)
1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that lowers homogeneous prolog/epilog instructions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "MCTargetDesc/AArch64InstPrinter.h"
16 #include "Utils/AArch64BaseInfo.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstr.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineModuleInfo.h"
23 #include "llvm/CodeGen/MachineOperand.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/IR/DebugLoc.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <optional>
30 #include <sstream>
31 
32 using namespace llvm;
33 
34 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
35   "AArch64 homogeneous prolog/epilog lowering pass"
36 
37 cl::opt<int> FrameHelperSizeThreshold(
38     "frame-helper-size-threshold", cl::init(2), cl::Hidden,
39     cl::desc("The minimum number of instructions that are outlined in a frame "
40              "helper (default = 2)"));
41 
42 namespace {
43 
44 class AArch64LowerHomogeneousPE {
45 public:
46   const AArch64InstrInfo *TII;
47 
48   AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
49       : M(M), MMI(MMI) {}
50 
51   bool run();
52   bool runOnMachineFunction(MachineFunction &Fn);
53 
54 private:
55   Module *M;
56   MachineModuleInfo *MMI;
57 
58   bool runOnMBB(MachineBasicBlock &MBB);
59   bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
60                MachineBasicBlock::iterator &NextMBBI);
61 
62   /// Lower a HOM_Prolog pseudo instruction into a helper call
63   /// or a sequence of homogeneous stores.
64   /// When a fp setup follows, it can be optimized.
65   bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66                    MachineBasicBlock::iterator &NextMBBI);
67   /// Lower a HOM_Epilog pseudo instruction into a helper call
68   /// or a sequence of homogeneous loads.
69   /// When a return follow, it can be optimized.
70   bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
71                    MachineBasicBlock::iterator &NextMBBI);
72 };
73 
74 class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
75 public:
76   static char ID;
77 
78   AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
79     initializeAArch64LowerHomogeneousPrologEpilogPass(
80         *PassRegistry::getPassRegistry());
81   }
82   void getAnalysisUsage(AnalysisUsage &AU) const override {
83     AU.addRequired<MachineModuleInfoWrapperPass>();
84     AU.addPreserved<MachineModuleInfoWrapperPass>();
85     AU.setPreservesAll();
86     ModulePass::getAnalysisUsage(AU);
87   }
88   bool runOnModule(Module &M) override;
89 
90   StringRef getPassName() const override {
91     return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
92   }
93 };
94 
95 } // end anonymous namespace
96 
// Storage for the static ID member that the pass constructor hands to
// ModulePass.
char AArch64LowerHomogeneousPrologEpilog::ID = 0;

// Pass registration. Arguments: the pass class, its argument string, its
// display name, and two boolean flags (both false here).
INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
                "aarch64-lower-homogeneous-prolog-epilog",
                AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
102 
103 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
104   if (skipModule(M))
105     return false;
106 
107   MachineModuleInfo *MMI =
108       &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
109   return AArch64LowerHomogeneousPE(&M, MMI).run();
110 }
111 
112 bool AArch64LowerHomogeneousPE::run() {
113   bool Changed = false;
114   for (auto &F : *M) {
115     if (F.empty())
116       continue;
117 
118     MachineFunction *MF = MMI->getMachineFunction(F);
119     if (!MF)
120       continue;
121     Changed |= runOnMachineFunction(*MF);
122   }
123 
124   return Changed;
125 }
/// Kind of outlined frame helper:
///  - Prolog:      stores callee-saved registers.
///  - PrologFrame: like Prolog, and additionally sets up FP.
///  - Epilog:      reloads callee-saved registers and returns to the caller.
///  - EpilogTail:  like Epilog, but also performs the caller's return.
/// Scoped so the enumerator names do not leak into the enclosing scope and
/// do not convert implicitly to integers.
enum class FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail };
127 
128 /// Return a frame helper name with the given CSRs and the helper type.
129 /// For instance, a prolog helper that saves x19 and x20 is named as
130 /// OUTLINED_FUNCTION_PROLOG_x19x20.
131 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
132                                       FrameHelperType Type, unsigned FpOffset) {
133   std::ostringstream RegStream;
134   switch (Type) {
135   case FrameHelperType::Prolog:
136     RegStream << "OUTLINED_FUNCTION_PROLOG_";
137     break;
138   case FrameHelperType::PrologFrame:
139     RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
140     break;
141   case FrameHelperType::Epilog:
142     RegStream << "OUTLINED_FUNCTION_EPILOG_";
143     break;
144   case FrameHelperType::EpilogTail:
145     RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
146     break;
147   }
148 
149   for (auto Reg : Regs) {
150     if (Reg == AArch64::NoRegister)
151       continue;
152     RegStream << AArch64InstPrinter::getRegisterName(Reg);
153   }
154 
155   return RegStream.str();
156 }
157 
158 /// Create a Function for the unique frame helper with the given name.
159 /// Return a newly created MachineFunction with an empty MachineBasicBlock.
160 static MachineFunction &createFrameHelperMachineFunction(Module *M,
161                                                          MachineModuleInfo *MMI,
162                                                          StringRef Name) {
163   LLVMContext &C = M->getContext();
164   Function *F = M->getFunction(Name);
165   assert(F == nullptr && "Function has been created before");
166   F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
167                        Function::ExternalLinkage, Name, M);
168   assert(F && "Function was null!");
169 
170   // Use ODR linkage to avoid duplication.
171   F->setLinkage(GlobalValue::LinkOnceODRLinkage);
172   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
173 
174   // Set no-opt/minsize, so we don't insert padding between outlined
175   // functions.
176   F->addFnAttr(Attribute::OptimizeNone);
177   F->addFnAttr(Attribute::NoInline);
178   F->addFnAttr(Attribute::MinSize);
179   F->addFnAttr(Attribute::Naked);
180 
181   MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
182   // Remove unnecessary register liveness and set NoVRegs.
183   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
184   MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
185   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
186   MF.getRegInfo().freezeReservedRegs(MF);
187 
188   // Create entry block.
189   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
190   IRBuilder<> Builder(EntryBB);
191   Builder.CreateRetVoid();
192 
193   // Insert the new block into the function.
194   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
195   MF.insert(MF.begin(), MBB);
196 
197   return MF;
198 }
199 
200 /// Emit a store-pair instruction for frame-setup.
201 /// If Reg2 is AArch64::NoRegister, emit STR instead.
202 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
203                       MachineBasicBlock::iterator Pos,
204                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
205                       int Offset, bool IsPreDec) {
206   assert(Reg1 != AArch64::NoRegister);
207   const bool IsPaired = Reg2 != AArch64::NoRegister;
208   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
209   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
210   unsigned Opc;
211   if (IsPreDec) {
212     if (IsFloat)
213       Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
214     else
215       Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
216   } else {
217     if (IsFloat)
218       Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
219     else
220       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
221   }
222   // The implicit scale for Offset is 8.
223   TypeSize Scale(0U, false), Width(0U, false);
224   int64_t MinOffset, MaxOffset;
225   [[maybe_unused]] bool Success =
226       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
227   assert(Success && "Invalid Opcode");
228   Offset *= (8 / (int)Scale);
229 
230   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
231   if (IsPreDec)
232     MIB.addDef(AArch64::SP);
233   if (IsPaired)
234     MIB.addReg(Reg2);
235   MIB.addReg(Reg1)
236       .addReg(AArch64::SP)
237       .addImm(Offset)
238       .setMIFlag(MachineInstr::FrameSetup);
239 }
240 
241 /// Emit a load-pair instruction for frame-destroy.
242 /// If Reg2 is AArch64::NoRegister, emit LDR instead.
243 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
244                      MachineBasicBlock::iterator Pos,
245                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
246                      int Offset, bool IsPostDec) {
247   assert(Reg1 != AArch64::NoRegister);
248   const bool IsPaired = Reg2 != AArch64::NoRegister;
249   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
250   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
251   unsigned Opc;
252   if (IsPostDec) {
253     if (IsFloat)
254       Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
255     else
256       Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
257   } else {
258     if (IsFloat)
259       Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
260     else
261       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
262   }
263   // The implicit scale for Offset is 8.
264   TypeSize Scale(0U, false), Width(0U, false);
265   int64_t MinOffset, MaxOffset;
266   [[maybe_unused]] bool Success =
267       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
268   assert(Success && "Invalid Opcode");
269   Offset *= (8 / (int)Scale);
270 
271   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
272   if (IsPostDec)
273     MIB.addDef(AArch64::SP);
274   if (IsPaired)
275     MIB.addReg(Reg2, getDefRegState(true));
276   MIB.addReg(Reg1, getDefRegState(true))
277       .addReg(AArch64::SP)
278       .addImm(Offset)
279       .setMIFlag(MachineInstr::FrameDestroy);
280 }
281 
282 /// Return a unique function if a helper can be formed with the given Regs
283 /// and frame type.
284 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
285 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
286 ///    stp x20, x19, [sp, #16]
287 ///    ret
288 ///
289 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
290 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
291 ///    stp x20, x19, [sp, #16]
292 ///    add fp, sp, #32
293 ///    ret
294 ///
295 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
296 ///    mov x16, x30
297 ///    ldp x29, x30, [sp, #32]
298 ///    ldp x20, x19, [sp, #16]
299 ///    ldp x22, x21, [sp], #48
300 ///    ret x16
301 ///
302 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
303 ///    ldp x29, x30, [sp, #32]
304 ///    ldp x20, x19, [sp, #16]
305 ///    ldp x22, x21, [sp], #48
306 ///    ret
307 /// @param M module
308 /// @param MMI machine module info
309 /// @param Regs callee save regs that the helper will handle
310 /// @param Type frame helper type
311 /// @return a helper function
312 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
313                                         SmallVectorImpl<unsigned> &Regs,
314                                         FrameHelperType Type,
315                                         unsigned FpOffset = 0) {
316   assert(Regs.size() >= 2);
317   auto Name = getFrameHelperName(Regs, Type, FpOffset);
318   auto *F = M->getFunction(Name);
319   if (F)
320     return F;
321 
322   auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
323   MachineBasicBlock &MBB = *MF.begin();
324   const TargetSubtargetInfo &STI = MF.getSubtarget();
325   const TargetInstrInfo &TII = *STI.getInstrInfo();
326 
327   int Size = (int)Regs.size();
328   switch (Type) {
329   case FrameHelperType::Prolog:
330   case FrameHelperType::PrologFrame: {
331     // Compute the remaining SP adjust beyond FP/LR.
332     auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
333 
334     // If the register stored to the lowest address is not LR, we must subtract
335     // more from SP here.
336     if (LRIdx != Size - 2) {
337       assert(Regs[Size - 2] != AArch64::LR);
338       emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
339                 LRIdx - Size + 2, true);
340     }
341 
342     // Store CSRs in the reverse order.
343     for (int I = Size - 3; I >= 0; I -= 2) {
344       // FP/LR has been stored at call-site.
345       if (Regs[I - 1] == AArch64::LR)
346         continue;
347       emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
348                 false);
349     }
350     if (Type == FrameHelperType::PrologFrame)
351       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
352           .addDef(AArch64::FP)
353           .addUse(AArch64::SP)
354           .addImm(FpOffset)
355           .addImm(0)
356           .setMIFlag(MachineInstr::FrameSetup);
357 
358     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
359         .addReg(AArch64::LR);
360     break;
361   }
362   case FrameHelperType::Epilog:
363   case FrameHelperType::EpilogTail:
364     if (Type == FrameHelperType::Epilog)
365       // Stash LR to X16
366       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
367           .addDef(AArch64::X16)
368           .addReg(AArch64::XZR)
369           .addUse(AArch64::LR)
370           .addImm(0);
371 
372     for (int I = 0; I < Size - 2; I += 2)
373       emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
374                false);
375     // Restore the last CSR with post-increment of SP.
376     emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
377              true);
378 
379     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
380         .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
381     break;
382   }
383 
384   return M->getFunction(Name);
385 }
386 
387 /// This function checks if a frame helper should be used for
388 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
389 /// @param MBB machine basic block
390 /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
391 /// @param Regs callee save registers that are saved or restored.
392 /// @param Type frame helper type
393 /// @return True if a use of helper is qualified.
394 static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
395                                  MachineBasicBlock::iterator &NextMBBI,
396                                  SmallVectorImpl<unsigned> &Regs,
397                                  FrameHelperType Type) {
398   const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
399   auto RegCount = Regs.size();
400   assert(RegCount > 0 && (RegCount % 2 == 0));
401   // # of instructions that will be outlined.
402   int InstCount = RegCount / 2;
403 
404   // Do not use a helper call when not saving LR.
405   if (!llvm::is_contained(Regs, AArch64::LR))
406     return false;
407 
408   switch (Type) {
409   case FrameHelperType::Prolog:
410     // Prolog helper cannot save FP/LR.
411     InstCount--;
412     break;
413   case FrameHelperType::PrologFrame: {
414     // Effecitvely no change in InstCount since FpAdjusment is included.
415     break;
416   }
417   case FrameHelperType::Epilog:
418     // Bail-out if X16 is live across the epilog helper because it is used in
419     // the helper to handle X30.
420     for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
421       if (NextMI->readsRegister(AArch64::W16, TRI))
422         return false;
423     }
424     // Epilog may not be in the last block. Check the liveness in successors.
425     for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
426       if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
427         return false;
428     }
429     // No change in InstCount for the regular epilog case.
430     break;
431   case FrameHelperType::EpilogTail: {
432     // EpilogTail helper includes the caller's return.
433     if (NextMBBI == MBB.end())
434       return false;
435     if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
436       return false;
437     InstCount++;
438     break;
439   }
440   }
441 
442   return InstCount >= FrameHelperSizeThreshold;
443 }
444 
445 /// Lower a HOM_Epilog pseudo instruction into a helper call while
446 /// creating the helper on demand. Or emit a sequence of loads in place when not
447 /// using a helper call.
448 ///
449 /// 1. With a helper including ret
450 ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
451 ///    ret                                                  ; NextMBBI
452 ///    =>
453 ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
454 ///    ...                                                  ; NextMBBI
455 ///
456 /// 2. With a helper
457 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
458 ///    =>
459 ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
460 ///
461 /// 3. Without a helper
462 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
463 ///    =>
464 ///    ldp x29, x30, [sp, #32]
465 ///    ldp x20, x19, [sp, #16]
466 ///    ldp x22, x21, [sp], #48
467 bool AArch64LowerHomogeneousPE::lowerEpilog(
468     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
469     MachineBasicBlock::iterator &NextMBBI) {
470   auto &MF = *MBB.getParent();
471   MachineInstr &MI = *MBBI;
472 
473   DebugLoc DL = MI.getDebugLoc();
474   SmallVector<unsigned, 8> Regs;
475   bool HasUnpairedReg = false;
476   for (auto &MO : MI.operands())
477     if (MO.isReg()) {
478       if (!MO.getReg().isValid()) {
479         // For now we are only expecting unpaired GP registers which should
480         // occur exactly once.
481         assert(!HasUnpairedReg);
482         HasUnpairedReg = true;
483       }
484       Regs.push_back(MO.getReg());
485     }
486   (void)HasUnpairedReg;
487   int Size = (int)Regs.size();
488   if (Size == 0)
489     return false;
490   // Registers are in pair.
491   assert(Size % 2 == 0);
492   assert(MI.getOpcode() == AArch64::HOM_Epilog);
493 
494   auto Return = NextMBBI;
495   if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
496     // When MBB ends with a return, emit a tail-call to the epilog helper
497     auto *EpilogTailHelper =
498         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
499     BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
500         .addGlobalAddress(EpilogTailHelper)
501         .addImm(0)
502         .setMIFlag(MachineInstr::FrameDestroy)
503         .copyImplicitOps(MI)
504         .copyImplicitOps(*Return);
505     NextMBBI = std::next(Return);
506     Return->removeFromParent();
507   } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
508                                   FrameHelperType::Epilog)) {
509     // The default epilog helper case.
510     auto *EpilogHelper =
511         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
512     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
513         .addGlobalAddress(EpilogHelper)
514         .setMIFlag(MachineInstr::FrameDestroy)
515         .copyImplicitOps(MI);
516   } else {
517     // Fall back to no-helper.
518     for (int I = 0; I < Size - 2; I += 2)
519       emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
520     // Restore the last CSR with post-increment of SP.
521     emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
522   }
523 
524   MBBI->removeFromParent();
525   return true;
526 }
527 
528 /// Lower a HOM_Prolog pseudo instruction into a helper call while
529 /// creating the helper on demand. Or emit a sequence of stores in place when
530 /// not using a helper call.
531 ///
532 /// 1. With a helper including frame-setup
533 ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
534 ///    =>
535 ///    stp x29, x30, [sp, #-16]!
536 ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
537 ///
538 /// 2. With a helper
539 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
540 ///    =>
541 ///    stp x29, x30, [sp, #-16]!
542 ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
543 ///
544 /// 3. Without a helper
545 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
546 ///    =>
547 ///    stp	x22, x21, [sp, #-48]!
548 ///    stp	x20, x19, [sp, #16]
549 ///    stp	x29, x30, [sp, #32]
550 bool AArch64LowerHomogeneousPE::lowerProlog(
551     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
552     MachineBasicBlock::iterator &NextMBBI) {
553   auto &MF = *MBB.getParent();
554   MachineInstr &MI = *MBBI;
555 
556   DebugLoc DL = MI.getDebugLoc();
557   SmallVector<unsigned, 8> Regs;
558   bool HasUnpairedReg = false;
559   int LRIdx = 0;
560   std::optional<int> FpOffset;
561   for (auto &MO : MI.operands()) {
562     if (MO.isReg()) {
563       if (MO.getReg().isValid()) {
564         if (MO.getReg() == AArch64::LR)
565           LRIdx = Regs.size();
566       } else {
567         // For now we are only expecting unpaired GP registers which should
568         // occur exactly once.
569         assert(!HasUnpairedReg);
570         HasUnpairedReg = true;
571       }
572       Regs.push_back(MO.getReg());
573     } else if (MO.isImm()) {
574       FpOffset = MO.getImm();
575     }
576   }
577   (void)HasUnpairedReg;
578   int Size = (int)Regs.size();
579   if (Size == 0)
580     return false;
581   // Allow compact unwind case only for oww.
582   assert(Size % 2 == 0);
583   assert(MI.getOpcode() == AArch64::HOM_Prolog);
584 
585   if (FpOffset &&
586       shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
587     // FP/LR is stored at the top of stack before the prolog helper call.
588     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
589     auto *PrologFrameHelper = getOrCreateFrameHelper(
590         M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
591     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
592         .addGlobalAddress(PrologFrameHelper)
593         .setMIFlag(MachineInstr::FrameSetup)
594         .copyImplicitOps(MI)
595         .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
596         .addReg(AArch64::SP, RegState::Implicit);
597   } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
598                                                FrameHelperType::Prolog)) {
599     // FP/LR is stored at the top of stack before the prolog helper call.
600     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
601     auto *PrologHelper =
602         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
603     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
604         .addGlobalAddress(PrologHelper)
605         .setMIFlag(MachineInstr::FrameSetup)
606         .copyImplicitOps(MI);
607   } else {
608     // Fall back to no-helper.
609     emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
610     for (int I = Size - 3; I >= 0; I -= 2)
611       emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
612     if (FpOffset) {
613       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
614           .addDef(AArch64::FP)
615           .addUse(AArch64::SP)
616           .addImm(*FpOffset)
617           .addImm(0)
618           .setMIFlag(MachineInstr::FrameSetup);
619     }
620   }
621 
622   MBBI->removeFromParent();
623   return true;
624 }
625 
626 /// Process each machine instruction
627 /// @param MBB machine basic block
628 /// @param MBBI current instruction iterator
629 /// @param NextMBBI next instruction iterator which can be updated
630 /// @return True when IR is changed.
631 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
632                                         MachineBasicBlock::iterator MBBI,
633                                         MachineBasicBlock::iterator &NextMBBI) {
634   MachineInstr &MI = *MBBI;
635   unsigned Opcode = MI.getOpcode();
636   switch (Opcode) {
637   default:
638     break;
639   case AArch64::HOM_Prolog:
640     return lowerProlog(MBB, MBBI, NextMBBI);
641   case AArch64::HOM_Epilog:
642     return lowerEpilog(MBB, MBBI, NextMBBI);
643   }
644   return false;
645 }
646 
647 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
648   bool Modified = false;
649 
650   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
651   while (MBBI != E) {
652     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
653     Modified |= runOnMI(MBB, MBBI, NMBBI);
654     MBBI = NMBBI;
655   }
656 
657   return Modified;
658 }
659 
660 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
661   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
662 
663   bool Modified = false;
664   for (auto &MBB : MF)
665     Modified |= runOnMBB(MBB);
666   return Modified;
667 }
668 
/// Factory for the pass: returns a newly allocated
/// AArch64LowerHomogeneousPrologEpilog instance.
ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
  return new AArch64LowerHomogeneousPrologEpilog();
}
672