1*fe6060f1SDimitry Andric //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// 2*fe6060f1SDimitry Andric // 3*fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*fe6060f1SDimitry Andric // 7*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8*fe6060f1SDimitry Andric // 9*fe6060f1SDimitry Andric // This file contains a pass that lowers homogeneous prolog/epilog instructions. 10*fe6060f1SDimitry Andric // 11*fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 12*fe6060f1SDimitry Andric 13*fe6060f1SDimitry Andric #include "AArch64InstrInfo.h" 14*fe6060f1SDimitry Andric #include "AArch64Subtarget.h" 15*fe6060f1SDimitry Andric #include "MCTargetDesc/AArch64InstPrinter.h" 16*fe6060f1SDimitry Andric #include "Utils/AArch64BaseInfo.h" 17*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 18*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 19*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 20*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 21*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h" 22*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 23*fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 24*fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 25*fe6060f1SDimitry Andric #include "llvm/IR/DebugLoc.h" 26*fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h" 27*fe6060f1SDimitry Andric #include "llvm/Pass.h" 28*fe6060f1SDimitry Andric #include "llvm/Support/raw_ostream.h" 29*fe6060f1SDimitry Andric #include <sstream> 30*fe6060f1SDimitry Andric 31*fe6060f1SDimitry Andric using namespace llvm; 32*fe6060f1SDimitry Andric 33*fe6060f1SDimitry Andric #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ 34*fe6060f1SDimitry Andric "AArch64 homogeneous prolog/epilog lowering pass" 35*fe6060f1SDimitry Andric 36*fe6060f1SDimitry Andric cl::opt<int> FrameHelperSizeThreshold( 37*fe6060f1SDimitry Andric "frame-helper-size-threshold", cl::init(2), cl::Hidden, 38*fe6060f1SDimitry Andric cl::desc("The minimum number of instructions that are outlined in a frame " 39*fe6060f1SDimitry Andric "helper (default = 2)")); 40*fe6060f1SDimitry Andric 41*fe6060f1SDimitry Andric namespace { 42*fe6060f1SDimitry Andric 43*fe6060f1SDimitry Andric class AArch64LowerHomogeneousPE { 44*fe6060f1SDimitry Andric public: 45*fe6060f1SDimitry Andric const AArch64InstrInfo *TII; 46*fe6060f1SDimitry Andric 47*fe6060f1SDimitry Andric AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) 48*fe6060f1SDimitry Andric : M(M), MMI(MMI) {} 49*fe6060f1SDimitry Andric 50*fe6060f1SDimitry Andric bool run(); 51*fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn); 52*fe6060f1SDimitry Andric 53*fe6060f1SDimitry Andric private: 54*fe6060f1SDimitry Andric Module *M; 55*fe6060f1SDimitry Andric MachineModuleInfo *MMI; 56*fe6060f1SDimitry Andric 57*fe6060f1SDimitry Andric bool runOnMBB(MachineBasicBlock &MBB); 58*fe6060f1SDimitry Andric bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 59*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI); 60*fe6060f1SDimitry Andric 61*fe6060f1SDimitry Andric /// Lower a HOM_Prolog pseudo instruction into a helper call 62*fe6060f1SDimitry Andric /// or a sequence of homogeneous stores. 63*fe6060f1SDimitry Andric /// When a a fp setup follows, it can be optimized. 64*fe6060f1SDimitry Andric bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 65*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI); 66*fe6060f1SDimitry Andric /// Lower a HOM_Epilog pseudo instruction into a helper call 67*fe6060f1SDimitry Andric /// or a sequence of homogeneous loads. 68*fe6060f1SDimitry Andric /// When a return follow, it can be optimized. 69*fe6060f1SDimitry Andric bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 70*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI); 71*fe6060f1SDimitry Andric }; 72*fe6060f1SDimitry Andric 73*fe6060f1SDimitry Andric class AArch64LowerHomogeneousPrologEpilog : public ModulePass { 74*fe6060f1SDimitry Andric public: 75*fe6060f1SDimitry Andric static char ID; 76*fe6060f1SDimitry Andric 77*fe6060f1SDimitry Andric AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { 78*fe6060f1SDimitry Andric initializeAArch64LowerHomogeneousPrologEpilogPass( 79*fe6060f1SDimitry Andric *PassRegistry::getPassRegistry()); 80*fe6060f1SDimitry Andric } 81*fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 82*fe6060f1SDimitry Andric AU.addRequired<MachineModuleInfoWrapperPass>(); 83*fe6060f1SDimitry Andric AU.addPreserved<MachineModuleInfoWrapperPass>(); 84*fe6060f1SDimitry Andric AU.setPreservesAll(); 85*fe6060f1SDimitry Andric ModulePass::getAnalysisUsage(AU); 86*fe6060f1SDimitry Andric } 87*fe6060f1SDimitry Andric bool runOnModule(Module &M) override; 88*fe6060f1SDimitry Andric 89*fe6060f1SDimitry Andric StringRef getPassName() const override { 90*fe6060f1SDimitry Andric return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; 91*fe6060f1SDimitry Andric } 92*fe6060f1SDimitry Andric }; 93*fe6060f1SDimitry Andric 94*fe6060f1SDimitry Andric } // end anonymous namespace 95*fe6060f1SDimitry Andric 96*fe6060f1SDimitry Andric char AArch64LowerHomogeneousPrologEpilog::ID = 0; 97*fe6060f1SDimitry Andric 98*fe6060f1SDimitry Andric INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, 99*fe6060f1SDimitry Andric "aarch64-lower-homogeneous-prolog-epilog", 100*fe6060f1SDimitry Andric AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) 101*fe6060f1SDimitry Andric 102*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { 103*fe6060f1SDimitry Andric if (skipModule(M)) 104*fe6060f1SDimitry Andric return false; 105*fe6060f1SDimitry Andric 106*fe6060f1SDimitry Andric MachineModuleInfo *MMI = 107*fe6060f1SDimitry Andric &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); 108*fe6060f1SDimitry Andric return AArch64LowerHomogeneousPE(&M, MMI).run(); 109*fe6060f1SDimitry Andric } 110*fe6060f1SDimitry Andric 111*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::run() { 112*fe6060f1SDimitry Andric bool Changed = false; 113*fe6060f1SDimitry Andric for (auto &F : *M) { 114*fe6060f1SDimitry Andric if (F.empty()) 115*fe6060f1SDimitry Andric continue; 116*fe6060f1SDimitry Andric 117*fe6060f1SDimitry Andric MachineFunction *MF = MMI->getMachineFunction(F); 118*fe6060f1SDimitry Andric if (!MF) 119*fe6060f1SDimitry Andric continue; 120*fe6060f1SDimitry Andric Changed |= runOnMachineFunction(*MF); 121*fe6060f1SDimitry Andric } 122*fe6060f1SDimitry Andric 123*fe6060f1SDimitry Andric return Changed; 124*fe6060f1SDimitry Andric } 125*fe6060f1SDimitry Andric enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; 126*fe6060f1SDimitry Andric 127*fe6060f1SDimitry Andric /// Return a frame helper name with the given CSRs and the helper type. 128*fe6060f1SDimitry Andric /// For instance, a prolog helper that saves x19 and x20 is named as 129*fe6060f1SDimitry Andric /// OUTLINED_FUNCTION_PROLOG_x19x20. 130*fe6060f1SDimitry Andric static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, 131*fe6060f1SDimitry Andric FrameHelperType Type, unsigned FpOffset) { 132*fe6060f1SDimitry Andric std::ostringstream RegStream; 133*fe6060f1SDimitry Andric switch (Type) { 134*fe6060f1SDimitry Andric case FrameHelperType::Prolog: 135*fe6060f1SDimitry Andric RegStream << "OUTLINED_FUNCTION_PROLOG_"; 136*fe6060f1SDimitry Andric break; 137*fe6060f1SDimitry Andric case FrameHelperType::PrologFrame: 138*fe6060f1SDimitry Andric RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; 139*fe6060f1SDimitry Andric break; 140*fe6060f1SDimitry Andric case FrameHelperType::Epilog: 141*fe6060f1SDimitry Andric RegStream << "OUTLINED_FUNCTION_EPILOG_"; 142*fe6060f1SDimitry Andric break; 143*fe6060f1SDimitry Andric case FrameHelperType::EpilogTail: 144*fe6060f1SDimitry Andric RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; 145*fe6060f1SDimitry Andric break; 146*fe6060f1SDimitry Andric } 147*fe6060f1SDimitry Andric 148*fe6060f1SDimitry Andric for (auto Reg : Regs) 149*fe6060f1SDimitry Andric RegStream << AArch64InstPrinter::getRegisterName(Reg); 150*fe6060f1SDimitry Andric 151*fe6060f1SDimitry Andric return RegStream.str(); 152*fe6060f1SDimitry Andric } 153*fe6060f1SDimitry Andric 154*fe6060f1SDimitry Andric /// Create a Function for the unique frame helper with the given name. 155*fe6060f1SDimitry Andric /// Return a newly created MachineFunction with an empty MachineBasicBlock. 156*fe6060f1SDimitry Andric static MachineFunction &createFrameHelperMachineFunction(Module *M, 157*fe6060f1SDimitry Andric MachineModuleInfo *MMI, 158*fe6060f1SDimitry Andric StringRef Name) { 159*fe6060f1SDimitry Andric LLVMContext &C = M->getContext(); 160*fe6060f1SDimitry Andric Function *F = M->getFunction(Name); 161*fe6060f1SDimitry Andric assert(F == nullptr && "Function has been created before"); 162*fe6060f1SDimitry Andric F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), 163*fe6060f1SDimitry Andric Function::ExternalLinkage, Name, M); 164*fe6060f1SDimitry Andric assert(F && "Function was null!"); 165*fe6060f1SDimitry Andric 166*fe6060f1SDimitry Andric // Use ODR linkage to avoid duplication. 167*fe6060f1SDimitry Andric F->setLinkage(GlobalValue::LinkOnceODRLinkage); 168*fe6060f1SDimitry Andric F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 169*fe6060f1SDimitry Andric 170*fe6060f1SDimitry Andric // Set no-opt/minsize, so we don't insert padding between outlined 171*fe6060f1SDimitry Andric // functions. 172*fe6060f1SDimitry Andric F->addFnAttr(Attribute::OptimizeNone); 173*fe6060f1SDimitry Andric F->addFnAttr(Attribute::NoInline); 174*fe6060f1SDimitry Andric F->addFnAttr(Attribute::MinSize); 175*fe6060f1SDimitry Andric F->addFnAttr(Attribute::Naked); 176*fe6060f1SDimitry Andric 177*fe6060f1SDimitry Andric MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); 178*fe6060f1SDimitry Andric // Remove unnecessary register liveness and set NoVRegs. 179*fe6060f1SDimitry Andric MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); 180*fe6060f1SDimitry Andric MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); 181*fe6060f1SDimitry Andric MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); 182*fe6060f1SDimitry Andric MF.getRegInfo().freezeReservedRegs(MF); 183*fe6060f1SDimitry Andric 184*fe6060f1SDimitry Andric // Create entry block. 185*fe6060f1SDimitry Andric BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); 186*fe6060f1SDimitry Andric IRBuilder<> Builder(EntryBB); 187*fe6060f1SDimitry Andric Builder.CreateRetVoid(); 188*fe6060f1SDimitry Andric 189*fe6060f1SDimitry Andric // Insert the new block into the function. 190*fe6060f1SDimitry Andric MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); 191*fe6060f1SDimitry Andric MF.insert(MF.begin(), MBB); 192*fe6060f1SDimitry Andric 193*fe6060f1SDimitry Andric return MF; 194*fe6060f1SDimitry Andric } 195*fe6060f1SDimitry Andric 196*fe6060f1SDimitry Andric /// Emit a store-pair instruction for frame-setup. 197*fe6060f1SDimitry Andric static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, 198*fe6060f1SDimitry Andric MachineBasicBlock::iterator Pos, 199*fe6060f1SDimitry Andric const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 200*fe6060f1SDimitry Andric int Offset, bool IsPreDec) { 201*fe6060f1SDimitry Andric bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 202*fe6060f1SDimitry Andric assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 203*fe6060f1SDimitry Andric unsigned Opc; 204*fe6060f1SDimitry Andric if (IsPreDec) 205*fe6060f1SDimitry Andric Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre; 206*fe6060f1SDimitry Andric else 207*fe6060f1SDimitry Andric Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi; 208*fe6060f1SDimitry Andric 209*fe6060f1SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 210*fe6060f1SDimitry Andric if (IsPreDec) 211*fe6060f1SDimitry Andric MIB.addDef(AArch64::SP); 212*fe6060f1SDimitry Andric MIB.addReg(Reg2) 213*fe6060f1SDimitry Andric .addReg(Reg1) 214*fe6060f1SDimitry Andric .addReg(AArch64::SP) 215*fe6060f1SDimitry Andric .addImm(Offset) 216*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 217*fe6060f1SDimitry Andric } 218*fe6060f1SDimitry Andric 219*fe6060f1SDimitry Andric /// Emit a load-pair instruction for frame-destroy. 220*fe6060f1SDimitry Andric static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, 221*fe6060f1SDimitry Andric MachineBasicBlock::iterator Pos, 222*fe6060f1SDimitry Andric const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 223*fe6060f1SDimitry Andric int Offset, bool IsPostDec) { 224*fe6060f1SDimitry Andric bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 225*fe6060f1SDimitry Andric assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 226*fe6060f1SDimitry Andric unsigned Opc; 227*fe6060f1SDimitry Andric if (IsPostDec) 228*fe6060f1SDimitry Andric Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost; 229*fe6060f1SDimitry Andric else 230*fe6060f1SDimitry Andric Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi; 231*fe6060f1SDimitry Andric 232*fe6060f1SDimitry Andric MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 233*fe6060f1SDimitry Andric if (IsPostDec) 234*fe6060f1SDimitry Andric MIB.addDef(AArch64::SP); 235*fe6060f1SDimitry Andric MIB.addReg(Reg2, getDefRegState(true)) 236*fe6060f1SDimitry Andric .addReg(Reg1, getDefRegState(true)) 237*fe6060f1SDimitry Andric .addReg(AArch64::SP) 238*fe6060f1SDimitry Andric .addImm(Offset) 239*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy); 240*fe6060f1SDimitry Andric } 241*fe6060f1SDimitry Andric 242*fe6060f1SDimitry Andric /// Return a unique function if a helper can be formed with the given Regs 243*fe6060f1SDimitry Andric /// and frame type. 244*fe6060f1SDimitry Andric /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: 245*fe6060f1SDimitry Andric /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 246*fe6060f1SDimitry Andric /// stp x20, x19, [sp, #16] 247*fe6060f1SDimitry Andric /// ret 248*fe6060f1SDimitry Andric /// 249*fe6060f1SDimitry Andric /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: 250*fe6060f1SDimitry Andric /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 251*fe6060f1SDimitry Andric /// stp x20, x19, [sp, #16] 252*fe6060f1SDimitry Andric /// add fp, sp, #32 253*fe6060f1SDimitry Andric /// ret 254*fe6060f1SDimitry Andric /// 255*fe6060f1SDimitry Andric /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: 256*fe6060f1SDimitry Andric /// mov x16, x30 257*fe6060f1SDimitry Andric /// ldp x29, x30, [sp, #32] 258*fe6060f1SDimitry Andric /// ldp x20, x19, [sp, #16] 259*fe6060f1SDimitry Andric /// ldp x22, x21, [sp], #48 260*fe6060f1SDimitry Andric /// ret x16 261*fe6060f1SDimitry Andric /// 262*fe6060f1SDimitry Andric /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: 263*fe6060f1SDimitry Andric /// ldp x29, x30, [sp, #32] 264*fe6060f1SDimitry Andric /// ldp x20, x19, [sp, #16] 265*fe6060f1SDimitry Andric /// ldp x22, x21, [sp], #48 266*fe6060f1SDimitry Andric /// ret 267*fe6060f1SDimitry Andric /// @param M module 268*fe6060f1SDimitry Andric /// @param MMI machine module info 269*fe6060f1SDimitry Andric /// @param Regs callee save regs that the helper will handle 270*fe6060f1SDimitry Andric /// @param Type frame helper type 271*fe6060f1SDimitry Andric /// @return a helper function 272*fe6060f1SDimitry Andric static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, 273*fe6060f1SDimitry Andric SmallVectorImpl<unsigned> &Regs, 274*fe6060f1SDimitry Andric FrameHelperType Type, 275*fe6060f1SDimitry Andric unsigned FpOffset = 0) { 276*fe6060f1SDimitry Andric assert(Regs.size() >= 2); 277*fe6060f1SDimitry Andric auto Name = getFrameHelperName(Regs, Type, FpOffset); 278*fe6060f1SDimitry Andric auto *F = M->getFunction(Name); 279*fe6060f1SDimitry Andric if (F) 280*fe6060f1SDimitry Andric return F; 281*fe6060f1SDimitry Andric 282*fe6060f1SDimitry Andric auto &MF = createFrameHelperMachineFunction(M, MMI, Name); 283*fe6060f1SDimitry Andric MachineBasicBlock &MBB = *MF.begin(); 284*fe6060f1SDimitry Andric const TargetSubtargetInfo &STI = MF.getSubtarget(); 285*fe6060f1SDimitry Andric const TargetInstrInfo &TII = *STI.getInstrInfo(); 286*fe6060f1SDimitry Andric 287*fe6060f1SDimitry Andric int Size = (int)Regs.size(); 288*fe6060f1SDimitry Andric switch (Type) { 289*fe6060f1SDimitry Andric case FrameHelperType::Prolog: 290*fe6060f1SDimitry Andric case FrameHelperType::PrologFrame: { 291*fe6060f1SDimitry Andric // Compute the remaining SP adjust beyond FP/LR. 292*fe6060f1SDimitry Andric auto LRIdx = std::distance( 293*fe6060f1SDimitry Andric Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR)); 294*fe6060f1SDimitry Andric 295*fe6060f1SDimitry Andric // If the register stored to the lowest address is not LR, we must subtract 296*fe6060f1SDimitry Andric // more from SP here. 297*fe6060f1SDimitry Andric if (LRIdx != Size - 2) { 298*fe6060f1SDimitry Andric assert(Regs[Size - 2] != AArch64::LR); 299*fe6060f1SDimitry Andric emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], 300*fe6060f1SDimitry Andric LRIdx - Size + 2, true); 301*fe6060f1SDimitry Andric } 302*fe6060f1SDimitry Andric 303*fe6060f1SDimitry Andric // Store CSRs in the reverse order. 304*fe6060f1SDimitry Andric for (int I = Size - 3; I >= 0; I -= 2) { 305*fe6060f1SDimitry Andric // FP/LR has been stored at call-site. 306*fe6060f1SDimitry Andric if (Regs[I - 1] == AArch64::LR) 307*fe6060f1SDimitry Andric continue; 308*fe6060f1SDimitry Andric emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, 309*fe6060f1SDimitry Andric false); 310*fe6060f1SDimitry Andric } 311*fe6060f1SDimitry Andric if (Type == FrameHelperType::PrologFrame) 312*fe6060f1SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) 313*fe6060f1SDimitry Andric .addDef(AArch64::FP) 314*fe6060f1SDimitry Andric .addUse(AArch64::SP) 315*fe6060f1SDimitry Andric .addImm(FpOffset) 316*fe6060f1SDimitry Andric .addImm(0) 317*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 318*fe6060f1SDimitry Andric 319*fe6060f1SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 320*fe6060f1SDimitry Andric .addReg(AArch64::LR); 321*fe6060f1SDimitry Andric break; 322*fe6060f1SDimitry Andric } 323*fe6060f1SDimitry Andric case FrameHelperType::Epilog: 324*fe6060f1SDimitry Andric case FrameHelperType::EpilogTail: 325*fe6060f1SDimitry Andric if (Type == FrameHelperType::Epilog) 326*fe6060f1SDimitry Andric // Stash LR to X16 327*fe6060f1SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) 328*fe6060f1SDimitry Andric .addDef(AArch64::X16) 329*fe6060f1SDimitry Andric .addReg(AArch64::XZR) 330*fe6060f1SDimitry Andric .addUse(AArch64::LR) 331*fe6060f1SDimitry Andric .addImm(0); 332*fe6060f1SDimitry Andric 333*fe6060f1SDimitry Andric for (int I = 0; I < Size - 2; I += 2) 334*fe6060f1SDimitry Andric emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, 335*fe6060f1SDimitry Andric false); 336*fe6060f1SDimitry Andric // Restore the last CSR with post-increment of SP. 337*fe6060f1SDimitry Andric emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, 338*fe6060f1SDimitry Andric true); 339*fe6060f1SDimitry Andric 340*fe6060f1SDimitry Andric BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 341*fe6060f1SDimitry Andric .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); 342*fe6060f1SDimitry Andric break; 343*fe6060f1SDimitry Andric } 344*fe6060f1SDimitry Andric 345*fe6060f1SDimitry Andric return M->getFunction(Name); 346*fe6060f1SDimitry Andric } 347*fe6060f1SDimitry Andric 348*fe6060f1SDimitry Andric /// This function checks if a frame helper should be used for 349*fe6060f1SDimitry Andric /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. 350*fe6060f1SDimitry Andric /// @param MBB machine basic block 351*fe6060f1SDimitry Andric /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog 352*fe6060f1SDimitry Andric /// @param Regs callee save registers that are saved or restored. 353*fe6060f1SDimitry Andric /// @param Type frame helper type 354*fe6060f1SDimitry Andric /// @return True if a use of helper is qualified. 355*fe6060f1SDimitry Andric static bool shouldUseFrameHelper(MachineBasicBlock &MBB, 356*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI, 357*fe6060f1SDimitry Andric SmallVectorImpl<unsigned> &Regs, 358*fe6060f1SDimitry Andric FrameHelperType Type) { 359*fe6060f1SDimitry Andric const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 360*fe6060f1SDimitry Andric auto RegCount = Regs.size(); 361*fe6060f1SDimitry Andric assert(RegCount > 0 && (RegCount % 2 == 0)); 362*fe6060f1SDimitry Andric // # of instructions that will be outlined. 363*fe6060f1SDimitry Andric int InstCount = RegCount / 2; 364*fe6060f1SDimitry Andric 365*fe6060f1SDimitry Andric // Do not use a helper call when not saving LR. 366*fe6060f1SDimitry Andric if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end()) 367*fe6060f1SDimitry Andric return false; 368*fe6060f1SDimitry Andric 369*fe6060f1SDimitry Andric switch (Type) { 370*fe6060f1SDimitry Andric case FrameHelperType::Prolog: 371*fe6060f1SDimitry Andric // Prolog helper cannot save FP/LR. 372*fe6060f1SDimitry Andric InstCount--; 373*fe6060f1SDimitry Andric break; 374*fe6060f1SDimitry Andric case FrameHelperType::PrologFrame: { 375*fe6060f1SDimitry Andric // Effecitvely no change in InstCount since FpAdjusment is included. 376*fe6060f1SDimitry Andric break; 377*fe6060f1SDimitry Andric } 378*fe6060f1SDimitry Andric case FrameHelperType::Epilog: 379*fe6060f1SDimitry Andric // Bail-out if X16 is live across the epilog helper because it is used in 380*fe6060f1SDimitry Andric // the helper to handle X30. 381*fe6060f1SDimitry Andric for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { 382*fe6060f1SDimitry Andric if (NextMI->readsRegister(AArch64::W16, TRI)) 383*fe6060f1SDimitry Andric return false; 384*fe6060f1SDimitry Andric } 385*fe6060f1SDimitry Andric // Epilog may not be in the last block. Check the liveness in successors. 386*fe6060f1SDimitry Andric for (const MachineBasicBlock *SuccMBB : MBB.successors()) { 387*fe6060f1SDimitry Andric if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) 388*fe6060f1SDimitry Andric return false; 389*fe6060f1SDimitry Andric } 390*fe6060f1SDimitry Andric // No change in InstCount for the regular epilog case. 391*fe6060f1SDimitry Andric break; 392*fe6060f1SDimitry Andric case FrameHelperType::EpilogTail: { 393*fe6060f1SDimitry Andric // EpilogTail helper includes the caller's return. 394*fe6060f1SDimitry Andric if (NextMBBI == MBB.end()) 395*fe6060f1SDimitry Andric return false; 396*fe6060f1SDimitry Andric if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) 397*fe6060f1SDimitry Andric return false; 398*fe6060f1SDimitry Andric InstCount++; 399*fe6060f1SDimitry Andric break; 400*fe6060f1SDimitry Andric } 401*fe6060f1SDimitry Andric } 402*fe6060f1SDimitry Andric 403*fe6060f1SDimitry Andric return InstCount >= FrameHelperSizeThreshold; 404*fe6060f1SDimitry Andric } 405*fe6060f1SDimitry Andric 406*fe6060f1SDimitry Andric /// Lower a HOM_Epilog pseudo instruction into a helper call while 407*fe6060f1SDimitry Andric /// creating the helper on demand. Or emit a sequence of loads in place when not 408*fe6060f1SDimitry Andric /// using a helper call. 409*fe6060f1SDimitry Andric /// 410*fe6060f1SDimitry Andric /// 1. With a helper including ret 411*fe6060f1SDimitry Andric /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI 412*fe6060f1SDimitry Andric /// ret ; NextMBBI 413*fe6060f1SDimitry Andric /// => 414*fe6060f1SDimitry Andric /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 415*fe6060f1SDimitry Andric /// ... ; NextMBBI 416*fe6060f1SDimitry Andric /// 417*fe6060f1SDimitry Andric /// 2. With a helper 418*fe6060f1SDimitry Andric /// HOM_Epilog x30, x29, x19, x20, x21, x22 419*fe6060f1SDimitry Andric /// => 420*fe6060f1SDimitry Andric /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 421*fe6060f1SDimitry Andric /// 422*fe6060f1SDimitry Andric /// 3. Without a helper 423*fe6060f1SDimitry Andric /// HOM_Epilog x30, x29, x19, x20, x21, x22 424*fe6060f1SDimitry Andric /// => 425*fe6060f1SDimitry Andric /// ldp x29, x30, [sp, #32] 426*fe6060f1SDimitry Andric /// ldp x20, x19, [sp, #16] 427*fe6060f1SDimitry Andric /// ldp x22, x21, [sp], #48 428*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::lowerEpilog( 429*fe6060f1SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 430*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI) { 431*fe6060f1SDimitry Andric auto &MF = *MBB.getParent(); 432*fe6060f1SDimitry Andric MachineInstr &MI = *MBBI; 433*fe6060f1SDimitry Andric 434*fe6060f1SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 435*fe6060f1SDimitry Andric SmallVector<unsigned, 8> Regs; 436*fe6060f1SDimitry Andric for (auto &MO : MI.operands()) 437*fe6060f1SDimitry Andric if (MO.isReg()) 438*fe6060f1SDimitry Andric Regs.push_back(MO.getReg()); 439*fe6060f1SDimitry Andric int Size = (int)Regs.size(); 440*fe6060f1SDimitry Andric if (Size == 0) 441*fe6060f1SDimitry Andric return false; 442*fe6060f1SDimitry Andric // Registers are in pair. 443*fe6060f1SDimitry Andric assert(Size % 2 == 0); 444*fe6060f1SDimitry Andric assert(MI.getOpcode() == AArch64::HOM_Epilog); 445*fe6060f1SDimitry Andric 446*fe6060f1SDimitry Andric auto Return = NextMBBI; 447*fe6060f1SDimitry Andric if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { 448*fe6060f1SDimitry Andric // When MBB ends with a return, emit a tail-call to the epilog helper 449*fe6060f1SDimitry Andric auto *EpilogTailHelper = 450*fe6060f1SDimitry Andric getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); 451*fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) 452*fe6060f1SDimitry Andric .addGlobalAddress(EpilogTailHelper) 453*fe6060f1SDimitry Andric .addImm(0) 454*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy) 455*fe6060f1SDimitry Andric .copyImplicitOps(MI) 456*fe6060f1SDimitry Andric .copyImplicitOps(*Return); 457*fe6060f1SDimitry Andric NextMBBI = std::next(Return); 458*fe6060f1SDimitry Andric Return->removeFromParent(); 459*fe6060f1SDimitry Andric } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, 460*fe6060f1SDimitry Andric FrameHelperType::Epilog)) { 461*fe6060f1SDimitry Andric // The default epilog helper case. 462*fe6060f1SDimitry Andric auto *EpilogHelper = 463*fe6060f1SDimitry Andric getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); 464*fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 465*fe6060f1SDimitry Andric .addGlobalAddress(EpilogHelper) 466*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameDestroy) 467*fe6060f1SDimitry Andric .copyImplicitOps(MI); 468*fe6060f1SDimitry Andric } else { 469*fe6060f1SDimitry Andric // Fall back to no-helper. 470*fe6060f1SDimitry Andric for (int I = 0; I < Size - 2; I += 2) 471*fe6060f1SDimitry Andric emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); 472*fe6060f1SDimitry Andric // Restore the last CSR with post-increment of SP. 473*fe6060f1SDimitry Andric emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); 474*fe6060f1SDimitry Andric } 475*fe6060f1SDimitry Andric 476*fe6060f1SDimitry Andric MBBI->removeFromParent(); 477*fe6060f1SDimitry Andric return true; 478*fe6060f1SDimitry Andric } 479*fe6060f1SDimitry Andric 480*fe6060f1SDimitry Andric /// Lower a HOM_Prolog pseudo instruction into a helper call while 481*fe6060f1SDimitry Andric /// creating the helper on demand. Or emit a sequence of stores in place when 482*fe6060f1SDimitry Andric /// not using a helper call. 483*fe6060f1SDimitry Andric /// 484*fe6060f1SDimitry Andric /// 1. With a helper including frame-setup 485*fe6060f1SDimitry Andric /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 486*fe6060f1SDimitry Andric /// => 487*fe6060f1SDimitry Andric /// stp x29, x30, [sp, #-16]! 488*fe6060f1SDimitry Andric /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 489*fe6060f1SDimitry Andric /// 490*fe6060f1SDimitry Andric /// 2. With a helper 491*fe6060f1SDimitry Andric /// HOM_Prolog x30, x29, x19, x20, x21, x22 492*fe6060f1SDimitry Andric /// => 493*fe6060f1SDimitry Andric /// stp x29, x30, [sp, #-16]! 494*fe6060f1SDimitry Andric /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 495*fe6060f1SDimitry Andric /// 496*fe6060f1SDimitry Andric /// 3. Without a helper 497*fe6060f1SDimitry Andric /// HOM_Prolog x30, x29, x19, x20, x21, x22 498*fe6060f1SDimitry Andric /// => 499*fe6060f1SDimitry Andric /// stp x22, x21, [sp, #-48]! 500*fe6060f1SDimitry Andric /// stp x20, x19, [sp, #16] 501*fe6060f1SDimitry Andric /// stp x29, x30, [sp, #32] 502*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::lowerProlog( 503*fe6060f1SDimitry Andric MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 504*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI) { 505*fe6060f1SDimitry Andric auto &MF = *MBB.getParent(); 506*fe6060f1SDimitry Andric MachineInstr &MI = *MBBI; 507*fe6060f1SDimitry Andric 508*fe6060f1SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 509*fe6060f1SDimitry Andric SmallVector<unsigned, 8> Regs; 510*fe6060f1SDimitry Andric int LRIdx = 0; 511*fe6060f1SDimitry Andric Optional<int> FpOffset; 512*fe6060f1SDimitry Andric for (auto &MO : MI.operands()) { 513*fe6060f1SDimitry Andric if (MO.isReg()) { 514*fe6060f1SDimitry Andric if (MO.getReg() == AArch64::LR) 515*fe6060f1SDimitry Andric LRIdx = Regs.size(); 516*fe6060f1SDimitry Andric Regs.push_back(MO.getReg()); 517*fe6060f1SDimitry Andric } else if (MO.isImm()) { 518*fe6060f1SDimitry Andric FpOffset = MO.getImm(); 519*fe6060f1SDimitry Andric } 520*fe6060f1SDimitry Andric } 521*fe6060f1SDimitry Andric int Size = (int)Regs.size(); 522*fe6060f1SDimitry Andric if (Size == 0) 523*fe6060f1SDimitry Andric return false; 524*fe6060f1SDimitry Andric // Allow compact unwind case only for oww. 525*fe6060f1SDimitry Andric assert(Size % 2 == 0); 526*fe6060f1SDimitry Andric assert(MI.getOpcode() == AArch64::HOM_Prolog); 527*fe6060f1SDimitry Andric 528*fe6060f1SDimitry Andric if (FpOffset && 529*fe6060f1SDimitry Andric shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { 530*fe6060f1SDimitry Andric // FP/LR is stored at the top of stack before the prolog helper call. 531*fe6060f1SDimitry Andric emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 532*fe6060f1SDimitry Andric auto *PrologFrameHelper = getOrCreateFrameHelper( 533*fe6060f1SDimitry Andric M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); 534*fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 535*fe6060f1SDimitry Andric .addGlobalAddress(PrologFrameHelper) 536*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup) 537*fe6060f1SDimitry Andric .copyImplicitOps(MI) 538*fe6060f1SDimitry Andric .addReg(AArch64::FP, RegState::Implicit | RegState::Define) 539*fe6060f1SDimitry Andric .addReg(AArch64::SP, RegState::Implicit); 540*fe6060f1SDimitry Andric } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, 541*fe6060f1SDimitry Andric FrameHelperType::Prolog)) { 542*fe6060f1SDimitry Andric // FP/LR is stored at the top of stack before the prolog helper call. 543*fe6060f1SDimitry Andric emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 544*fe6060f1SDimitry Andric auto *PrologHelper = 545*fe6060f1SDimitry Andric getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); 546*fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 547*fe6060f1SDimitry Andric .addGlobalAddress(PrologHelper) 548*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup) 549*fe6060f1SDimitry Andric .copyImplicitOps(MI); 550*fe6060f1SDimitry Andric } else { 551*fe6060f1SDimitry Andric // Fall back to no-helper. 552*fe6060f1SDimitry Andric emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); 553*fe6060f1SDimitry Andric for (int I = Size - 3; I >= 0; I -= 2) 554*fe6060f1SDimitry Andric emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); 555*fe6060f1SDimitry Andric if (FpOffset) { 556*fe6060f1SDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) 557*fe6060f1SDimitry Andric .addDef(AArch64::FP) 558*fe6060f1SDimitry Andric .addUse(AArch64::SP) 559*fe6060f1SDimitry Andric .addImm(*FpOffset) 560*fe6060f1SDimitry Andric .addImm(0) 561*fe6060f1SDimitry Andric .setMIFlag(MachineInstr::FrameSetup); 562*fe6060f1SDimitry Andric } 563*fe6060f1SDimitry Andric } 564*fe6060f1SDimitry Andric 565*fe6060f1SDimitry Andric MBBI->removeFromParent(); 566*fe6060f1SDimitry Andric return true; 567*fe6060f1SDimitry Andric } 568*fe6060f1SDimitry Andric 569*fe6060f1SDimitry Andric /// Process each machine instruction 570*fe6060f1SDimitry Andric /// @param MBB machine basic block 571*fe6060f1SDimitry Andric /// @param MBBI current instruction iterator 572*fe6060f1SDimitry Andric /// @param NextMBBI next instruction iterator which can be updated 573*fe6060f1SDimitry Andric /// @return True when IR is changed. 574*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, 575*fe6060f1SDimitry Andric MachineBasicBlock::iterator MBBI, 576*fe6060f1SDimitry Andric MachineBasicBlock::iterator &NextMBBI) { 577*fe6060f1SDimitry Andric MachineInstr &MI = *MBBI; 578*fe6060f1SDimitry Andric unsigned Opcode = MI.getOpcode(); 579*fe6060f1SDimitry Andric switch (Opcode) { 580*fe6060f1SDimitry Andric default: 581*fe6060f1SDimitry Andric break; 582*fe6060f1SDimitry Andric case AArch64::HOM_Prolog: 583*fe6060f1SDimitry Andric return lowerProlog(MBB, MBBI, NextMBBI); 584*fe6060f1SDimitry Andric case AArch64::HOM_Epilog: 585*fe6060f1SDimitry Andric return lowerEpilog(MBB, MBBI, NextMBBI); 586*fe6060f1SDimitry Andric } 587*fe6060f1SDimitry Andric return false; 588*fe6060f1SDimitry Andric } 589*fe6060f1SDimitry Andric 590*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { 591*fe6060f1SDimitry Andric bool Modified = false; 592*fe6060f1SDimitry Andric 593*fe6060f1SDimitry Andric MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 594*fe6060f1SDimitry Andric while (MBBI != E) { 595*fe6060f1SDimitry Andric MachineBasicBlock::iterator NMBBI = std::next(MBBI); 596*fe6060f1SDimitry Andric Modified |= runOnMI(MBB, MBBI, NMBBI); 597*fe6060f1SDimitry Andric MBBI = NMBBI; 598*fe6060f1SDimitry Andric } 599*fe6060f1SDimitry Andric 600*fe6060f1SDimitry Andric return Modified; 601*fe6060f1SDimitry Andric } 602*fe6060f1SDimitry Andric 603*fe6060f1SDimitry Andric bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { 604*fe6060f1SDimitry Andric TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 605*fe6060f1SDimitry Andric 606*fe6060f1SDimitry Andric bool Modified = false; 607*fe6060f1SDimitry Andric for (auto &MBB : MF) 608*fe6060f1SDimitry Andric Modified |= runOnMBB(MBB); 609*fe6060f1SDimitry Andric return Modified; 610*fe6060f1SDimitry Andric } 611*fe6060f1SDimitry Andric 612*fe6060f1SDimitry Andric ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { 613*fe6060f1SDimitry Andric return new AArch64LowerHomogeneousPrologEpilog(); 614*fe6060f1SDimitry Andric } 615