1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that lowers homogeneous prolog/epilog instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "MCTargetDesc/AArch64InstPrinter.h" 16 #include "Utils/AArch64BaseInfo.h" 17 #include "llvm/CodeGen/MachineBasicBlock.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineModuleInfo.h" 23 #include "llvm/CodeGen/MachineOperand.h" 24 #include "llvm/CodeGen/TargetSubtargetInfo.h" 25 #include "llvm/IR/DebugLoc.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/Pass.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <optional> 30 #include <sstream> 31 32 using namespace llvm; 33 34 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ 35 "AArch64 homogeneous prolog/epilog lowering pass" 36 37 cl::opt<int> FrameHelperSizeThreshold( 38 "frame-helper-size-threshold", cl::init(2), cl::Hidden, 39 cl::desc("The minimum number of instructions that are outlined in a frame " 40 "helper (default = 2)")); 41 42 namespace { 43 44 class AArch64LowerHomogeneousPE { 45 public: 46 const AArch64InstrInfo *TII; 47 48 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) 49 : M(M), MMI(MMI) {} 50 51 bool run(); 52 bool runOnMachineFunction(MachineFunction &Fn); 53 54 private: 55 Module *M; 56 MachineModuleInfo *MMI; 57 58 bool runOnMBB(MachineBasicBlock &MBB); 59 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 60 MachineBasicBlock::iterator &NextMBBI); 61 62 /// Lower a HOM_Prolog pseudo instruction into a helper call 63 /// or a sequence of homogeneous stores. 64 /// When a fp setup follows, it can be optimized. 65 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 66 MachineBasicBlock::iterator &NextMBBI); 67 /// Lower a HOM_Epilog pseudo instruction into a helper call 68 /// or a sequence of homogeneous loads. 69 /// When a return follow, it can be optimized. 70 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 71 MachineBasicBlock::iterator &NextMBBI); 72 }; 73 74 class AArch64LowerHomogeneousPrologEpilog : public ModulePass { 75 public: 76 static char ID; 77 78 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { 79 initializeAArch64LowerHomogeneousPrologEpilogPass( 80 *PassRegistry::getPassRegistry()); 81 } 82 void getAnalysisUsage(AnalysisUsage &AU) const override { 83 AU.addRequired<MachineModuleInfoWrapperPass>(); 84 AU.addPreserved<MachineModuleInfoWrapperPass>(); 85 AU.setPreservesAll(); 86 ModulePass::getAnalysisUsage(AU); 87 } 88 bool runOnModule(Module &M) override; 89 90 StringRef getPassName() const override { 91 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; 92 } 93 }; 94 95 } // end anonymous namespace 96 97 char AArch64LowerHomogeneousPrologEpilog::ID = 0; 98 99 INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, 100 "aarch64-lower-homogeneous-prolog-epilog", 101 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) 102 103 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { 104 if (skipModule(M)) 105 return false; 106 107 MachineModuleInfo *MMI = 108 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); 109 return AArch64LowerHomogeneousPE(&M, MMI).run(); 110 } 111 112 bool AArch64LowerHomogeneousPE::run() { 113 bool Changed = false; 114 for (auto &F : *M) { 115 if (F.empty()) 116 continue; 117 118 MachineFunction *MF = MMI->getMachineFunction(F); 119 if (!MF) 120 continue; 121 Changed |= runOnMachineFunction(*MF); 122 } 123 124 return Changed; 125 } 126 enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; 127 128 /// Return a frame helper name with the given CSRs and the helper type. 129 /// For instance, a prolog helper that saves x19 and x20 is named as 130 /// OUTLINED_FUNCTION_PROLOG_x19x20. 131 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, 132 FrameHelperType Type, unsigned FpOffset) { 133 std::ostringstream RegStream; 134 switch (Type) { 135 case FrameHelperType::Prolog: 136 RegStream << "OUTLINED_FUNCTION_PROLOG_"; 137 break; 138 case FrameHelperType::PrologFrame: 139 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; 140 break; 141 case FrameHelperType::Epilog: 142 RegStream << "OUTLINED_FUNCTION_EPILOG_"; 143 break; 144 case FrameHelperType::EpilogTail: 145 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; 146 break; 147 } 148 149 for (auto Reg : Regs) { 150 if (Reg == AArch64::NoRegister) 151 continue; 152 RegStream << AArch64InstPrinter::getRegisterName(Reg); 153 } 154 155 return RegStream.str(); 156 } 157 158 /// Create a Function for the unique frame helper with the given name. 159 /// Return a newly created MachineFunction with an empty MachineBasicBlock. 160 static MachineFunction &createFrameHelperMachineFunction(Module *M, 161 MachineModuleInfo *MMI, 162 StringRef Name) { 163 LLVMContext &C = M->getContext(); 164 Function *F = M->getFunction(Name); 165 assert(F == nullptr && "Function has been created before"); 166 F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), 167 Function::ExternalLinkage, Name, M); 168 assert(F && "Function was null!"); 169 170 // Use ODR linkage to avoid duplication. 171 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 172 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 173 174 // Set no-opt/minsize, so we don't insert padding between outlined 175 // functions. 176 F->addFnAttr(Attribute::OptimizeNone); 177 F->addFnAttr(Attribute::NoInline); 178 F->addFnAttr(Attribute::MinSize); 179 F->addFnAttr(Attribute::Naked); 180 181 MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); 182 // Remove unnecessary register liveness and set NoVRegs. 183 MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); 184 MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); 185 MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); 186 MF.getRegInfo().freezeReservedRegs(MF); 187 188 // Create entry block. 189 BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); 190 IRBuilder<> Builder(EntryBB); 191 Builder.CreateRetVoid(); 192 193 // Insert the new block into the function. 194 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); 195 MF.insert(MF.begin(), MBB); 196 197 return MF; 198 } 199 200 /// Emit a store-pair instruction for frame-setup. 201 /// If Reg2 is AArch64::NoRegister, emit STR instead. 202 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, 203 MachineBasicBlock::iterator Pos, 204 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 205 int Offset, bool IsPreDec) { 206 assert(Reg1 != AArch64::NoRegister); 207 const bool IsPaired = Reg2 != AArch64::NoRegister; 208 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 209 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 210 unsigned Opc; 211 if (IsPreDec) { 212 if (IsFloat) 213 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; 214 else 215 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; 216 } else { 217 if (IsFloat) 218 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; 219 else 220 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; 221 } 222 // The implicit scale for Offset is 8. 223 TypeSize Scale(0U, false), Width(0U, false); 224 int64_t MinOffset, MaxOffset; 225 [[maybe_unused]] bool Success = 226 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 227 assert(Success && "Invalid Opcode"); 228 Offset *= (8 / (int)Scale); 229 230 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 231 if (IsPreDec) 232 MIB.addDef(AArch64::SP); 233 if (IsPaired) 234 MIB.addReg(Reg2); 235 MIB.addReg(Reg1) 236 .addReg(AArch64::SP) 237 .addImm(Offset) 238 .setMIFlag(MachineInstr::FrameSetup); 239 } 240 241 /// Emit a load-pair instruction for frame-destroy. 242 /// If Reg2 is AArch64::NoRegister, emit LDR instead. 243 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, 244 MachineBasicBlock::iterator Pos, 245 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 246 int Offset, bool IsPostDec) { 247 assert(Reg1 != AArch64::NoRegister); 248 const bool IsPaired = Reg2 != AArch64::NoRegister; 249 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 250 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 251 unsigned Opc; 252 if (IsPostDec) { 253 if (IsFloat) 254 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; 255 else 256 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; 257 } else { 258 if (IsFloat) 259 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; 260 else 261 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; 262 } 263 // The implicit scale for Offset is 8. 264 TypeSize Scale(0U, false), Width(0U, false); 265 int64_t MinOffset, MaxOffset; 266 [[maybe_unused]] bool Success = 267 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 268 assert(Success && "Invalid Opcode"); 269 Offset *= (8 / (int)Scale); 270 271 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 272 if (IsPostDec) 273 MIB.addDef(AArch64::SP); 274 if (IsPaired) 275 MIB.addReg(Reg2, getDefRegState(true)); 276 MIB.addReg(Reg1, getDefRegState(true)) 277 .addReg(AArch64::SP) 278 .addImm(Offset) 279 .setMIFlag(MachineInstr::FrameDestroy); 280 } 281 282 /// Return a unique function if a helper can be formed with the given Regs 283 /// and frame type. 284 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: 285 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 286 /// stp x20, x19, [sp, #16] 287 /// ret 288 /// 289 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: 290 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 291 /// stp x20, x19, [sp, #16] 292 /// add fp, sp, #32 293 /// ret 294 /// 295 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: 296 /// mov x16, x30 297 /// ldp x29, x30, [sp, #32] 298 /// ldp x20, x19, [sp, #16] 299 /// ldp x22, x21, [sp], #48 300 /// ret x16 301 /// 302 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: 303 /// ldp x29, x30, [sp, #32] 304 /// ldp x20, x19, [sp, #16] 305 /// ldp x22, x21, [sp], #48 306 /// ret 307 /// @param M module 308 /// @param MMI machine module info 309 /// @param Regs callee save regs that the helper will handle 310 /// @param Type frame helper type 311 /// @return a helper function 312 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, 313 SmallVectorImpl<unsigned> &Regs, 314 FrameHelperType Type, 315 unsigned FpOffset = 0) { 316 assert(Regs.size() >= 2); 317 auto Name = getFrameHelperName(Regs, Type, FpOffset); 318 auto *F = M->getFunction(Name); 319 if (F) 320 return F; 321 322 auto &MF = createFrameHelperMachineFunction(M, MMI, Name); 323 MachineBasicBlock &MBB = *MF.begin(); 324 const TargetSubtargetInfo &STI = MF.getSubtarget(); 325 const TargetInstrInfo &TII = *STI.getInstrInfo(); 326 327 int Size = (int)Regs.size(); 328 switch (Type) { 329 case FrameHelperType::Prolog: 330 case FrameHelperType::PrologFrame: { 331 // Compute the remaining SP adjust beyond FP/LR. 332 auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); 333 334 // If the register stored to the lowest address is not LR, we must subtract 335 // more from SP here. 336 if (LRIdx != Size - 2) { 337 assert(Regs[Size - 2] != AArch64::LR); 338 emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], 339 LRIdx - Size + 2, true); 340 } 341 342 // Store CSRs in the reverse order. 343 for (int I = Size - 3; I >= 0; I -= 2) { 344 // FP/LR has been stored at call-site. 345 if (Regs[I - 1] == AArch64::LR) 346 continue; 347 emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, 348 false); 349 } 350 if (Type == FrameHelperType::PrologFrame) 351 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) 352 .addDef(AArch64::FP) 353 .addUse(AArch64::SP) 354 .addImm(FpOffset) 355 .addImm(0) 356 .setMIFlag(MachineInstr::FrameSetup); 357 358 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 359 .addReg(AArch64::LR); 360 break; 361 } 362 case FrameHelperType::Epilog: 363 case FrameHelperType::EpilogTail: 364 if (Type == FrameHelperType::Epilog) 365 // Stash LR to X16 366 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) 367 .addDef(AArch64::X16) 368 .addReg(AArch64::XZR) 369 .addUse(AArch64::LR) 370 .addImm(0); 371 372 for (int I = 0; I < Size - 2; I += 2) 373 emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, 374 false); 375 // Restore the last CSR with post-increment of SP. 376 emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, 377 true); 378 379 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 380 .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); 381 break; 382 } 383 384 return M->getFunction(Name); 385 } 386 387 /// This function checks if a frame helper should be used for 388 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. 389 /// @param MBB machine basic block 390 /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog 391 /// @param Regs callee save registers that are saved or restored. 392 /// @param Type frame helper type 393 /// @return True if a use of helper is qualified. 394 static bool shouldUseFrameHelper(MachineBasicBlock &MBB, 395 MachineBasicBlock::iterator &NextMBBI, 396 SmallVectorImpl<unsigned> &Regs, 397 FrameHelperType Type) { 398 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 399 auto RegCount = Regs.size(); 400 assert(RegCount > 0 && (RegCount % 2 == 0)); 401 // # of instructions that will be outlined. 402 int InstCount = RegCount / 2; 403 404 // Do not use a helper call when not saving LR. 405 if (!llvm::is_contained(Regs, AArch64::LR)) 406 return false; 407 408 switch (Type) { 409 case FrameHelperType::Prolog: 410 // Prolog helper cannot save FP/LR. 411 InstCount--; 412 break; 413 case FrameHelperType::PrologFrame: { 414 // Effecitvely no change in InstCount since FpAdjusment is included. 415 break; 416 } 417 case FrameHelperType::Epilog: 418 // Bail-out if X16 is live across the epilog helper because it is used in 419 // the helper to handle X30. 420 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { 421 if (NextMI->readsRegister(AArch64::W16, TRI)) 422 return false; 423 } 424 // Epilog may not be in the last block. Check the liveness in successors. 425 for (const MachineBasicBlock *SuccMBB : MBB.successors()) { 426 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) 427 return false; 428 } 429 // No change in InstCount for the regular epilog case. 430 break; 431 case FrameHelperType::EpilogTail: { 432 // EpilogTail helper includes the caller's return. 433 if (NextMBBI == MBB.end()) 434 return false; 435 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) 436 return false; 437 InstCount++; 438 break; 439 } 440 } 441 442 return InstCount >= FrameHelperSizeThreshold; 443 } 444 445 /// Lower a HOM_Epilog pseudo instruction into a helper call while 446 /// creating the helper on demand. Or emit a sequence of loads in place when not 447 /// using a helper call. 448 /// 449 /// 1. With a helper including ret 450 /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI 451 /// ret ; NextMBBI 452 /// => 453 /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 454 /// ... ; NextMBBI 455 /// 456 /// 2. With a helper 457 /// HOM_Epilog x30, x29, x19, x20, x21, x22 458 /// => 459 /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 460 /// 461 /// 3. Without a helper 462 /// HOM_Epilog x30, x29, x19, x20, x21, x22 463 /// => 464 /// ldp x29, x30, [sp, #32] 465 /// ldp x20, x19, [sp, #16] 466 /// ldp x22, x21, [sp], #48 467 bool AArch64LowerHomogeneousPE::lowerEpilog( 468 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 469 MachineBasicBlock::iterator &NextMBBI) { 470 auto &MF = *MBB.getParent(); 471 MachineInstr &MI = *MBBI; 472 473 DebugLoc DL = MI.getDebugLoc(); 474 SmallVector<unsigned, 8> Regs; 475 bool HasUnpairedReg = false; 476 for (auto &MO : MI.operands()) 477 if (MO.isReg()) { 478 if (!MO.getReg().isValid()) { 479 // For now we are only expecting unpaired GP registers which should 480 // occur exactly once. 481 assert(!HasUnpairedReg); 482 HasUnpairedReg = true; 483 } 484 Regs.push_back(MO.getReg()); 485 } 486 (void)HasUnpairedReg; 487 int Size = (int)Regs.size(); 488 if (Size == 0) 489 return false; 490 // Registers are in pair. 491 assert(Size % 2 == 0); 492 assert(MI.getOpcode() == AArch64::HOM_Epilog); 493 494 auto Return = NextMBBI; 495 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { 496 // When MBB ends with a return, emit a tail-call to the epilog helper 497 auto *EpilogTailHelper = 498 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); 499 BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) 500 .addGlobalAddress(EpilogTailHelper) 501 .addImm(0) 502 .setMIFlag(MachineInstr::FrameDestroy) 503 .copyImplicitOps(MI) 504 .copyImplicitOps(*Return); 505 NextMBBI = std::next(Return); 506 Return->removeFromParent(); 507 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, 508 FrameHelperType::Epilog)) { 509 // The default epilog helper case. 510 auto *EpilogHelper = 511 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); 512 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 513 .addGlobalAddress(EpilogHelper) 514 .setMIFlag(MachineInstr::FrameDestroy) 515 .copyImplicitOps(MI); 516 } else { 517 // Fall back to no-helper. 518 for (int I = 0; I < Size - 2; I += 2) 519 emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); 520 // Restore the last CSR with post-increment of SP. 521 emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); 522 } 523 524 MBBI->removeFromParent(); 525 return true; 526 } 527 528 /// Lower a HOM_Prolog pseudo instruction into a helper call while 529 /// creating the helper on demand. Or emit a sequence of stores in place when 530 /// not using a helper call. 531 /// 532 /// 1. With a helper including frame-setup 533 /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 534 /// => 535 /// stp x29, x30, [sp, #-16]! 536 /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 537 /// 538 /// 2. With a helper 539 /// HOM_Prolog x30, x29, x19, x20, x21, x22 540 /// => 541 /// stp x29, x30, [sp, #-16]! 542 /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 543 /// 544 /// 3. Without a helper 545 /// HOM_Prolog x30, x29, x19, x20, x21, x22 546 /// => 547 /// stp x22, x21, [sp, #-48]! 548 /// stp x20, x19, [sp, #16] 549 /// stp x29, x30, [sp, #32] 550 bool AArch64LowerHomogeneousPE::lowerProlog( 551 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 552 MachineBasicBlock::iterator &NextMBBI) { 553 auto &MF = *MBB.getParent(); 554 MachineInstr &MI = *MBBI; 555 556 DebugLoc DL = MI.getDebugLoc(); 557 SmallVector<unsigned, 8> Regs; 558 bool HasUnpairedReg = false; 559 int LRIdx = 0; 560 std::optional<int> FpOffset; 561 for (auto &MO : MI.operands()) { 562 if (MO.isReg()) { 563 if (MO.getReg().isValid()) { 564 if (MO.getReg() == AArch64::LR) 565 LRIdx = Regs.size(); 566 } else { 567 // For now we are only expecting unpaired GP registers which should 568 // occur exactly once. 569 assert(!HasUnpairedReg); 570 HasUnpairedReg = true; 571 } 572 Regs.push_back(MO.getReg()); 573 } else if (MO.isImm()) { 574 FpOffset = MO.getImm(); 575 } 576 } 577 (void)HasUnpairedReg; 578 int Size = (int)Regs.size(); 579 if (Size == 0) 580 return false; 581 // Allow compact unwind case only for oww. 582 assert(Size % 2 == 0); 583 assert(MI.getOpcode() == AArch64::HOM_Prolog); 584 585 if (FpOffset && 586 shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { 587 // FP/LR is stored at the top of stack before the prolog helper call. 588 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 589 auto *PrologFrameHelper = getOrCreateFrameHelper( 590 M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); 591 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 592 .addGlobalAddress(PrologFrameHelper) 593 .setMIFlag(MachineInstr::FrameSetup) 594 .copyImplicitOps(MI) 595 .addReg(AArch64::FP, RegState::Implicit | RegState::Define) 596 .addReg(AArch64::SP, RegState::Implicit); 597 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, 598 FrameHelperType::Prolog)) { 599 // FP/LR is stored at the top of stack before the prolog helper call. 600 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 601 auto *PrologHelper = 602 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); 603 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 604 .addGlobalAddress(PrologHelper) 605 .setMIFlag(MachineInstr::FrameSetup) 606 .copyImplicitOps(MI); 607 } else { 608 // Fall back to no-helper. 609 emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); 610 for (int I = Size - 3; I >= 0; I -= 2) 611 emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); 612 if (FpOffset) { 613 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) 614 .addDef(AArch64::FP) 615 .addUse(AArch64::SP) 616 .addImm(*FpOffset) 617 .addImm(0) 618 .setMIFlag(MachineInstr::FrameSetup); 619 } 620 } 621 622 MBBI->removeFromParent(); 623 return true; 624 } 625 626 /// Process each machine instruction 627 /// @param MBB machine basic block 628 /// @param MBBI current instruction iterator 629 /// @param NextMBBI next instruction iterator which can be updated 630 /// @return True when IR is changed. 631 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, 632 MachineBasicBlock::iterator MBBI, 633 MachineBasicBlock::iterator &NextMBBI) { 634 MachineInstr &MI = *MBBI; 635 unsigned Opcode = MI.getOpcode(); 636 switch (Opcode) { 637 default: 638 break; 639 case AArch64::HOM_Prolog: 640 return lowerProlog(MBB, MBBI, NextMBBI); 641 case AArch64::HOM_Epilog: 642 return lowerEpilog(MBB, MBBI, NextMBBI); 643 } 644 return false; 645 } 646 647 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { 648 bool Modified = false; 649 650 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 651 while (MBBI != E) { 652 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 653 Modified |= runOnMI(MBB, MBBI, NMBBI); 654 MBBI = NMBBI; 655 } 656 657 return Modified; 658 } 659 660 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { 661 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 662 663 bool Modified = false; 664 for (auto &MBB : MF) 665 Modified |= runOnMBB(MBB); 666 return Modified; 667 } 668 669 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { 670 return new AArch64LowerHomogeneousPrologEpilog(); 671 } 672