1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that lowers homogeneous prolog/epilog instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "MCTargetDesc/AArch64InstPrinter.h" 16 #include "Utils/AArch64BaseInfo.h" 17 #include "llvm/CodeGen/MachineBasicBlock.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include "llvm/CodeGen/MachineInstr.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineModuleInfo.h" 23 #include "llvm/CodeGen/MachineOperand.h" 24 #include "llvm/CodeGen/TargetSubtargetInfo.h" 25 #include "llvm/IR/DebugLoc.h" 26 #include "llvm/IR/IRBuilder.h" 27 #include "llvm/IR/Module.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <optional> 31 #include <sstream> 32 33 using namespace llvm; 34 35 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ 36 "AArch64 homogeneous prolog/epilog lowering pass" 37 38 cl::opt<int> FrameHelperSizeThreshold( 39 "frame-helper-size-threshold", cl::init(2), cl::Hidden, 40 cl::desc("The minimum number of instructions that are outlined in a frame " 41 "helper (default = 2)")); 42 43 namespace { 44 45 class AArch64LowerHomogeneousPE { 46 public: 47 const AArch64InstrInfo *TII; 48 49 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) 50 : M(M), MMI(MMI) {} 51 52 bool run(); 53 bool runOnMachineFunction(MachineFunction &Fn); 54 55 private: 56 Module *M; 57 MachineModuleInfo *MMI; 58 59 bool runOnMBB(MachineBasicBlock &MBB); 60 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 61 MachineBasicBlock::iterator &NextMBBI); 62 63 /// Lower a HOM_Prolog pseudo instruction into a helper call 64 /// or a sequence of homogeneous stores. 65 /// When a fp setup follows, it can be optimized. 66 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 67 MachineBasicBlock::iterator &NextMBBI); 68 /// Lower a HOM_Epilog pseudo instruction into a helper call 69 /// or a sequence of homogeneous loads. 70 /// When a return follow, it can be optimized. 71 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 72 MachineBasicBlock::iterator &NextMBBI); 73 }; 74 75 class AArch64LowerHomogeneousPrologEpilog : public ModulePass { 76 public: 77 static char ID; 78 79 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { 80 initializeAArch64LowerHomogeneousPrologEpilogPass( 81 *PassRegistry::getPassRegistry()); 82 } 83 void getAnalysisUsage(AnalysisUsage &AU) const override { 84 AU.addRequired<MachineModuleInfoWrapperPass>(); 85 AU.addPreserved<MachineModuleInfoWrapperPass>(); 86 AU.setPreservesAll(); 87 ModulePass::getAnalysisUsage(AU); 88 } 89 bool runOnModule(Module &M) override; 90 91 StringRef getPassName() const override { 92 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; 93 } 94 }; 95 96 } // end anonymous namespace 97 98 char AArch64LowerHomogeneousPrologEpilog::ID = 0; 99 100 INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, 101 "aarch64-lower-homogeneous-prolog-epilog", 102 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) 103 104 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { 105 if (skipModule(M)) 106 return false; 107 108 MachineModuleInfo *MMI = 109 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); 110 return AArch64LowerHomogeneousPE(&M, MMI).run(); 111 } 112 113 bool AArch64LowerHomogeneousPE::run() { 114 bool Changed = false; 115 for (auto &F : *M) { 116 if (F.empty()) 117 continue; 118 119 MachineFunction *MF = MMI->getMachineFunction(F); 120 if (!MF) 121 continue; 122 Changed |= runOnMachineFunction(*MF); 123 } 124 125 return Changed; 126 } 127 enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; 128 129 /// Return a frame helper name with the given CSRs and the helper type. 130 /// For instance, a prolog helper that saves x19 and x20 is named as 131 /// OUTLINED_FUNCTION_PROLOG_x19x20. 132 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, 133 FrameHelperType Type, unsigned FpOffset) { 134 std::ostringstream RegStream; 135 switch (Type) { 136 case FrameHelperType::Prolog: 137 RegStream << "OUTLINED_FUNCTION_PROLOG_"; 138 break; 139 case FrameHelperType::PrologFrame: 140 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; 141 break; 142 case FrameHelperType::Epilog: 143 RegStream << "OUTLINED_FUNCTION_EPILOG_"; 144 break; 145 case FrameHelperType::EpilogTail: 146 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; 147 break; 148 } 149 150 for (auto Reg : Regs) { 151 if (Reg == AArch64::NoRegister) 152 continue; 153 RegStream << AArch64InstPrinter::getRegisterName(Reg); 154 } 155 156 return RegStream.str(); 157 } 158 159 /// Create a Function for the unique frame helper with the given name. 160 /// Return a newly created MachineFunction with an empty MachineBasicBlock. 161 static MachineFunction &createFrameHelperMachineFunction(Module *M, 162 MachineModuleInfo *MMI, 163 StringRef Name) { 164 LLVMContext &C = M->getContext(); 165 Function *F = M->getFunction(Name); 166 assert(F == nullptr && "Function has been created before"); 167 F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), 168 Function::ExternalLinkage, Name, M); 169 assert(F && "Function was null!"); 170 171 // Use ODR linkage to avoid duplication. 172 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 173 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 174 175 // Set no-opt/minsize, so we don't insert padding between outlined 176 // functions. 177 F->addFnAttr(Attribute::OptimizeNone); 178 F->addFnAttr(Attribute::NoInline); 179 F->addFnAttr(Attribute::MinSize); 180 F->addFnAttr(Attribute::Naked); 181 182 MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); 183 // Remove unnecessary register liveness and set NoVRegs. 184 MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); 185 MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); 186 MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); 187 MF.getRegInfo().freezeReservedRegs(); 188 189 // Create entry block. 190 BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); 191 IRBuilder<> Builder(EntryBB); 192 Builder.CreateRetVoid(); 193 194 // Insert the new block into the function. 195 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); 196 MF.insert(MF.begin(), MBB); 197 198 return MF; 199 } 200 201 /// Emit a store-pair instruction for frame-setup. 202 /// If Reg2 is AArch64::NoRegister, emit STR instead. 203 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, 204 MachineBasicBlock::iterator Pos, 205 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 206 int Offset, bool IsPreDec) { 207 assert(Reg1 != AArch64::NoRegister); 208 const bool IsPaired = Reg2 != AArch64::NoRegister; 209 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 210 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 211 unsigned Opc; 212 if (IsPreDec) { 213 if (IsFloat) 214 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; 215 else 216 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; 217 } else { 218 if (IsFloat) 219 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; 220 else 221 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; 222 } 223 // The implicit scale for Offset is 8. 224 TypeSize Scale(0U, false), Width(0U, false); 225 int64_t MinOffset, MaxOffset; 226 [[maybe_unused]] bool Success = 227 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 228 assert(Success && "Invalid Opcode"); 229 Offset *= (8 / (int)Scale); 230 231 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 232 if (IsPreDec) 233 MIB.addDef(AArch64::SP); 234 if (IsPaired) 235 MIB.addReg(Reg2); 236 MIB.addReg(Reg1) 237 .addReg(AArch64::SP) 238 .addImm(Offset) 239 .setMIFlag(MachineInstr::FrameSetup); 240 } 241 242 /// Emit a load-pair instruction for frame-destroy. 243 /// If Reg2 is AArch64::NoRegister, emit LDR instead. 244 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, 245 MachineBasicBlock::iterator Pos, 246 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 247 int Offset, bool IsPostDec) { 248 assert(Reg1 != AArch64::NoRegister); 249 const bool IsPaired = Reg2 != AArch64::NoRegister; 250 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 251 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 252 unsigned Opc; 253 if (IsPostDec) { 254 if (IsFloat) 255 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; 256 else 257 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; 258 } else { 259 if (IsFloat) 260 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; 261 else 262 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; 263 } 264 // The implicit scale for Offset is 8. 265 TypeSize Scale(0U, false), Width(0U, false); 266 int64_t MinOffset, MaxOffset; 267 [[maybe_unused]] bool Success = 268 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 269 assert(Success && "Invalid Opcode"); 270 Offset *= (8 / (int)Scale); 271 272 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 273 if (IsPostDec) 274 MIB.addDef(AArch64::SP); 275 if (IsPaired) 276 MIB.addReg(Reg2, getDefRegState(true)); 277 MIB.addReg(Reg1, getDefRegState(true)) 278 .addReg(AArch64::SP) 279 .addImm(Offset) 280 .setMIFlag(MachineInstr::FrameDestroy); 281 } 282 283 /// Return a unique function if a helper can be formed with the given Regs 284 /// and frame type. 285 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: 286 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 287 /// stp x20, x19, [sp, #16] 288 /// ret 289 /// 290 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: 291 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 292 /// stp x20, x19, [sp, #16] 293 /// add fp, sp, #32 294 /// ret 295 /// 296 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: 297 /// mov x16, x30 298 /// ldp x29, x30, [sp, #32] 299 /// ldp x20, x19, [sp, #16] 300 /// ldp x22, x21, [sp], #48 301 /// ret x16 302 /// 303 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: 304 /// ldp x29, x30, [sp, #32] 305 /// ldp x20, x19, [sp, #16] 306 /// ldp x22, x21, [sp], #48 307 /// ret 308 /// @param M module 309 /// @param MMI machine module info 310 /// @param Regs callee save regs that the helper will handle 311 /// @param Type frame helper type 312 /// @return a helper function 313 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, 314 SmallVectorImpl<unsigned> &Regs, 315 FrameHelperType Type, 316 unsigned FpOffset = 0) { 317 assert(Regs.size() >= 2); 318 auto Name = getFrameHelperName(Regs, Type, FpOffset); 319 auto *F = M->getFunction(Name); 320 if (F) 321 return F; 322 323 auto &MF = createFrameHelperMachineFunction(M, MMI, Name); 324 MachineBasicBlock &MBB = *MF.begin(); 325 const TargetSubtargetInfo &STI = MF.getSubtarget(); 326 const TargetInstrInfo &TII = *STI.getInstrInfo(); 327 328 int Size = (int)Regs.size(); 329 switch (Type) { 330 case FrameHelperType::Prolog: 331 case FrameHelperType::PrologFrame: { 332 // Compute the remaining SP adjust beyond FP/LR. 333 auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); 334 335 // If the register stored to the lowest address is not LR, we must subtract 336 // more from SP here. 337 if (LRIdx != Size - 2) { 338 assert(Regs[Size - 2] != AArch64::LR); 339 emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], 340 LRIdx - Size + 2, true); 341 } 342 343 // Store CSRs in the reverse order. 344 for (int I = Size - 3; I >= 0; I -= 2) { 345 // FP/LR has been stored at call-site. 346 if (Regs[I - 1] == AArch64::LR) 347 continue; 348 emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, 349 false); 350 } 351 if (Type == FrameHelperType::PrologFrame) 352 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) 353 .addDef(AArch64::FP) 354 .addUse(AArch64::SP) 355 .addImm(FpOffset) 356 .addImm(0) 357 .setMIFlag(MachineInstr::FrameSetup); 358 359 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 360 .addReg(AArch64::LR); 361 break; 362 } 363 case FrameHelperType::Epilog: 364 case FrameHelperType::EpilogTail: 365 if (Type == FrameHelperType::Epilog) 366 // Stash LR to X16 367 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) 368 .addDef(AArch64::X16) 369 .addReg(AArch64::XZR) 370 .addUse(AArch64::LR) 371 .addImm(0); 372 373 for (int I = 0; I < Size - 2; I += 2) 374 emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, 375 false); 376 // Restore the last CSR with post-increment of SP. 377 emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, 378 true); 379 380 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 381 .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); 382 break; 383 } 384 385 return M->getFunction(Name); 386 } 387 388 /// This function checks if a frame helper should be used for 389 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. 390 /// @param MBB machine basic block 391 /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog 392 /// @param Regs callee save registers that are saved or restored. 393 /// @param Type frame helper type 394 /// @return True if a use of helper is qualified. 395 static bool shouldUseFrameHelper(MachineBasicBlock &MBB, 396 MachineBasicBlock::iterator &NextMBBI, 397 SmallVectorImpl<unsigned> &Regs, 398 FrameHelperType Type) { 399 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 400 auto RegCount = Regs.size(); 401 assert(RegCount > 0 && (RegCount % 2 == 0)); 402 // # of instructions that will be outlined. 403 int InstCount = RegCount / 2; 404 405 // Do not use a helper call when not saving LR. 406 if (!llvm::is_contained(Regs, AArch64::LR)) 407 return false; 408 409 switch (Type) { 410 case FrameHelperType::Prolog: 411 // Prolog helper cannot save FP/LR. 412 InstCount--; 413 break; 414 case FrameHelperType::PrologFrame: { 415 // Effecitvely no change in InstCount since FpAdjusment is included. 416 break; 417 } 418 case FrameHelperType::Epilog: 419 // Bail-out if X16 is live across the epilog helper because it is used in 420 // the helper to handle X30. 421 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { 422 if (NextMI->readsRegister(AArch64::W16, TRI)) 423 return false; 424 } 425 // Epilog may not be in the last block. Check the liveness in successors. 426 for (const MachineBasicBlock *SuccMBB : MBB.successors()) { 427 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) 428 return false; 429 } 430 // No change in InstCount for the regular epilog case. 431 break; 432 case FrameHelperType::EpilogTail: { 433 // EpilogTail helper includes the caller's return. 434 if (NextMBBI == MBB.end()) 435 return false; 436 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) 437 return false; 438 InstCount++; 439 break; 440 } 441 } 442 443 return InstCount >= FrameHelperSizeThreshold; 444 } 445 446 /// Lower a HOM_Epilog pseudo instruction into a helper call while 447 /// creating the helper on demand. Or emit a sequence of loads in place when not 448 /// using a helper call. 449 /// 450 /// 1. With a helper including ret 451 /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI 452 /// ret ; NextMBBI 453 /// => 454 /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 455 /// ... ; NextMBBI 456 /// 457 /// 2. With a helper 458 /// HOM_Epilog x30, x29, x19, x20, x21, x22 459 /// => 460 /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 461 /// 462 /// 3. Without a helper 463 /// HOM_Epilog x30, x29, x19, x20, x21, x22 464 /// => 465 /// ldp x29, x30, [sp, #32] 466 /// ldp x20, x19, [sp, #16] 467 /// ldp x22, x21, [sp], #48 468 bool AArch64LowerHomogeneousPE::lowerEpilog( 469 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 470 MachineBasicBlock::iterator &NextMBBI) { 471 auto &MF = *MBB.getParent(); 472 MachineInstr &MI = *MBBI; 473 474 DebugLoc DL = MI.getDebugLoc(); 475 SmallVector<unsigned, 8> Regs; 476 bool HasUnpairedReg = false; 477 for (auto &MO : MI.operands()) 478 if (MO.isReg()) { 479 if (!MO.getReg().isValid()) { 480 // For now we are only expecting unpaired GP registers which should 481 // occur exactly once. 482 assert(!HasUnpairedReg); 483 HasUnpairedReg = true; 484 } 485 Regs.push_back(MO.getReg()); 486 } 487 (void)HasUnpairedReg; 488 int Size = (int)Regs.size(); 489 if (Size == 0) 490 return false; 491 // Registers are in pair. 492 assert(Size % 2 == 0); 493 assert(MI.getOpcode() == AArch64::HOM_Epilog); 494 495 auto Return = NextMBBI; 496 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { 497 // When MBB ends with a return, emit a tail-call to the epilog helper 498 auto *EpilogTailHelper = 499 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); 500 BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) 501 .addGlobalAddress(EpilogTailHelper) 502 .addImm(0) 503 .setMIFlag(MachineInstr::FrameDestroy) 504 .copyImplicitOps(MI) 505 .copyImplicitOps(*Return); 506 NextMBBI = std::next(Return); 507 Return->removeFromParent(); 508 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, 509 FrameHelperType::Epilog)) { 510 // The default epilog helper case. 511 auto *EpilogHelper = 512 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); 513 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 514 .addGlobalAddress(EpilogHelper) 515 .setMIFlag(MachineInstr::FrameDestroy) 516 .copyImplicitOps(MI); 517 } else { 518 // Fall back to no-helper. 519 for (int I = 0; I < Size - 2; I += 2) 520 emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); 521 // Restore the last CSR with post-increment of SP. 522 emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); 523 } 524 525 MBBI->removeFromParent(); 526 return true; 527 } 528 529 /// Lower a HOM_Prolog pseudo instruction into a helper call while 530 /// creating the helper on demand. Or emit a sequence of stores in place when 531 /// not using a helper call. 532 /// 533 /// 1. With a helper including frame-setup 534 /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 535 /// => 536 /// stp x29, x30, [sp, #-16]! 537 /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 538 /// 539 /// 2. With a helper 540 /// HOM_Prolog x30, x29, x19, x20, x21, x22 541 /// => 542 /// stp x29, x30, [sp, #-16]! 543 /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 544 /// 545 /// 3. Without a helper 546 /// HOM_Prolog x30, x29, x19, x20, x21, x22 547 /// => 548 /// stp x22, x21, [sp, #-48]! 549 /// stp x20, x19, [sp, #16] 550 /// stp x29, x30, [sp, #32] 551 bool AArch64LowerHomogeneousPE::lowerProlog( 552 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 553 MachineBasicBlock::iterator &NextMBBI) { 554 auto &MF = *MBB.getParent(); 555 MachineInstr &MI = *MBBI; 556 557 DebugLoc DL = MI.getDebugLoc(); 558 SmallVector<unsigned, 8> Regs; 559 bool HasUnpairedReg = false; 560 int LRIdx = 0; 561 std::optional<int> FpOffset; 562 for (auto &MO : MI.operands()) { 563 if (MO.isReg()) { 564 if (MO.getReg().isValid()) { 565 if (MO.getReg() == AArch64::LR) 566 LRIdx = Regs.size(); 567 } else { 568 // For now we are only expecting unpaired GP registers which should 569 // occur exactly once. 570 assert(!HasUnpairedReg); 571 HasUnpairedReg = true; 572 } 573 Regs.push_back(MO.getReg()); 574 } else if (MO.isImm()) { 575 FpOffset = MO.getImm(); 576 } 577 } 578 (void)HasUnpairedReg; 579 int Size = (int)Regs.size(); 580 if (Size == 0) 581 return false; 582 // Allow compact unwind case only for oww. 583 assert(Size % 2 == 0); 584 assert(MI.getOpcode() == AArch64::HOM_Prolog); 585 586 if (FpOffset && 587 shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { 588 // FP/LR is stored at the top of stack before the prolog helper call. 589 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 590 auto *PrologFrameHelper = getOrCreateFrameHelper( 591 M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); 592 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 593 .addGlobalAddress(PrologFrameHelper) 594 .setMIFlag(MachineInstr::FrameSetup) 595 .copyImplicitOps(MI) 596 .addReg(AArch64::FP, RegState::Implicit | RegState::Define) 597 .addReg(AArch64::SP, RegState::Implicit); 598 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, 599 FrameHelperType::Prolog)) { 600 // FP/LR is stored at the top of stack before the prolog helper call. 601 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 602 auto *PrologHelper = 603 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); 604 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 605 .addGlobalAddress(PrologHelper) 606 .setMIFlag(MachineInstr::FrameSetup) 607 .copyImplicitOps(MI); 608 } else { 609 // Fall back to no-helper. 610 emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); 611 for (int I = Size - 3; I >= 0; I -= 2) 612 emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); 613 if (FpOffset) { 614 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) 615 .addDef(AArch64::FP) 616 .addUse(AArch64::SP) 617 .addImm(*FpOffset) 618 .addImm(0) 619 .setMIFlag(MachineInstr::FrameSetup); 620 } 621 } 622 623 MBBI->removeFromParent(); 624 return true; 625 } 626 627 /// Process each machine instruction 628 /// @param MBB machine basic block 629 /// @param MBBI current instruction iterator 630 /// @param NextMBBI next instruction iterator which can be updated 631 /// @return True when IR is changed. 632 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, 633 MachineBasicBlock::iterator MBBI, 634 MachineBasicBlock::iterator &NextMBBI) { 635 MachineInstr &MI = *MBBI; 636 unsigned Opcode = MI.getOpcode(); 637 switch (Opcode) { 638 default: 639 break; 640 case AArch64::HOM_Prolog: 641 return lowerProlog(MBB, MBBI, NextMBBI); 642 case AArch64::HOM_Epilog: 643 return lowerEpilog(MBB, MBBI, NextMBBI); 644 } 645 return false; 646 } 647 648 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { 649 bool Modified = false; 650 651 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 652 while (MBBI != E) { 653 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 654 Modified |= runOnMI(MBB, MBBI, NMBBI); 655 MBBI = NMBBI; 656 } 657 658 return Modified; 659 } 660 661 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { 662 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 663 664 bool Modified = false; 665 for (auto &MBB : MF) 666 Modified |= runOnMBB(MBB); 667 return Modified; 668 } 669 670 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { 671 return new AArch64LowerHomogeneousPrologEpilog(); 672 } 673