1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that lowers homogeneous prolog/epilog instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "MCTargetDesc/AArch64InstPrinter.h" 16 #include "llvm/CodeGen/MachineBasicBlock.h" 17 #include "llvm/CodeGen/MachineFunction.h" 18 #include "llvm/CodeGen/MachineInstr.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/CodeGen/MachineModuleInfo.h" 21 #include "llvm/CodeGen/MachineOperand.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/IR/DebugLoc.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Pass.h" 27 #include <optional> 28 #include <sstream> 29 30 using namespace llvm; 31 32 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ 33 "AArch64 homogeneous prolog/epilog lowering pass" 34 35 static cl::opt<int> FrameHelperSizeThreshold( 36 "frame-helper-size-threshold", cl::init(2), cl::Hidden, 37 cl::desc("The minimum number of instructions that are outlined in a frame " 38 "helper (default = 2)")); 39 40 namespace { 41 42 class AArch64LowerHomogeneousPE { 43 public: 44 const AArch64InstrInfo *TII; 45 46 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) 47 : M(M), MMI(MMI) {} 48 49 bool run(); 50 bool runOnMachineFunction(MachineFunction &Fn); 51 52 private: 53 Module *M; 54 MachineModuleInfo *MMI; 55 56 bool runOnMBB(MachineBasicBlock &MBB); 57 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 58 MachineBasicBlock::iterator &NextMBBI); 59 60 /// Lower a HOM_Prolog pseudo instruction into a helper call 61 /// or a sequence of homogeneous stores. 62 /// When a fp setup follows, it can be optimized. 63 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 64 MachineBasicBlock::iterator &NextMBBI); 65 /// Lower a HOM_Epilog pseudo instruction into a helper call 66 /// or a sequence of homogeneous loads. 67 /// When a return follow, it can be optimized. 68 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 69 MachineBasicBlock::iterator &NextMBBI); 70 }; 71 72 class AArch64LowerHomogeneousPrologEpilog : public ModulePass { 73 public: 74 static char ID; 75 76 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {} 77 void getAnalysisUsage(AnalysisUsage &AU) const override { 78 AU.addRequired<MachineModuleInfoWrapperPass>(); 79 AU.addPreserved<MachineModuleInfoWrapperPass>(); 80 AU.setPreservesAll(); 81 ModulePass::getAnalysisUsage(AU); 82 } 83 bool runOnModule(Module &M) override; 84 85 StringRef getPassName() const override { 86 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; 87 } 88 }; 89 90 } // end anonymous namespace 91 92 char AArch64LowerHomogeneousPrologEpilog::ID = 0; 93 94 INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, 95 "aarch64-lower-homogeneous-prolog-epilog", 96 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) 97 98 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { 99 if (skipModule(M)) 100 return false; 101 102 MachineModuleInfo *MMI = 103 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); 104 return AArch64LowerHomogeneousPE(&M, MMI).run(); 105 } 106 107 bool AArch64LowerHomogeneousPE::run() { 108 bool Changed = false; 109 for (auto &F : *M) { 110 if (F.empty()) 111 continue; 112 113 MachineFunction *MF = MMI->getMachineFunction(F); 114 if (!MF) 115 continue; 116 Changed |= runOnMachineFunction(*MF); 117 } 118 119 return Changed; 120 } 121 enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; 122 123 /// Return a frame helper name with the given CSRs and the helper type. 124 /// For instance, a prolog helper that saves x19 and x20 is named as 125 /// OUTLINED_FUNCTION_PROLOG_x19x20. 126 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, 127 FrameHelperType Type, unsigned FpOffset) { 128 std::ostringstream RegStream; 129 switch (Type) { 130 case FrameHelperType::Prolog: 131 RegStream << "OUTLINED_FUNCTION_PROLOG_"; 132 break; 133 case FrameHelperType::PrologFrame: 134 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; 135 break; 136 case FrameHelperType::Epilog: 137 RegStream << "OUTLINED_FUNCTION_EPILOG_"; 138 break; 139 case FrameHelperType::EpilogTail: 140 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; 141 break; 142 } 143 144 for (auto Reg : Regs) { 145 if (Reg == AArch64::NoRegister) 146 continue; 147 RegStream << AArch64InstPrinter::getRegisterName(Reg); 148 } 149 150 return RegStream.str(); 151 } 152 153 /// Create a Function for the unique frame helper with the given name. 154 /// Return a newly created MachineFunction with an empty MachineBasicBlock. 155 static MachineFunction &createFrameHelperMachineFunction(Module *M, 156 MachineModuleInfo *MMI, 157 StringRef Name) { 158 LLVMContext &C = M->getContext(); 159 Function *F = M->getFunction(Name); 160 assert(F == nullptr && "Function has been created before"); 161 F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), 162 Function::ExternalLinkage, Name, M); 163 assert(F && "Function was null!"); 164 165 // Use ODR linkage to avoid duplication. 166 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 167 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 168 169 // Set minsize, so we don't insert padding between outlined functions. 170 F->addFnAttr(Attribute::NoInline); 171 F->addFnAttr(Attribute::MinSize); 172 F->addFnAttr(Attribute::Naked); 173 174 MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); 175 // Remove unnecessary register liveness and set NoVRegs. 176 MF.getProperties().resetTracksLiveness().resetIsSSA().setNoVRegs(); 177 MF.getRegInfo().freezeReservedRegs(); 178 179 // Create entry block. 180 BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); 181 IRBuilder<> Builder(EntryBB); 182 Builder.CreateRetVoid(); 183 184 // Insert the new block into the function. 185 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); 186 MF.insert(MF.begin(), MBB); 187 188 return MF; 189 } 190 191 /// Emit a store-pair instruction for frame-setup. 192 /// If Reg2 is AArch64::NoRegister, emit STR instead. 193 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, 194 MachineBasicBlock::iterator Pos, 195 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 196 int Offset, bool IsPreDec) { 197 assert(Reg1 != AArch64::NoRegister); 198 const bool IsPaired = Reg2 != AArch64::NoRegister; 199 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 200 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 201 unsigned Opc; 202 if (IsPreDec) { 203 if (IsFloat) 204 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; 205 else 206 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; 207 } else { 208 if (IsFloat) 209 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; 210 else 211 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; 212 } 213 // The implicit scale for Offset is 8. 214 TypeSize Scale(0U, false), Width(0U, false); 215 int64_t MinOffset, MaxOffset; 216 [[maybe_unused]] bool Success = 217 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 218 assert(Success && "Invalid Opcode"); 219 Offset *= (8 / (int)Scale); 220 221 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 222 if (IsPreDec) 223 MIB.addDef(AArch64::SP); 224 if (IsPaired) 225 MIB.addReg(Reg2); 226 MIB.addReg(Reg1) 227 .addReg(AArch64::SP) 228 .addImm(Offset) 229 .setMIFlag(MachineInstr::FrameSetup); 230 } 231 232 /// Emit a load-pair instruction for frame-destroy. 233 /// If Reg2 is AArch64::NoRegister, emit LDR instead. 234 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, 235 MachineBasicBlock::iterator Pos, 236 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 237 int Offset, bool IsPostDec) { 238 assert(Reg1 != AArch64::NoRegister); 239 const bool IsPaired = Reg2 != AArch64::NoRegister; 240 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 241 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 242 unsigned Opc; 243 if (IsPostDec) { 244 if (IsFloat) 245 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; 246 else 247 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; 248 } else { 249 if (IsFloat) 250 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; 251 else 252 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; 253 } 254 // The implicit scale for Offset is 8. 255 TypeSize Scale(0U, false), Width(0U, false); 256 int64_t MinOffset, MaxOffset; 257 [[maybe_unused]] bool Success = 258 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 259 assert(Success && "Invalid Opcode"); 260 Offset *= (8 / (int)Scale); 261 262 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 263 if (IsPostDec) 264 MIB.addDef(AArch64::SP); 265 if (IsPaired) 266 MIB.addReg(Reg2, getDefRegState(true)); 267 MIB.addReg(Reg1, getDefRegState(true)) 268 .addReg(AArch64::SP) 269 .addImm(Offset) 270 .setMIFlag(MachineInstr::FrameDestroy); 271 } 272 273 /// Return a unique function if a helper can be formed with the given Regs 274 /// and frame type. 275 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: 276 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 277 /// stp x20, x19, [sp, #16] 278 /// ret 279 /// 280 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: 281 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 282 /// stp x20, x19, [sp, #16] 283 /// add fp, sp, #32 284 /// ret 285 /// 286 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: 287 /// mov x16, x30 288 /// ldp x29, x30, [sp, #32] 289 /// ldp x20, x19, [sp, #16] 290 /// ldp x22, x21, [sp], #48 291 /// ret x16 292 /// 293 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: 294 /// ldp x29, x30, [sp, #32] 295 /// ldp x20, x19, [sp, #16] 296 /// ldp x22, x21, [sp], #48 297 /// ret 298 /// @param M module 299 /// @param MMI machine module info 300 /// @param Regs callee save regs that the helper will handle 301 /// @param Type frame helper type 302 /// @return a helper function 303 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, 304 SmallVectorImpl<unsigned> &Regs, 305 FrameHelperType Type, 306 unsigned FpOffset = 0) { 307 assert(Regs.size() >= 2); 308 auto Name = getFrameHelperName(Regs, Type, FpOffset); 309 auto *F = M->getFunction(Name); 310 if (F) 311 return F; 312 313 auto &MF = createFrameHelperMachineFunction(M, MMI, Name); 314 MachineBasicBlock &MBB = *MF.begin(); 315 const TargetSubtargetInfo &STI = MF.getSubtarget(); 316 const TargetInstrInfo &TII = *STI.getInstrInfo(); 317 318 int Size = (int)Regs.size(); 319 switch (Type) { 320 case FrameHelperType::Prolog: 321 case FrameHelperType::PrologFrame: { 322 // Compute the remaining SP adjust beyond FP/LR. 323 auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); 324 325 // If the register stored to the lowest address is not LR, we must subtract 326 // more from SP here. 327 if (LRIdx != Size - 2) { 328 assert(Regs[Size - 2] != AArch64::LR); 329 emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], 330 LRIdx - Size + 2, true); 331 } 332 333 // Store CSRs in the reverse order. 334 for (int I = Size - 3; I >= 0; I -= 2) { 335 // FP/LR has been stored at call-site. 336 if (Regs[I - 1] == AArch64::LR) 337 continue; 338 emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, 339 false); 340 } 341 if (Type == FrameHelperType::PrologFrame) 342 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) 343 .addDef(AArch64::FP) 344 .addUse(AArch64::SP) 345 .addImm(FpOffset) 346 .addImm(0) 347 .setMIFlag(MachineInstr::FrameSetup); 348 349 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 350 .addReg(AArch64::LR); 351 break; 352 } 353 case FrameHelperType::Epilog: 354 case FrameHelperType::EpilogTail: 355 if (Type == FrameHelperType::Epilog) 356 // Stash LR to X16 357 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) 358 .addDef(AArch64::X16) 359 .addReg(AArch64::XZR) 360 .addUse(AArch64::LR) 361 .addImm(0); 362 363 for (int I = 0; I < Size - 2; I += 2) 364 emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, 365 false); 366 // Restore the last CSR with post-increment of SP. 367 emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, 368 true); 369 370 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 371 .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); 372 break; 373 } 374 375 return M->getFunction(Name); 376 } 377 378 /// This function checks if a frame helper should be used for 379 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. 380 /// @param MBB machine basic block 381 /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog 382 /// @param Regs callee save registers that are saved or restored. 383 /// @param Type frame helper type 384 /// @return True if a use of helper is qualified. 385 static bool shouldUseFrameHelper(MachineBasicBlock &MBB, 386 MachineBasicBlock::iterator &NextMBBI, 387 SmallVectorImpl<unsigned> &Regs, 388 FrameHelperType Type) { 389 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 390 auto RegCount = Regs.size(); 391 assert(RegCount > 0 && (RegCount % 2 == 0)); 392 // # of instructions that will be outlined. 393 int InstCount = RegCount / 2; 394 395 // Do not use a helper call when not saving LR. 396 if (!llvm::is_contained(Regs, AArch64::LR)) 397 return false; 398 399 switch (Type) { 400 case FrameHelperType::Prolog: 401 // Prolog helper cannot save FP/LR. 402 InstCount--; 403 break; 404 case FrameHelperType::PrologFrame: { 405 // Effectively no change in InstCount since FpAdjustment is included. 406 break; 407 } 408 case FrameHelperType::Epilog: 409 // Bail-out if X16 is live across the epilog helper because it is used in 410 // the helper to handle X30. 411 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { 412 if (NextMI->readsRegister(AArch64::W16, TRI)) 413 return false; 414 } 415 // Epilog may not be in the last block. Check the liveness in successors. 416 for (const MachineBasicBlock *SuccMBB : MBB.successors()) { 417 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) 418 return false; 419 } 420 // No change in InstCount for the regular epilog case. 421 break; 422 case FrameHelperType::EpilogTail: { 423 // EpilogTail helper includes the caller's return. 424 if (NextMBBI == MBB.end()) 425 return false; 426 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) 427 return false; 428 InstCount++; 429 break; 430 } 431 } 432 433 return InstCount >= FrameHelperSizeThreshold; 434 } 435 436 /// Lower a HOM_Epilog pseudo instruction into a helper call while 437 /// creating the helper on demand. Or emit a sequence of loads in place when not 438 /// using a helper call. 439 /// 440 /// 1. With a helper including ret 441 /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI 442 /// ret ; NextMBBI 443 /// => 444 /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 445 /// ... ; NextMBBI 446 /// 447 /// 2. With a helper 448 /// HOM_Epilog x30, x29, x19, x20, x21, x22 449 /// => 450 /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 451 /// 452 /// 3. Without a helper 453 /// HOM_Epilog x30, x29, x19, x20, x21, x22 454 /// => 455 /// ldp x29, x30, [sp, #32] 456 /// ldp x20, x19, [sp, #16] 457 /// ldp x22, x21, [sp], #48 458 bool AArch64LowerHomogeneousPE::lowerEpilog( 459 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 460 MachineBasicBlock::iterator &NextMBBI) { 461 auto &MF = *MBB.getParent(); 462 MachineInstr &MI = *MBBI; 463 464 DebugLoc DL = MI.getDebugLoc(); 465 SmallVector<unsigned, 8> Regs; 466 bool HasUnpairedReg = false; 467 for (auto &MO : MI.operands()) 468 if (MO.isReg()) { 469 if (!MO.getReg().isValid()) { 470 // For now we are only expecting unpaired GP registers which should 471 // occur exactly once. 472 assert(!HasUnpairedReg); 473 HasUnpairedReg = true; 474 } 475 Regs.push_back(MO.getReg()); 476 } 477 (void)HasUnpairedReg; 478 int Size = (int)Regs.size(); 479 if (Size == 0) 480 return false; 481 // Registers are in pair. 482 assert(Size % 2 == 0); 483 assert(MI.getOpcode() == AArch64::HOM_Epilog); 484 485 auto Return = NextMBBI; 486 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { 487 // When MBB ends with a return, emit a tail-call to the epilog helper 488 auto *EpilogTailHelper = 489 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); 490 BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) 491 .addGlobalAddress(EpilogTailHelper) 492 .addImm(0) 493 .setMIFlag(MachineInstr::FrameDestroy) 494 .copyImplicitOps(MI) 495 .copyImplicitOps(*Return); 496 NextMBBI = std::next(Return); 497 Return->removeFromParent(); 498 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, 499 FrameHelperType::Epilog)) { 500 // The default epilog helper case. 501 auto *EpilogHelper = 502 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); 503 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 504 .addGlobalAddress(EpilogHelper) 505 .setMIFlag(MachineInstr::FrameDestroy) 506 .copyImplicitOps(MI); 507 } else { 508 // Fall back to no-helper. 509 for (int I = 0; I < Size - 2; I += 2) 510 emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); 511 // Restore the last CSR with post-increment of SP. 512 emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); 513 } 514 515 MBBI->removeFromParent(); 516 return true; 517 } 518 519 /// Lower a HOM_Prolog pseudo instruction into a helper call while 520 /// creating the helper on demand. Or emit a sequence of stores in place when 521 /// not using a helper call. 522 /// 523 /// 1. With a helper including frame-setup 524 /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 525 /// => 526 /// stp x29, x30, [sp, #-16]! 527 /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 528 /// 529 /// 2. With a helper 530 /// HOM_Prolog x30, x29, x19, x20, x21, x22 531 /// => 532 /// stp x29, x30, [sp, #-16]! 533 /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 534 /// 535 /// 3. Without a helper 536 /// HOM_Prolog x30, x29, x19, x20, x21, x22 537 /// => 538 /// stp x22, x21, [sp, #-48]! 539 /// stp x20, x19, [sp, #16] 540 /// stp x29, x30, [sp, #32] 541 bool AArch64LowerHomogeneousPE::lowerProlog( 542 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 543 MachineBasicBlock::iterator &NextMBBI) { 544 auto &MF = *MBB.getParent(); 545 MachineInstr &MI = *MBBI; 546 547 DebugLoc DL = MI.getDebugLoc(); 548 SmallVector<unsigned, 8> Regs; 549 bool HasUnpairedReg = false; 550 int LRIdx = 0; 551 std::optional<int> FpOffset; 552 for (auto &MO : MI.operands()) { 553 if (MO.isReg()) { 554 if (MO.getReg().isValid()) { 555 if (MO.getReg() == AArch64::LR) 556 LRIdx = Regs.size(); 557 } else { 558 // For now we are only expecting unpaired GP registers which should 559 // occur exactly once. 560 assert(!HasUnpairedReg); 561 HasUnpairedReg = true; 562 } 563 Regs.push_back(MO.getReg()); 564 } else if (MO.isImm()) { 565 FpOffset = MO.getImm(); 566 } 567 } 568 (void)HasUnpairedReg; 569 int Size = (int)Regs.size(); 570 if (Size == 0) 571 return false; 572 // Allow compact unwind case only for oww. 573 assert(Size % 2 == 0); 574 assert(MI.getOpcode() == AArch64::HOM_Prolog); 575 576 if (FpOffset && 577 shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { 578 // FP/LR is stored at the top of stack before the prolog helper call. 579 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 580 auto *PrologFrameHelper = getOrCreateFrameHelper( 581 M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); 582 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 583 .addGlobalAddress(PrologFrameHelper) 584 .setMIFlag(MachineInstr::FrameSetup) 585 .copyImplicitOps(MI) 586 .addReg(AArch64::FP, RegState::Implicit | RegState::Define) 587 .addReg(AArch64::SP, RegState::Implicit); 588 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, 589 FrameHelperType::Prolog)) { 590 // FP/LR is stored at the top of stack before the prolog helper call. 591 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 592 auto *PrologHelper = 593 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); 594 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 595 .addGlobalAddress(PrologHelper) 596 .setMIFlag(MachineInstr::FrameSetup) 597 .copyImplicitOps(MI); 598 } else { 599 // Fall back to no-helper. 600 emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); 601 for (int I = Size - 3; I >= 0; I -= 2) 602 emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); 603 if (FpOffset) { 604 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) 605 .addDef(AArch64::FP) 606 .addUse(AArch64::SP) 607 .addImm(*FpOffset) 608 .addImm(0) 609 .setMIFlag(MachineInstr::FrameSetup); 610 } 611 } 612 613 MBBI->removeFromParent(); 614 return true; 615 } 616 617 /// Process each machine instruction 618 /// @param MBB machine basic block 619 /// @param MBBI current instruction iterator 620 /// @param NextMBBI next instruction iterator which can be updated 621 /// @return True when IR is changed. 622 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, 623 MachineBasicBlock::iterator MBBI, 624 MachineBasicBlock::iterator &NextMBBI) { 625 MachineInstr &MI = *MBBI; 626 unsigned Opcode = MI.getOpcode(); 627 switch (Opcode) { 628 default: 629 break; 630 case AArch64::HOM_Prolog: 631 return lowerProlog(MBB, MBBI, NextMBBI); 632 case AArch64::HOM_Epilog: 633 return lowerEpilog(MBB, MBBI, NextMBBI); 634 } 635 return false; 636 } 637 638 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { 639 bool Modified = false; 640 641 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 642 while (MBBI != E) { 643 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 644 Modified |= runOnMI(MBB, MBBI, NMBBI); 645 MBBI = NMBBI; 646 } 647 648 return Modified; 649 } 650 651 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { 652 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 653 654 bool Modified = false; 655 for (auto &MBB : MF) 656 Modified |= runOnMBB(MBB); 657 return Modified; 658 } 659 660 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { 661 return new AArch64LowerHomogeneousPrologEpilog(); 662 } 663