1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass generates machine instructions for the CTR loops related pseudos: 10 // 1: MTCTRPseudo/DecreaseCTRPseudo 11 // 2: MTCTR8Pseudo/DecreaseCTR8Pseudo 12 // 13 // If a CTR loop can be generated: 14 // 1: MTCTRPseudo/MTCTR8Pseudo will be converted to "mtctr" 15 // 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "bdnz/bdz" and 16 // its user branch instruction can be deleted. 17 // 18 // If a CTR loop can not be generated due to clobber of CTR: 19 // 1: MTCTRPseudo/MTCTR8Pseudo can be deleted. 20 // 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "addi -1" and 21 // a "cmplwi/cmpldi". 22 // 23 // This pass runs just before register allocation, because we don't want 24 // register allocator to allocate register for DecreaseCTRPseudo if a CTR can be 25 // generated or if a CTR loop can not be generated, we don't have any condition 26 // register for the new added "cmplwi/cmpldi". 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "PPC.h" 31 #include "PPCInstrInfo.h" 32 #include "PPCSubtarget.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/MachineBasicBlock.h" 35 #include "llvm/CodeGen/MachineFunction.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstr.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineOperand.h" 40 #include "llvm/CodeGen/MachineRegisterInfo.h" 41 #include "llvm/CodeGen/Register.h" 42 #include "llvm/InitializePasses.h" 43 #include "llvm/Pass.h" 44 #include "llvm/PassRegistry.h" 45 #include "llvm/Support/CodeGen.h" 46 #include "llvm/Support/Debug.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include <cassert> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "ppc-ctrloops" 53 54 STATISTIC(NumCTRLoops, "Number of CTR loops generated"); 55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); 56 57 namespace { 58 class PPCCTRLoops : public MachineFunctionPass { 59 public: 60 static char ID; 61 62 PPCCTRLoops() : MachineFunctionPass(ID) { 63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.addRequired<MachineLoopInfo>(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 73 private: 74 const PPCInstrInfo *TII = nullptr; 75 MachineRegisterInfo *MRI = nullptr; 76 77 bool processLoop(MachineLoop *ML); 78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; 79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 80 MachineInstr *Dec); 81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); 82 }; 83 } // namespace 84 85 char PPCCTRLoops::ID = 0; 86 87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 88 false, false) 89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 91 false, false) 92 93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } 94 95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { 96 bool Changed = false; 97 98 auto &MLI = getAnalysis<MachineLoopInfo>(); 99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 100 MRI = &MF.getRegInfo(); 101 102 for (auto ML : MLI) { 103 if (ML->isOutermost()) 104 Changed |= processLoop(ML); 105 } 106 107 return Changed; 108 } 109 110 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { 111 if (!CheckReads) { 112 // If we are only checking for defs, that is we are going to find 113 // definitions before MTCTRloop, for this case: 114 // CTR defination inside the callee of a call instruction will not impact 115 // the defination of MTCTRloop, so we can use definesRegister() for the 116 // check, no need to check the regmask. 117 return (MI->definesRegister(PPC::CTR) && 118 !MI->registerDefIsDead(PPC::CTR)) || 119 (MI->definesRegister(PPC::CTR8) && 120 !MI->registerDefIsDead(PPC::CTR8)); 121 } 122 123 if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) || 124 (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8))) 125 return true; 126 127 if (MI->getDesc().isCall()) 128 return true; 129 130 // We define the CTR in the loop preheader, so if there is any CTR reader in 131 // the loop, we also can not use CTR loop form. 132 if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8)) 133 return true; 134 135 return false; 136 } 137 138 bool PPCCTRLoops::processLoop(MachineLoop *ML) { 139 bool Changed = false; 140 141 // Align with HardwareLoop pass, process inner loops first. 142 for (auto I = ML->begin(), E = ML->end(); I != E; ++I) 143 Changed |= processLoop(*I); 144 145 // If any inner loop is changed, outter loop must be without hardware loop 146 // intrinsics. 147 if (Changed) 148 return true; 149 150 auto IsLoopStart = [](MachineInstr &MI) { 151 return MI.getOpcode() == PPC::MTCTRPseudo || 152 MI.getOpcode() == PPC::MTCTR8Pseudo; 153 }; 154 155 auto SearchForStart = 156 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { 157 for (auto &MI : *MBB) { 158 if (IsLoopStart(MI)) 159 return &MI; 160 } 161 return nullptr; 162 }; 163 164 MachineInstr *Start = nullptr; 165 MachineInstr *Dec = nullptr; 166 bool InvalidCTRLoop = false; 167 168 MachineBasicBlock *Preheader = ML->getLoopPreheader(); 169 // If there is no preheader for this loop, there must be no MTCTRPseudo 170 // either. 171 if (!Preheader) 172 return false; 173 174 Start = SearchForStart(Preheader); 175 // This is not a CTR loop candidate. 176 if (!Start) 177 return false; 178 179 // If CTR is live to the preheader, we can not redefine the CTR register. 180 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8)) 181 InvalidCTRLoop = true; 182 183 // Make sure there is also no CTR clobber in the block preheader between the 184 // begin and MTCTR. 185 for (MachineBasicBlock::reverse_instr_iterator I = 186 std::next(Start->getReverseIterator()); 187 I != Preheader->instr_rend(); ++I) 188 // Only check the definitions of CTR. If there is non-dead definition for 189 // the CTR, we conservatively don't generate a CTR loop. 190 if (isCTRClobber(&*I, /* CheckReads */ false)) { 191 InvalidCTRLoop = true; 192 break; 193 } 194 195 // Make sure there is also no CTR clobber/user in the block preheader between 196 // MTCTR and the end. 197 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); 198 I != Preheader->instr_end(); ++I) 199 if (isCTRClobber(&*I, /* CheckReads */ true)) { 200 InvalidCTRLoop = true; 201 break; 202 } 203 204 // Find the CTR loop components and decide whether or not to fall back to a 205 // normal loop. 206 for (auto *MBB : reverse(ML->getBlocks())) { 207 for (auto &MI : *MBB) { 208 if (MI.getOpcode() == PPC::DecreaseCTRPseudo || 209 MI.getOpcode() == PPC::DecreaseCTR8Pseudo) 210 Dec = &MI; 211 else if (!InvalidCTRLoop) 212 // If any instruction clobber CTR, then we can not generate a CTR loop. 213 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); 214 } 215 if (Dec && InvalidCTRLoop) 216 break; 217 } 218 219 assert(Dec && "CTR loop is not complete!"); 220 221 if (InvalidCTRLoop) { 222 expandNormalLoops(ML, Start, Dec); 223 ++NumNormalLoops; 224 } 225 else { 226 expandCTRLoops(ML, Start, Dec); 227 ++NumCTRLoops; 228 } 229 return true; 230 } 231 232 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 233 MachineInstr *Dec) { 234 bool Is64Bit = 235 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 236 237 MachineBasicBlock *Preheader = Start->getParent(); 238 MachineBasicBlock *Exiting = Dec->getParent(); 239 assert((Preheader && Exiting) && 240 "Preheader and exiting should exist for CTR loop!"); 241 242 assert(Dec->getOperand(1).getImm() == 1 && 243 "Loop decrement stride must be 1"); 244 245 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; 246 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; 247 248 Register PHIDef = 249 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 250 : &PPC::GPRC_and_GPRC_NOR0RegClass); 251 252 Start->getParent()->getParent()->getProperties().reset( 253 MachineFunctionProperties::Property::NoPHIs); 254 255 // Generate "PHI" in the header block. 256 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), 257 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); 258 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); 259 260 Register ADDIDef = 261 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 262 : &PPC::GPRC_and_GPRC_NOR0RegClass); 263 // Generate "addi -1" in the exiting block. 264 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) 265 .addReg(PHIDef) 266 .addImm(-1); 267 268 // Add other inputs for the PHI node. 269 if (ML->isLoopLatch(Exiting)) { 270 // There must be only two predecessors for the loop header, one is the 271 // Preheader and the other one is loop latch Exiting. In hardware loop 272 // insertion pass, the block containing DecreaseCTRloop must dominate all 273 // loop latches. So there must be only one latch. 274 assert(ML->getHeader()->pred_size() == 2 && 275 "Loop header predecessor is not right!"); 276 PHIMIB.addReg(ADDIDef).addMBB(Exiting); 277 } else { 278 // If the block containing DecreaseCTRloop is not a loop latch, we can use 279 // ADDIDef as the value for all other blocks for the PHI. In hardware loop 280 // insertion pass, the block containing DecreaseCTRloop must dominate all 281 // loop latches. 282 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { 283 if (ML->contains(P)) { 284 assert(ML->isLoopLatch(P) && 285 "Loop's header in-loop predecessor is not loop latch!"); 286 PHIMIB.addReg(ADDIDef).addMBB(P); 287 } else 288 assert(P == Preheader && 289 "CTR loop should not be generated for irreducible loop!"); 290 } 291 } 292 293 // Generate the compare in the exiting block. 294 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); 295 auto CMPMIB = 296 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) 297 .addReg(ADDIDef) 298 .addImm(0); 299 300 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), 301 Dec->getOperand(0).getReg()) 302 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); 303 304 // Remove the pseudo instructions. 305 Start->eraseFromParent(); 306 Dec->eraseFromParent(); 307 } 308 309 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, 310 MachineInstr *Dec) { 311 bool Is64Bit = 312 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 313 314 MachineBasicBlock *Preheader = Start->getParent(); 315 MachineBasicBlock *Exiting = Dec->getParent(); 316 assert((Preheader && Exiting) && 317 "Preheader and exiting should exist for CTR loop!"); 318 319 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); 320 321 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; 322 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; 323 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); 324 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && 325 "There should be only one user for loop decrement pseudo!"); 326 327 unsigned Opcode = 0; 328 switch (BrInstr->getOpcode()) { 329 case PPC::BC: 330 Opcode = BDNZOpcode; 331 (void) ML; 332 assert(ML->contains(BrInstr->getOperand(1).getMBB()) && 333 "Invalid ctr loop!"); 334 break; 335 case PPC::BCn: 336 Opcode = BDZOpcode; 337 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && 338 "Invalid ctr loop!"); 339 break; 340 default: 341 llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); 342 } 343 344 unsigned MTCTROpcode = Is64Bit ? PPC::MTCTR8 : PPC::MTCTR; 345 346 // Generate "mtctr" in the loop preheader. 347 BuildMI(*Preheader, Start, Start->getDebugLoc(), TII->get(MTCTROpcode)) 348 .addReg(Start->getOperand(0).getReg()); 349 350 // Generate "bdnz/bdz" in the exiting block just before the terminator. 351 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) 352 .addMBB(BrInstr->getOperand(1).getMBB()); 353 354 // Remove the pseudo instructions. 355 Start->eraseFromParent(); 356 BrInstr->eraseFromParent(); 357 Dec->eraseFromParent(); 358 } 359