1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass generates machine instructions for the CTR loops related pseudos: 10 // 1: MTCTRloop/DecreaseCTRloop 11 // 2: MTCTR8loop/DecreaseCTR8loop 12 // 13 // If a CTR loop can be generated: 14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr" 15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and 16 // its user branch instruction can be deleted. 17 // 18 // If a CTR loop can not be generated due to clobber of CTR: 19 // 1: MTCTRloop/MTCTR8loop can be deleted. 20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and 21 // a "cmplwi/cmpldi". 22 // 23 // This pass runs just before register allocation, because we don't want 24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be 25 // generated or if a CTR loop can not be generated, we don't have any condition 26 // register for the new added "cmplwi/cmpldi". 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "PPC.h" 31 #include "PPCInstrInfo.h" 32 #include "PPCSubtarget.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/MachineBasicBlock.h" 35 #include "llvm/CodeGen/MachineFunction.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstr.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineOperand.h" 40 #include "llvm/CodeGen/MachineRegisterInfo.h" 41 #include "llvm/CodeGen/Register.h" 42 #include "llvm/InitializePasses.h" 43 #include "llvm/Pass.h" 44 #include "llvm/PassRegistry.h" 45 #include "llvm/Support/CodeGen.h" 46 #include "llvm/Support/Debug.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include <cassert> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "ppc-ctrloops" 53 54 STATISTIC(NumCTRLoops, "Number of CTR loops generated"); 55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); 56 57 namespace { 58 class PPCCTRLoops : public MachineFunctionPass { 59 public: 60 static char ID; 61 62 PPCCTRLoops() : MachineFunctionPass(ID) { 63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.addRequired<MachineLoopInfo>(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 73 private: 74 const PPCInstrInfo *TII = nullptr; 75 MachineRegisterInfo *MRI = nullptr; 76 77 bool processLoop(MachineLoop *ML); 78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; 79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 80 MachineInstr *Dec); 81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); 82 }; 83 } // namespace 84 85 char PPCCTRLoops::ID = 0; 86 87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 88 false, false) 89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 91 false, false) 92 93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } 94 95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { 96 bool Changed = false; 97 98 auto &MLI = getAnalysis<MachineLoopInfo>(); 99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 100 MRI = &MF.getRegInfo(); 101 102 for (auto *ML : MLI) { 103 if (ML->isOutermost()) 104 Changed |= processLoop(ML); 105 } 106 107 #ifndef NDEBUG 108 for (const MachineBasicBlock &BB : MF) { 109 for (const MachineInstr &I : BB) 110 assert((I.getOpcode() != PPC::DecreaseCTRloop && 111 I.getOpcode() != PPC::DecreaseCTR8loop) && 112 "CTR loop pseudo is not expanded!"); 113 } 114 #endif 115 116 return Changed; 117 } 118 119 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { 120 if (!CheckReads) { 121 // If we are only checking for defs, that is we are going to find 122 // definitions before MTCTRloop, for this case: 123 // CTR defination inside the callee of a call instruction will not impact 124 // the defination of MTCTRloop, so we can use definesRegister() for the 125 // check, no need to check the regmask. 126 return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8); 127 } 128 129 if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8)) 130 return true; 131 132 if (MI->getDesc().isCall()) 133 return true; 134 135 // We define the CTR in the loop preheader, so if there is any CTR reader in 136 // the loop, we also can not use CTR loop form. 137 if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8)) 138 return true; 139 140 return false; 141 } 142 143 bool PPCCTRLoops::processLoop(MachineLoop *ML) { 144 bool Changed = false; 145 146 // Align with HardwareLoop pass, process inner loops first. 147 for (MachineLoop *I : *ML) 148 Changed |= processLoop(I); 149 150 // If any inner loop is changed, outter loop must be without hardware loop 151 // intrinsics. 152 if (Changed) 153 return true; 154 155 auto IsLoopStart = [](MachineInstr &MI) { 156 return MI.getOpcode() == PPC::MTCTRloop || 157 MI.getOpcode() == PPC::MTCTR8loop; 158 }; 159 160 auto SearchForStart = 161 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { 162 for (auto &MI : *MBB) { 163 if (IsLoopStart(MI)) 164 return &MI; 165 } 166 return nullptr; 167 }; 168 169 MachineInstr *Start = nullptr; 170 MachineInstr *Dec = nullptr; 171 bool InvalidCTRLoop = false; 172 173 MachineBasicBlock *Preheader = ML->getLoopPreheader(); 174 // If there is no preheader for this loop, there must be no MTCTRloop 175 // either. 176 if (!Preheader) 177 return false; 178 179 Start = SearchForStart(Preheader); 180 // This is not a CTR loop candidate. 181 if (!Start) 182 return false; 183 184 // If CTR is live to the preheader, we can not redefine the CTR register. 185 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8)) 186 InvalidCTRLoop = true; 187 188 // Make sure there is also no CTR clobber in the block preheader between the 189 // begin and MTCTR. 190 for (MachineBasicBlock::reverse_instr_iterator I = 191 std::next(Start->getReverseIterator()); 192 I != Preheader->instr_rend(); ++I) 193 // Only check the definitions of CTR. If there is non-dead definition for 194 // the CTR, we conservatively don't generate a CTR loop. 195 if (isCTRClobber(&*I, /* CheckReads */ false)) { 196 InvalidCTRLoop = true; 197 break; 198 } 199 200 // Make sure there is also no CTR clobber/user in the block preheader between 201 // MTCTR and the end. 202 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); 203 I != Preheader->instr_end(); ++I) 204 if (isCTRClobber(&*I, /* CheckReads */ true)) { 205 InvalidCTRLoop = true; 206 break; 207 } 208 209 // Find the CTR loop components and decide whether or not to fall back to a 210 // normal loop. 211 for (auto *MBB : reverse(ML->getBlocks())) { 212 for (auto &MI : *MBB) { 213 if (MI.getOpcode() == PPC::DecreaseCTRloop || 214 MI.getOpcode() == PPC::DecreaseCTR8loop) 215 Dec = &MI; 216 else if (!InvalidCTRLoop) 217 // If any instruction clobber CTR, then we can not generate a CTR loop. 218 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); 219 } 220 if (Dec && InvalidCTRLoop) 221 break; 222 } 223 224 assert(Dec && "CTR loop is not complete!"); 225 226 if (InvalidCTRLoop) { 227 expandNormalLoops(ML, Start, Dec); 228 ++NumNormalLoops; 229 } 230 else { 231 expandCTRLoops(ML, Start, Dec); 232 ++NumCTRLoops; 233 } 234 return true; 235 } 236 237 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 238 MachineInstr *Dec) { 239 bool Is64Bit = 240 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 241 242 MachineBasicBlock *Preheader = Start->getParent(); 243 MachineBasicBlock *Exiting = Dec->getParent(); 244 assert((Preheader && Exiting) && 245 "Preheader and exiting should exist for CTR loop!"); 246 247 assert(Dec->getOperand(1).getImm() == 1 && 248 "Loop decrement stride must be 1"); 249 250 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; 251 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; 252 253 Register PHIDef = 254 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 255 : &PPC::GPRC_and_GPRC_NOR0RegClass); 256 257 Start->getParent()->getParent()->getProperties().reset( 258 MachineFunctionProperties::Property::NoPHIs); 259 260 // Generate "PHI" in the header block. 261 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), 262 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); 263 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); 264 265 Register ADDIDef = 266 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 267 : &PPC::GPRC_and_GPRC_NOR0RegClass); 268 // Generate "addi -1" in the exiting block. 269 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) 270 .addReg(PHIDef) 271 .addImm(-1); 272 273 // Add other inputs for the PHI node. 274 if (ML->isLoopLatch(Exiting)) { 275 // There must be only two predecessors for the loop header, one is the 276 // Preheader and the other one is loop latch Exiting. In hardware loop 277 // insertion pass, the block containing DecreaseCTRloop must dominate all 278 // loop latches. So there must be only one latch. 279 assert(ML->getHeader()->pred_size() == 2 && 280 "Loop header predecessor is not right!"); 281 PHIMIB.addReg(ADDIDef).addMBB(Exiting); 282 } else { 283 // If the block containing DecreaseCTRloop is not a loop latch, we can use 284 // ADDIDef as the value for all other blocks for the PHI. In hardware loop 285 // insertion pass, the block containing DecreaseCTRloop must dominate all 286 // loop latches. 287 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { 288 if (ML->contains(P)) { 289 assert(ML->isLoopLatch(P) && 290 "Loop's header in-loop predecessor is not loop latch!"); 291 PHIMIB.addReg(ADDIDef).addMBB(P); 292 } else 293 assert(P == Preheader && 294 "CTR loop should not be generated for irreducible loop!"); 295 } 296 } 297 298 // Generate the compare in the exiting block. 299 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); 300 auto CMPMIB = 301 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) 302 .addReg(ADDIDef) 303 .addImm(0); 304 305 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), 306 Dec->getOperand(0).getReg()) 307 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); 308 309 // Remove the pseudo instructions. 310 Start->eraseFromParent(); 311 Dec->eraseFromParent(); 312 } 313 314 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, 315 MachineInstr *Dec) { 316 bool Is64Bit = 317 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 318 319 MachineBasicBlock *Preheader = Start->getParent(); 320 MachineBasicBlock *Exiting = Dec->getParent(); 321 322 (void)Preheader; 323 assert((Preheader && Exiting) && 324 "Preheader and exiting should exist for CTR loop!"); 325 326 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); 327 328 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; 329 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; 330 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); 331 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && 332 "There should be only one user for loop decrement pseudo!"); 333 334 unsigned Opcode = 0; 335 switch (BrInstr->getOpcode()) { 336 case PPC::BC: 337 Opcode = BDNZOpcode; 338 (void) ML; 339 assert(ML->contains(BrInstr->getOperand(1).getMBB()) && 340 "Invalid ctr loop!"); 341 break; 342 case PPC::BCn: 343 Opcode = BDZOpcode; 344 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && 345 "Invalid ctr loop!"); 346 break; 347 default: 348 llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); 349 } 350 351 // Generate "bdnz/bdz" in the exiting block just before the terminator. 352 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) 353 .addMBB(BrInstr->getOperand(1).getMBB()); 354 355 // Remove the pseudo instructions. 356 BrInstr->eraseFromParent(); 357 Dec->eraseFromParent(); 358 } 359