1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass generates machine instructions for the CTR loops related pseudos: 10 // 1: MTCTRloop/DecreaseCTRloop 11 // 2: MTCTR8loop/DecreaseCTR8loop 12 // 13 // If a CTR loop can be generated: 14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr" 15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and 16 // its user branch instruction can be deleted. 17 // 18 // If a CTR loop can not be generated due to clobber of CTR: 19 // 1: MTCTRloop/MTCTR8loop can be deleted. 20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and 21 // a "cmplwi/cmpldi". 22 // 23 // This pass runs just before register allocation, because we don't want 24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be 25 // generated or if a CTR loop can not be generated, we don't have any condition 26 // register for the new added "cmplwi/cmpldi". 27 // 28 //===----------------------------------------------------------------------===// 29 30 #include "PPC.h" 31 #include "PPCInstrInfo.h" 32 #include "PPCSubtarget.h" 33 #include "llvm/ADT/Statistic.h" 34 #include "llvm/CodeGen/MachineBasicBlock.h" 35 #include "llvm/CodeGen/MachineFunction.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstr.h" 38 #include "llvm/CodeGen/MachineLoopInfo.h" 39 #include "llvm/CodeGen/MachineOperand.h" 40 #include "llvm/CodeGen/MachineRegisterInfo.h" 41 #include "llvm/CodeGen/Register.h" 42 #include "llvm/InitializePasses.h" 43 #include "llvm/Pass.h" 44 #include "llvm/PassRegistry.h" 45 #include "llvm/Support/CodeGen.h" 46 #include "llvm/Support/Debug.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include <cassert> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "ppc-ctrloops" 53 54 STATISTIC(NumCTRLoops, "Number of CTR loops generated"); 55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated"); 56 57 namespace { 58 class PPCCTRLoops : public MachineFunctionPass { 59 public: 60 static char ID; 61 62 PPCCTRLoops() : MachineFunctionPass(ID) { 63 initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); 64 } 65 66 void getAnalysisUsage(AnalysisUsage &AU) const override { 67 AU.addRequired<MachineLoopInfoWrapperPass>(); 68 MachineFunctionPass::getAnalysisUsage(AU); 69 } 70 71 bool runOnMachineFunction(MachineFunction &MF) override; 72 73 private: 74 const PPCInstrInfo *TII = nullptr; 75 MachineRegisterInfo *MRI = nullptr; 76 77 bool processLoop(MachineLoop *ML); 78 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const; 79 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 80 MachineInstr *Dec); 81 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec); 82 }; 83 } // namespace 84 85 char PPCCTRLoops::ID = 0; 86 87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 88 false, false) 89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation", 91 false, false) 92 93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); } 94 95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) { 96 bool Changed = false; 97 98 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 99 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 100 MRI = &MF.getRegInfo(); 101 102 for (auto *ML : MLI) { 103 if (ML->isOutermost()) 104 Changed |= processLoop(ML); 105 } 106 107 #ifndef NDEBUG 108 for (const MachineBasicBlock &BB : MF) { 109 for (const MachineInstr &I : BB) 110 assert((I.getOpcode() != PPC::DecreaseCTRloop && 111 I.getOpcode() != PPC::DecreaseCTR8loop) && 112 "CTR loop pseudo is not expanded!"); 113 } 114 #endif 115 116 return Changed; 117 } 118 119 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const { 120 if (!CheckReads) { 121 // If we are only checking for defs, that is we are going to find 122 // definitions before MTCTRloop, for this case: 123 // CTR defination inside the callee of a call instruction will not impact 124 // the defination of MTCTRloop, so we can use definesRegister() for the 125 // check, no need to check the regmask. 126 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) || 127 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr); 128 } 129 130 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) || 131 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr)) 132 return true; 133 134 if (MI->getDesc().isCall()) 135 return true; 136 137 // We define the CTR in the loop preheader, so if there is any CTR reader in 138 // the loop, we also can not use CTR loop form. 139 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) || 140 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr)) 141 return true; 142 143 return false; 144 } 145 146 bool PPCCTRLoops::processLoop(MachineLoop *ML) { 147 bool Changed = false; 148 149 // Align with HardwareLoop pass, process inner loops first. 150 for (MachineLoop *I : *ML) 151 Changed |= processLoop(I); 152 153 // If any inner loop is changed, outter loop must be without hardware loop 154 // intrinsics. 155 if (Changed) 156 return true; 157 158 auto IsLoopStart = [](MachineInstr &MI) { 159 return MI.getOpcode() == PPC::MTCTRloop || 160 MI.getOpcode() == PPC::MTCTR8loop; 161 }; 162 163 auto SearchForStart = 164 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * { 165 for (auto &MI : *MBB) { 166 if (IsLoopStart(MI)) 167 return &MI; 168 } 169 return nullptr; 170 }; 171 172 MachineInstr *Start = nullptr; 173 MachineInstr *Dec = nullptr; 174 bool InvalidCTRLoop = false; 175 176 MachineBasicBlock *Preheader = ML->getLoopPreheader(); 177 // If there is no preheader for this loop, there must be no MTCTRloop 178 // either. 179 if (!Preheader) 180 return false; 181 182 Start = SearchForStart(Preheader); 183 // This is not a CTR loop candidate. 184 if (!Start) 185 return false; 186 187 // If CTR is live to the preheader, we can not redefine the CTR register. 188 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8)) 189 InvalidCTRLoop = true; 190 191 // Make sure there is also no CTR clobber in the block preheader between the 192 // begin and MTCTR. 193 for (MachineBasicBlock::reverse_instr_iterator I = 194 std::next(Start->getReverseIterator()); 195 I != Preheader->instr_rend(); ++I) 196 // Only check the definitions of CTR. If there is non-dead definition for 197 // the CTR, we conservatively don't generate a CTR loop. 198 if (isCTRClobber(&*I, /* CheckReads */ false)) { 199 InvalidCTRLoop = true; 200 break; 201 } 202 203 // Make sure there is also no CTR clobber/user in the block preheader between 204 // MTCTR and the end. 205 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator()); 206 I != Preheader->instr_end(); ++I) 207 if (isCTRClobber(&*I, /* CheckReads */ true)) { 208 InvalidCTRLoop = true; 209 break; 210 } 211 212 // Find the CTR loop components and decide whether or not to fall back to a 213 // normal loop. 214 for (auto *MBB : reverse(ML->getBlocks())) { 215 for (auto &MI : *MBB) { 216 if (MI.getOpcode() == PPC::DecreaseCTRloop || 217 MI.getOpcode() == PPC::DecreaseCTR8loop) 218 Dec = &MI; 219 else if (!InvalidCTRLoop) 220 // If any instruction clobber CTR, then we can not generate a CTR loop. 221 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true); 222 } 223 if (Dec && InvalidCTRLoop) 224 break; 225 } 226 227 assert(Dec && "CTR loop is not complete!"); 228 229 if (InvalidCTRLoop) { 230 expandNormalLoops(ML, Start, Dec); 231 ++NumNormalLoops; 232 } 233 else { 234 expandCTRLoops(ML, Start, Dec); 235 ++NumCTRLoops; 236 } 237 return true; 238 } 239 240 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start, 241 MachineInstr *Dec) { 242 bool Is64Bit = 243 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 244 245 MachineBasicBlock *Preheader = Start->getParent(); 246 MachineBasicBlock *Exiting = Dec->getParent(); 247 assert((Preheader && Exiting) && 248 "Preheader and exiting should exist for CTR loop!"); 249 250 assert(Dec->getOperand(1).getImm() == 1 && 251 "Loop decrement stride must be 1"); 252 253 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI; 254 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI; 255 256 Register PHIDef = 257 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 258 : &PPC::GPRC_and_GPRC_NOR0RegClass); 259 260 Start->getParent()->getParent()->getProperties().reset( 261 MachineFunctionProperties::Property::NoPHIs); 262 263 // Generate "PHI" in the header block. 264 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(), 265 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef); 266 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader); 267 268 Register ADDIDef = 269 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass 270 : &PPC::GPRC_and_GPRC_NOR0RegClass); 271 // Generate "addi -1" in the exiting block. 272 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef) 273 .addReg(PHIDef) 274 .addImm(-1); 275 276 // Add other inputs for the PHI node. 277 if (ML->isLoopLatch(Exiting)) { 278 // There must be only two predecessors for the loop header, one is the 279 // Preheader and the other one is loop latch Exiting. In hardware loop 280 // insertion pass, the block containing DecreaseCTRloop must dominate all 281 // loop latches. So there must be only one latch. 282 assert(ML->getHeader()->pred_size() == 2 && 283 "Loop header predecessor is not right!"); 284 PHIMIB.addReg(ADDIDef).addMBB(Exiting); 285 } else { 286 // If the block containing DecreaseCTRloop is not a loop latch, we can use 287 // ADDIDef as the value for all other blocks for the PHI. In hardware loop 288 // insertion pass, the block containing DecreaseCTRloop must dominate all 289 // loop latches. 290 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) { 291 if (ML->contains(P)) { 292 assert(ML->isLoopLatch(P) && 293 "Loop's header in-loop predecessor is not loop latch!"); 294 PHIMIB.addReg(ADDIDef).addMBB(P); 295 } else 296 assert(P == Preheader && 297 "CTR loop should not be generated for irreducible loop!"); 298 } 299 } 300 301 // Generate the compare in the exiting block. 302 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass); 303 auto CMPMIB = 304 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef) 305 .addReg(ADDIDef) 306 .addImm(0); 307 308 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY), 309 Dec->getOperand(0).getReg()) 310 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt); 311 312 // Remove the pseudo instructions. 313 Start->eraseFromParent(); 314 Dec->eraseFromParent(); 315 } 316 317 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start, 318 MachineInstr *Dec) { 319 bool Is64Bit = 320 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64(); 321 322 MachineBasicBlock *Preheader = Start->getParent(); 323 MachineBasicBlock *Exiting = Dec->getParent(); 324 325 (void)Preheader; 326 assert((Preheader && Exiting) && 327 "Preheader and exiting should exist for CTR loop!"); 328 329 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!"); 330 331 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ; 332 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ; 333 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg()); 334 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) && 335 "There should be only one user for loop decrement pseudo!"); 336 337 unsigned Opcode = 0; 338 switch (BrInstr->getOpcode()) { 339 case PPC::BC: 340 Opcode = BDNZOpcode; 341 (void) ML; 342 assert(ML->contains(BrInstr->getOperand(1).getMBB()) && 343 "Invalid ctr loop!"); 344 break; 345 case PPC::BCn: 346 Opcode = BDZOpcode; 347 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) && 348 "Invalid ctr loop!"); 349 break; 350 default: 351 llvm_unreachable("Unhandled branch user for DecreaseCTRloop."); 352 } 353 354 // Generate "bdnz/bdz" in the exiting block just before the terminator. 355 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode)) 356 .addMBB(BrInstr->getOperand(1).getMBB()); 357 358 // Remove the pseudo instructions. 359 BrInstr->eraseFromParent(); 360 Dec->eraseFromParent(); 361 } 362