1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===---------------------------------------------------------------------===// 8 // 9 // This pass aims to reduce the number of logical operations on bits in the CR 10 // register. These instructions have a fairly high latency and only a single 11 // pipeline at their disposal in modern PPC cores. Furthermore, they have a 12 // tendency to occur in fairly small blocks where there's little opportunity 13 // to hide the latency between the CR logical operation and its user. 14 // 15 //===---------------------------------------------------------------------===// 16 17 #include "PPC.h" 18 #include "PPCInstrInfo.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/Config/llvm-config.h" 27 #include "llvm/Support/Debug.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "ppc-reduce-cr-ops" 32 33 STATISTIC(NumContainedSingleUseBinOps, 34 "Number of single-use binary CR logical ops contained in a block"); 35 STATISTIC(NumToSplitBlocks, 36 "Number of binary CR logical ops that can be used to split blocks"); 37 STATISTIC(TotalCRLogicals, "Number of CR logical ops."); 38 STATISTIC(TotalNullaryCRLogicals, 39 "Number of nullary CR logical ops (CRSET/CRUNSET)."); 40 STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops."); 41 STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops."); 42 STATISTIC(NumBlocksSplitOnBinaryCROp, 43 "Number of blocks split on CR binary logical ops."); 44 STATISTIC(NumNotSplitIdenticalOperands, 45 "Number of blocks not split due to operands being identical."); 46 STATISTIC(NumNotSplitChainCopies, 47 "Number of blocks not split due to operands being chained copies."); 48 STATISTIC(NumNotSplitWrongOpcode, 49 "Number of blocks not split due to the wrong opcode."); 50 51 /// Given a basic block \p Successor that potentially contains PHIs, this 52 /// function will look for any incoming values in the PHIs that are supposed to 53 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB. 54 /// Any such PHIs will be updated to reflect reality. 55 static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB, 56 MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) { 57 for (auto &MI : Successor->instrs()) { 58 if (!MI.isPHI()) 59 continue; 60 // This is a really ugly-looking loop, but it was pillaged directly from 61 // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). 62 for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { 63 MachineOperand &MO = MI.getOperand(i); 64 if (MO.getMBB() == OrigMBB) { 65 // Check if the instruction is actually defined in NewMBB. 66 if (MI.getOperand(i - 1).isReg()) { 67 MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg()); 68 if (DefMI->getParent() == NewMBB || 69 !OrigMBB->isSuccessor(Successor)) { 70 MO.setMBB(NewMBB); 71 break; 72 } 73 } 74 } 75 } 76 } 77 } 78 79 /// Given a basic block \p Successor that potentially contains PHIs, this 80 /// function will look for PHIs that have an incoming value from \p OrigMBB 81 /// and will add the same incoming value from \p NewMBB. 82 /// NOTE: This should only be used if \p NewMBB is an immediate dominator of 83 /// \p OrigMBB. 84 static void addIncomingValuesToPHIs(MachineBasicBlock *Successor, 85 MachineBasicBlock *OrigMBB, 86 MachineBasicBlock *NewMBB, 87 MachineRegisterInfo *MRI) { 88 assert(OrigMBB->isSuccessor(NewMBB) && 89 "NewMBB must be a successor of OrigMBB"); 90 for (auto &MI : Successor->instrs()) { 91 if (!MI.isPHI()) 92 continue; 93 // This is a really ugly-looking loop, but it was pillaged directly from 94 // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). 95 for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { 96 MachineOperand &MO = MI.getOperand(i); 97 if (MO.getMBB() == OrigMBB) { 98 MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI); 99 MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB); 100 break; 101 } 102 } 103 } 104 } 105 106 namespace { 107 struct BlockSplitInfo { 108 MachineInstr *OrigBranch; 109 MachineInstr *SplitBefore; 110 MachineInstr *SplitCond; 111 unsigned OrigSubreg; 112 unsigned SplitCondSubreg; 113 bool InvertNewBranch; 114 bool InvertOrigBranch; 115 bool BranchToFallThrough; 116 const MachineBranchProbabilityInfo *MBPI; 117 MachineInstr *MIToDelete; 118 MachineInstr *NewCond; 119 bool allInstrsInSameMBB() { 120 if (!OrigBranch || !SplitBefore || !SplitCond) 121 return false; 122 MachineBasicBlock *MBB = OrigBranch->getParent(); 123 if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB) 124 return false; 125 if (MIToDelete && MIToDelete->getParent() != MBB) 126 return false; 127 if (NewCond && NewCond->getParent() != MBB) 128 return false; 129 return true; 130 } 131 }; 132 } // end anonymous namespace 133 134 /// Splits a MachineBasicBlock to branch before \p SplitBefore. The original 135 /// branch is \p OrigBranch. The target of the new branch can either be the same 136 /// as the target of the original branch or the fallthrough successor of the 137 /// original block as determined by \p BranchToFallThrough. The branch 138 /// conditions will be inverted according to \p InvertNewBranch and 139 /// \p InvertOrigBranch. If an instruction that previously fed the branch is to 140 /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as 141 /// the branch condition. The branch probabilities will be set if the 142 /// MachineBranchProbabilityInfo isn't null. 143 static bool splitMBB(BlockSplitInfo &BSI) { 144 assert(BSI.allInstrsInSameMBB() && 145 "All instructions must be in the same block."); 146 147 MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent(); 148 MachineFunction *MF = ThisMBB->getParent(); 149 MachineRegisterInfo *MRI = &MF->getRegInfo(); 150 assert(MRI->isSSA() && "Can only do this while the function is in SSA form."); 151 if (ThisMBB->succ_size() != 2) { 152 LLVM_DEBUG( 153 dbgs() << "Don't know how to handle blocks that don't have exactly" 154 << " two successors.\n"); 155 return false; 156 } 157 158 const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 159 unsigned OrigBROpcode = BSI.OrigBranch->getOpcode(); 160 unsigned InvertedOpcode = 161 OrigBROpcode == PPC::BC 162 ? PPC::BCn 163 : OrigBROpcode == PPC::BCn 164 ? PPC::BC 165 : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR; 166 unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode; 167 MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB(); 168 MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin() 169 ? *ThisMBB->succ_rbegin() 170 : *ThisMBB->succ_begin(); 171 MachineBasicBlock *NewBRTarget = 172 BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget; 173 174 // It's impossible to know the precise branch probability after the split. 175 // But it still needs to be reasonable, the whole probability to original 176 // targets should not be changed. 177 // After split NewBRTarget will get two incoming edges. Assume P0 is the 178 // original branch probability to NewBRTarget, P1 and P2 are new branch 179 // probabilies to NewBRTarget after split. If the two edge frequencies are 180 // same, then 181 // F * P1 = F * P0 / 2 ==> P1 = P0 / 2 182 // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1) 183 BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br. 184 BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br. 185 ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown(); 186 ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown(); 187 if (BSI.MBPI) { 188 if (BSI.BranchToFallThrough) { 189 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2; 190 ProbFallThrough = ProbToNewTarget.getCompl(); 191 ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl(); 192 ProbOrigTarget = ProbOrigFallThrough.getCompl(); 193 } else { 194 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2; 195 ProbFallThrough = ProbToNewTarget.getCompl(); 196 ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl(); 197 ProbOrigFallThrough = ProbOrigTarget.getCompl(); 198 } 199 } 200 201 // Create a new basic block. 202 MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore; 203 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); 204 MachineFunction::iterator It = ThisMBB->getIterator(); 205 MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB); 206 MF->insert(++It, NewMBB); 207 208 // Move everything after SplitBefore into the new block. 209 NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end()); 210 NewMBB->transferSuccessors(ThisMBB); 211 if (!ProbOrigTarget.isUnknown()) { 212 auto MBBI = find(NewMBB->successors(), OrigTarget); 213 NewMBB->setSuccProbability(MBBI, ProbOrigTarget); 214 MBBI = find(NewMBB->successors(), OrigFallThrough); 215 NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough); 216 } 217 218 // Add the two successors to ThisMBB. 219 ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget); 220 ThisMBB->addSuccessor(NewMBB, ProbFallThrough); 221 222 // Add the branches to ThisMBB. 223 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), 224 TII->get(NewBROpcode)) 225 .addReg(BSI.SplitCond->getOperand(0).getReg(), 0, BSI.SplitCondSubreg) 226 .addMBB(NewBRTarget); 227 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(), 228 TII->get(PPC::B)) 229 .addMBB(NewMBB); 230 if (BSI.MIToDelete) 231 BSI.MIToDelete->eraseFromParent(); 232 233 // Change the condition on the original branch and invert it if requested. 234 auto FirstTerminator = NewMBB->getFirstTerminator(); 235 if (BSI.NewCond) { 236 assert(FirstTerminator->getOperand(0).isReg() && 237 "Can't update condition of unconditional branch."); 238 FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg()); 239 FirstTerminator->getOperand(0).setSubReg(BSI.OrigSubreg); 240 } 241 if (BSI.InvertOrigBranch) 242 FirstTerminator->setDesc(TII->get(InvertedOpcode)); 243 244 // If any of the PHIs in the successors of NewMBB reference values that 245 // now come from NewMBB, they need to be updated. 246 for (auto *Succ : NewMBB->successors()) { 247 updatePHIs(Succ, ThisMBB, NewMBB, MRI); 248 } 249 addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI); 250 251 // Set the call frame size on ThisMBB to the new basic blocks. 252 // See https://reviews.llvm.org/D156113. 253 NewMBB->setCallFrameSize(TII->getCallFrameSizeAt(ThisMBB->back())); 254 255 LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump()); 256 LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump()); 257 LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump()); 258 return true; 259 } 260 261 static bool isBinary(MachineInstr &MI) { 262 return MI.getNumOperands() == 3; 263 } 264 265 static bool isNullary(MachineInstr &MI) { 266 return MI.getNumOperands() == 1; 267 } 268 269 /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as 270 /// a flag to indicate if the first operand of \p CROp is used as the 271 /// SplitBefore operand, determines whether either of the branches are to be 272 /// inverted as well as whether the new target should be the original 273 /// fall-through block. 274 static void 275 computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1, 276 bool &InvertNewBranch, bool &InvertOrigBranch, 277 bool &TargetIsFallThrough) { 278 // The conditions under which each of the output operands should be [un]set 279 // can certainly be written much more concisely with just 3 if statements or 280 // ternary expressions. However, this provides a much clearer overview to the 281 // reader as to what is set for each <CROp, BROp, OpUsed> combination. 282 if (BROp == PPC::BC || BROp == PPC::BCLR) { 283 // Regular branches. 284 switch (CROp) { 285 default: 286 llvm_unreachable("Don't know how to handle this CR logical."); 287 case PPC::CROR: 288 InvertNewBranch = false; 289 InvertOrigBranch = false; 290 TargetIsFallThrough = false; 291 return; 292 case PPC::CRAND: 293 InvertNewBranch = true; 294 InvertOrigBranch = false; 295 TargetIsFallThrough = true; 296 return; 297 case PPC::CRNAND: 298 InvertNewBranch = true; 299 InvertOrigBranch = true; 300 TargetIsFallThrough = false; 301 return; 302 case PPC::CRNOR: 303 InvertNewBranch = false; 304 InvertOrigBranch = true; 305 TargetIsFallThrough = true; 306 return; 307 case PPC::CRORC: 308 InvertNewBranch = UsingDef1; 309 InvertOrigBranch = !UsingDef1; 310 TargetIsFallThrough = false; 311 return; 312 case PPC::CRANDC: 313 InvertNewBranch = !UsingDef1; 314 InvertOrigBranch = !UsingDef1; 315 TargetIsFallThrough = true; 316 return; 317 } 318 } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) { 319 // Negated branches. 320 switch (CROp) { 321 default: 322 llvm_unreachable("Don't know how to handle this CR logical."); 323 case PPC::CROR: 324 InvertNewBranch = true; 325 InvertOrigBranch = false; 326 TargetIsFallThrough = true; 327 return; 328 case PPC::CRAND: 329 InvertNewBranch = false; 330 InvertOrigBranch = false; 331 TargetIsFallThrough = false; 332 return; 333 case PPC::CRNAND: 334 InvertNewBranch = false; 335 InvertOrigBranch = true; 336 TargetIsFallThrough = true; 337 return; 338 case PPC::CRNOR: 339 InvertNewBranch = true; 340 InvertOrigBranch = true; 341 TargetIsFallThrough = false; 342 return; 343 case PPC::CRORC: 344 InvertNewBranch = !UsingDef1; 345 InvertOrigBranch = !UsingDef1; 346 TargetIsFallThrough = true; 347 return; 348 case PPC::CRANDC: 349 InvertNewBranch = UsingDef1; 350 InvertOrigBranch = !UsingDef1; 351 TargetIsFallThrough = false; 352 return; 353 } 354 } else 355 llvm_unreachable("Don't know how to handle this branch."); 356 } 357 358 namespace { 359 360 class PPCReduceCRLogicals : public MachineFunctionPass { 361 public: 362 static char ID; 363 struct CRLogicalOpInfo { 364 MachineInstr *MI; 365 // FIXME: If chains of copies are to be handled, this should be a vector. 366 std::pair<MachineInstr*, MachineInstr*> CopyDefs; 367 std::pair<MachineInstr*, MachineInstr*> TrueDefs; 368 unsigned IsBinary : 1; 369 unsigned IsNullary : 1; 370 unsigned ContainedInBlock : 1; 371 unsigned FeedsISEL : 1; 372 unsigned FeedsBR : 1; 373 unsigned FeedsLogical : 1; 374 unsigned SingleUse : 1; 375 unsigned DefsSingleUse : 1; 376 unsigned SubregDef1; 377 unsigned SubregDef2; 378 CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0), 379 ContainedInBlock(0), FeedsISEL(0), FeedsBR(0), 380 FeedsLogical(0), SingleUse(0), DefsSingleUse(1), 381 SubregDef1(0), SubregDef2(0) { } 382 void dump(); 383 }; 384 385 private: 386 const PPCInstrInfo *TII = nullptr; 387 MachineFunction *MF = nullptr; 388 MachineRegisterInfo *MRI = nullptr; 389 const MachineBranchProbabilityInfo *MBPI = nullptr; 390 391 // A vector to contain all the CR logical operations 392 SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps; 393 void initialize(MachineFunction &MFParm); 394 void collectCRLogicals(); 395 bool handleCROp(unsigned Idx); 396 bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); 397 static bool isCRLogical(MachineInstr &MI) { 398 unsigned Opc = MI.getOpcode(); 399 return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR || 400 Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT || 401 Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC || 402 Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET || 403 Opc == PPC::CR6UNSET; 404 } 405 bool simplifyCode() { 406 bool Changed = false; 407 // Not using a range-based for loop here as the vector may grow while being 408 // operated on. 409 for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) 410 Changed |= handleCROp(i); 411 return Changed; 412 } 413 414 public: 415 PPCReduceCRLogicals() : MachineFunctionPass(ID) {} 416 417 MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg, 418 MachineInstr *&CpDef); 419 bool runOnMachineFunction(MachineFunction &MF) override { 420 if (skipFunction(MF.getFunction())) 421 return false; 422 423 // If the subtarget doesn't use CR bits, there's nothing to do. 424 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); 425 if (!STI.useCRBits()) 426 return false; 427 428 initialize(MF); 429 collectCRLogicals(); 430 return simplifyCode(); 431 } 432 CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI); 433 void getAnalysisUsage(AnalysisUsage &AU) const override { 434 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); 435 AU.addRequired<MachineDominatorTreeWrapperPass>(); 436 MachineFunctionPass::getAnalysisUsage(AU); 437 } 438 }; 439 } // end anonymous namespace 440 441 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 442 LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() { 443 dbgs() << "CRLogicalOpMI: "; 444 MI->dump(); 445 dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL; 446 dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: "; 447 dbgs() << FeedsLogical << ", SingleUse: " << SingleUse; 448 dbgs() << ", DefsSingleUse: " << DefsSingleUse; 449 dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: "; 450 dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock; 451 if (!IsNullary) { 452 dbgs() << "\nDefs:\n"; 453 TrueDefs.first->dump(); 454 } 455 if (IsBinary) 456 TrueDefs.second->dump(); 457 dbgs() << "\n"; 458 if (CopyDefs.first) { 459 dbgs() << "CopyDef1: "; 460 CopyDefs.first->dump(); 461 } 462 if (CopyDefs.second) { 463 dbgs() << "CopyDef2: "; 464 CopyDefs.second->dump(); 465 } 466 } 467 #endif 468 469 PPCReduceCRLogicals::CRLogicalOpInfo 470 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) { 471 CRLogicalOpInfo Ret; 472 Ret.MI = &MIParam; 473 // Get the defs 474 if (isNullary(MIParam)) { 475 Ret.IsNullary = 1; 476 Ret.TrueDefs = std::make_pair(nullptr, nullptr); 477 Ret.CopyDefs = std::make_pair(nullptr, nullptr); 478 } else { 479 MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(), 480 Ret.SubregDef1, Ret.CopyDefs.first); 481 Ret.SubregDef1 = MIParam.getOperand(1).getSubReg(); 482 assert(Def1 && "Must be able to find a definition of operand 1."); 483 Ret.DefsSingleUse &= 484 MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg()); 485 Ret.DefsSingleUse &= 486 MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg()); 487 if (isBinary(MIParam)) { 488 Ret.IsBinary = 1; 489 MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(), 490 Ret.SubregDef2, 491 Ret.CopyDefs.second); 492 Ret.SubregDef2 = MIParam.getOperand(2).getSubReg(); 493 assert(Def2 && "Must be able to find a definition of operand 2."); 494 Ret.DefsSingleUse &= 495 MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg()); 496 Ret.DefsSingleUse &= 497 MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg()); 498 Ret.TrueDefs = std::make_pair(Def1, Def2); 499 } else { 500 Ret.TrueDefs = std::make_pair(Def1, nullptr); 501 Ret.CopyDefs.second = nullptr; 502 } 503 } 504 505 Ret.ContainedInBlock = 1; 506 // Get the uses 507 for (MachineInstr &UseMI : 508 MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) { 509 unsigned Opc = UseMI.getOpcode(); 510 if (Opc == PPC::ISEL || Opc == PPC::ISEL8) 511 Ret.FeedsISEL = 1; 512 if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR || 513 Opc == PPC::BCLRn) 514 Ret.FeedsBR = 1; 515 Ret.FeedsLogical = isCRLogical(UseMI); 516 if (UseMI.getParent() != MIParam.getParent()) 517 Ret.ContainedInBlock = 0; 518 } 519 Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0; 520 521 // We now know whether all the uses of the CR logical are in the same block. 522 if (!Ret.IsNullary) { 523 Ret.ContainedInBlock &= 524 (MIParam.getParent() == Ret.TrueDefs.first->getParent()); 525 if (Ret.IsBinary) 526 Ret.ContainedInBlock &= 527 (MIParam.getParent() == Ret.TrueDefs.second->getParent()); 528 } 529 LLVM_DEBUG(Ret.dump()); 530 if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) { 531 NumContainedSingleUseBinOps++; 532 if (Ret.FeedsBR && Ret.DefsSingleUse) 533 NumToSplitBlocks++; 534 } 535 return Ret; 536 } 537 538 /// Looks through a COPY instruction to the actual definition of the CR-bit 539 /// register and returns the instruction that defines it. 540 /// FIXME: This currently handles what is by-far the most common case: 541 /// an instruction that defines a CR field followed by a single copy of a bit 542 /// from that field into a virtual register. If chains of copies need to be 543 /// handled, this should have a loop until a non-copy instruction is found. 544 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, 545 unsigned &Subreg, 546 MachineInstr *&CpDef) { 547 if (!Register::isVirtualRegister(Reg)) 548 return nullptr; 549 MachineInstr *Copy = MRI->getVRegDef(Reg); 550 CpDef = Copy; 551 if (!Copy->isCopy()) 552 return Copy; 553 Register CopySrc = Copy->getOperand(1).getReg(); 554 if (!CopySrc.isVirtual()) { 555 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 556 // Loop backwards and return the first MI that modifies the physical CR Reg. 557 MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin(); 558 while (Me != B) 559 if ((--Me)->modifiesRegister(CopySrc, TRI)) 560 return &*Me; 561 return nullptr; 562 } 563 return MRI->getVRegDef(CopySrc); 564 } 565 566 void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { 567 MF = &MFParam; 568 MRI = &MF->getRegInfo(); 569 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 570 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); 571 572 AllCRLogicalOps.clear(); 573 } 574 575 /// Contains all the implemented transformations on CR logical operations. 576 /// For example, a binary CR logical can be used to split a block on its inputs, 577 /// a unary CR logical might be used to change the condition code on a 578 /// comparison feeding it. A nullary CR logical might simply be removable 579 /// if the user of the bit it [un]sets can be transformed. 580 bool PPCReduceCRLogicals::handleCROp(unsigned Idx) { 581 // We can definitely split a block on the inputs to a binary CR operation 582 // whose defs and (single) use are within the same block. 583 bool Changed = false; 584 CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; 585 if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && 586 CRI.DefsSingleUse) { 587 Changed = splitBlockOnBinaryCROp(CRI); 588 if (Changed) 589 NumBlocksSplitOnBinaryCROp++; 590 } 591 return Changed; 592 } 593 594 /// Splits a block that contains a CR-logical operation that feeds a branch 595 /// and whose operands are produced within the block. 596 /// Example: 597 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 598 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 599 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 600 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 601 /// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8 602 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 603 /// Becomes: 604 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 605 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 606 /// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6 607 /// 608 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 609 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 610 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 611 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) { 612 if (CRI.CopyDefs.first == CRI.CopyDefs.second) { 613 LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n"); 614 NumNotSplitIdenticalOperands++; 615 return false; 616 } 617 if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() || 618 CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) { 619 LLVM_DEBUG( 620 dbgs() << "Unable to split because one of the operands is a PHI or " 621 "chain of copies.\n"); 622 NumNotSplitChainCopies++; 623 return false; 624 } 625 // Note: keep in sync with computeBranchTargetAndInversion(). 626 if (CRI.MI->getOpcode() != PPC::CROR && 627 CRI.MI->getOpcode() != PPC::CRAND && 628 CRI.MI->getOpcode() != PPC::CRNOR && 629 CRI.MI->getOpcode() != PPC::CRNAND && 630 CRI.MI->getOpcode() != PPC::CRORC && 631 CRI.MI->getOpcode() != PPC::CRANDC) { 632 LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n"); 633 NumNotSplitWrongOpcode++; 634 return false; 635 } 636 LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump()); 637 MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first; 638 MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second; 639 640 bool UsingDef1 = false; 641 MachineInstr *SplitBefore = &*Def2It; 642 for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) { 643 if (Def1It == Def2It) { // Def2 comes before Def1. 644 SplitBefore = &*Def1It; 645 UsingDef1 = true; 646 break; 647 } 648 } 649 650 LLVM_DEBUG(dbgs() << "We will split the following block:\n";); 651 LLVM_DEBUG(CRI.MI->getParent()->dump()); 652 LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump()); 653 654 // Get the branch instruction. 655 MachineInstr *Branch = 656 MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent(); 657 658 // We want the new block to have no code in it other than the definition 659 // of the input to the CR logical and the CR logical itself. So we move 660 // those to the bottom of the block (just before the branch). Then we 661 // will split before the CR logical. 662 MachineBasicBlock *MBB = SplitBefore->getParent(); 663 auto FirstTerminator = MBB->getFirstTerminator(); 664 MachineBasicBlock::iterator FirstInstrToMove = 665 UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second; 666 MachineBasicBlock::iterator SecondInstrToMove = 667 UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second; 668 669 // The instructions that need to be moved are not guaranteed to be 670 // contiguous. Move them individually. 671 // FIXME: If one of the operands is a chain of (single use) copies, they 672 // can all be moved and we can still split. 673 MBB->splice(FirstTerminator, MBB, FirstInstrToMove); 674 if (FirstInstrToMove != SecondInstrToMove) 675 MBB->splice(FirstTerminator, MBB, SecondInstrToMove); 676 MBB->splice(FirstTerminator, MBB, CRI.MI); 677 678 unsigned Opc = CRI.MI->getOpcode(); 679 bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough; 680 computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1, 681 InvertNewBranch, InvertOrigBranch, 682 TargetIsFallThrough); 683 MachineInstr *NewCond = CRI.CopyDefs.first; 684 MachineInstr *SplitCond = CRI.CopyDefs.second; 685 if (!UsingDef1) { 686 std::swap(NewCond, SplitCond); 687 std::swap(CRI.SubregDef1, CRI.SubregDef2); 688 } 689 LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy")); 690 LLVM_DEBUG(dbgs() << " the original branch and the target is the " 691 << (TargetIsFallThrough ? "fallthrough block\n" 692 : "orig. target block\n")); 693 LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump()); 694 BlockSplitInfo BSI{ 695 Branch, SplitBefore, SplitCond, CRI.SubregDef1, 696 CRI.SubregDef2, InvertNewBranch, InvertOrigBranch, TargetIsFallThrough, 697 MBPI, CRI.MI, NewCond}; 698 bool Changed = splitMBB(BSI); 699 // If we've split on a CR logical that is fed by a CR logical, 700 // recompute the source CR logical as it may be usable for splitting. 701 if (Changed) { 702 bool Input1CRlogical = 703 CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first); 704 bool Input2CRlogical = 705 CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second); 706 if (Input1CRlogical) 707 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first)); 708 if (Input2CRlogical) 709 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second)); 710 } 711 return Changed; 712 } 713 714 void PPCReduceCRLogicals::collectCRLogicals() { 715 for (MachineBasicBlock &MBB : *MF) { 716 for (MachineInstr &MI : MBB) { 717 if (isCRLogical(MI)) { 718 AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI)); 719 TotalCRLogicals++; 720 if (AllCRLogicalOps.back().IsNullary) 721 TotalNullaryCRLogicals++; 722 else if (AllCRLogicalOps.back().IsBinary) 723 TotalBinaryCRLogicals++; 724 else 725 TotalUnaryCRLogicals++; 726 } 727 } 728 } 729 } 730 731 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE, 732 "PowerPC Reduce CR logical Operation", false, false) 733 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 734 INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE, 735 "PowerPC Reduce CR logical Operation", false, false) 736 737 char PPCReduceCRLogicals::ID = 0; 738 FunctionPass* 739 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); } 740