1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===---------------------------------------------------------------------===// 8 // 9 // This pass aims to reduce the number of logical operations on bits in the CR 10 // register. These instructions have a fairly high latency and only a single 11 // pipeline at their disposal in modern PPC cores. Furthermore, they have a 12 // tendency to occur in fairly small blocks where there's little opportunity 13 // to hide the latency between the CR logical operation and its user. 14 // 15 //===---------------------------------------------------------------------===// 16 17 #include "PPC.h" 18 #include "PPCInstrInfo.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineInstrBuilder.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/Config/llvm-config.h" 27 #include "llvm/InitializePasses.h" 28 #include "llvm/Support/Debug.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "ppc-reduce-cr-ops" 33 34 STATISTIC(NumContainedSingleUseBinOps, 35 "Number of single-use binary CR logical ops contained in a block"); 36 STATISTIC(NumToSplitBlocks, 37 "Number of binary CR logical ops that can be used to split blocks"); 38 STATISTIC(TotalCRLogicals, "Number of CR logical ops."); 39 STATISTIC(TotalNullaryCRLogicals, 40 "Number of nullary CR logical ops (CRSET/CRUNSET)."); 41 STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops."); 42 STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops."); 43 STATISTIC(NumBlocksSplitOnBinaryCROp, 44 "Number of blocks 
split on CR binary logical ops."); 45 STATISTIC(NumNotSplitIdenticalOperands, 46 "Number of blocks not split due to operands being identical."); 47 STATISTIC(NumNotSplitChainCopies, 48 "Number of blocks not split due to operands being chained copies."); 49 STATISTIC(NumNotSplitWrongOpcode, 50 "Number of blocks not split due to the wrong opcode."); 51 52 /// Given a basic block \p Successor that potentially contains PHIs, this 53 /// function will look for any incoming values in the PHIs that are supposed to 54 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB. 55 /// Any such PHIs will be updated to reflect reality. 56 static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB, 57 MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) { 58 for (auto &MI : Successor->instrs()) { 59 if (!MI.isPHI()) 60 continue; 61 // This is a really ugly-looking loop, but it was pillaged directly from 62 // MachineBasicBlock::transferSuccessorsAndUpdatePHIs(). 63 for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) { 64 MachineOperand &MO = MI.getOperand(i); 65 if (MO.getMBB() == OrigMBB) { 66 // Check if the instruction is actually defined in NewMBB. 67 if (MI.getOperand(i - 1).isReg()) { 68 MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg()); 69 if (DefMI->getParent() == NewMBB || 70 !OrigMBB->isSuccessor(Successor)) { 71 MO.setMBB(NewMBB); 72 break; 73 } 74 } 75 } 76 } 77 } 78 } 79 80 /// Given a basic block \p Successor that potentially contains PHIs, this 81 /// function will look for PHIs that have an incoming value from \p OrigMBB 82 /// and will add the same incoming value from \p NewMBB. 83 /// NOTE: This should only be used if \p NewMBB is an immediate dominator of 84 /// \p OrigMBB. 
static void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
                                    MachineBasicBlock *OrigMBB,
                                    MachineBasicBlock *NewMBB,
                                    MachineRegisterInfo *MRI) {
  assert(OrigMBB->isSuccessor(NewMBB) &&
         "NewMBB must be a successor of OrigMBB");
  for (auto &MI : Successor->instrs()) {
    if (!MI.isPHI())
      continue;
    // This is a really ugly-looking loop, but it was pillaged directly from
    // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
    // Operand i is an incoming block, operand i - 1 the register coming from
    // it.
    for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
      MachineOperand &MO = MI.getOperand(i);
      if (MO.getMBB() == OrigMBB) {
        // Append a (same register, NewMBB) pair to the PHI and stop looking
        // at this PHI.
        MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
        MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
        break;
      }
    }
  }
}

/// Bundle of everything splitMBB() needs to split a block on a CR logical
/// operation.
struct BlockSplitInfo {
  /// The branch that currently terminates the block.
  MachineInstr *OrigBranch;
  /// Everything from this instruction onwards moves into the new block.
  MachineInstr *SplitBefore;
  /// Operand 0 of this instruction is the condition register of the newly
  /// created branch.
  MachineInstr *SplitCond;
  /// Whether the new branch uses the inverted form of the original opcode.
  bool InvertNewBranch;
  /// Whether the original branch is switched to its inverted opcode.
  bool InvertOrigBranch;
  /// Whether the new branch targets the original fall-through successor
  /// rather than the original branch target.
  bool BranchToFallThrough;
  /// Optional; when null the new edges get unknown probabilities.
  const MachineBranchProbabilityInfo *MBPI;
  /// Optional instruction to erase once the block has been split.
  MachineInstr *MIToDelete;
  /// Optional; operand 0 of this instruction becomes the condition of the
  /// first terminator of the new block.
  MachineInstr *NewCond;
  /// Returns true if OrigBranch, SplitBefore and SplitCond are all non-null
  /// and live in the same block, and MIToDelete/NewCond (when provided) live
  /// in that block as well.
  bool allInstrsInSameMBB() {
    if (!OrigBranch || !SplitBefore || !SplitCond)
      return false;
    MachineBasicBlock *MBB = OrigBranch->getParent();
    if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
      return false;
    if (MIToDelete && MIToDelete->getParent() != MBB)
      return false;
    if (NewCond && NewCond->getParent() != MBB)
      return false;
    return true;
  }
};

/// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
/// branch is \p OrigBranch. The target of the new branch can either be the same
/// as the target of the original branch or the fallthrough successor of the
/// original block as determined by \p BranchToFallThrough. The branch
/// conditions will be inverted according to \p InvertNewBranch and
/// \p InvertOrigBranch. If an instruction that previously fed the branch is to
/// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
/// the branch condition. The branch probabilities will be set if the
/// MachineBranchProbabilityInfo isn't null.
///
/// \returns true if the block was split; false (with the block left as it
/// was on entry) if the block does not have exactly two successors.
static bool splitMBB(BlockSplitInfo &BSI) {
  assert(BSI.allInstrsInSameMBB() &&
         "All instructions must be in the same block.");

  MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
  MachineFunction *MF = ThisMBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
  if (ThisMBB->succ_size() != 2) {
    LLVM_DEBUG(
        dbgs() << "Don't know how to handle blocks that don't have exactly"
               << " two successors.\n");
    return false;
  }

  const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
  // Map the branch opcode to its negated counterpart:
  // BC <-> BCn and BCLR <-> BCLRn.
  unsigned InvertedOpcode =
      OrigBROpcode == PPC::BC
          ? PPC::BCn
          : OrigBROpcode == PPC::BCn
                ? PPC::BC
                : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
  unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
  MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
  // With exactly two successors, the fall-through block is whichever
  // successor is not the branch target.
  MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
                                           ? *ThisMBB->succ_rbegin()
                                           : *ThisMBB->succ_begin();
  MachineBasicBlock *NewBRTarget =
      BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;

  // It's impossible to know the precise branch probability after the split.
  // But it still needs to be reasonable, the whole probability to original
  // targets should not be changed.
  // After split NewBRTarget will get two incoming edges. Assume P0 is the
  // original branch probability to NewBRTarget, P1 and P2 are new branch
  // probabilities to NewBRTarget after split. If the two edge frequencies are
  // the same, then
  //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
  //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
  BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
  BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
  ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
  ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
  if (BSI.MBPI) {
    if (BSI.BranchToFallThrough) {
      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
      ProbFallThrough = ProbToNewTarget.getCompl();
      ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
      ProbOrigTarget = ProbOrigFallThrough.getCompl();
    } else {
      ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
      ProbFallThrough = ProbToNewTarget.getCompl();
      ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
      ProbOrigFallThrough = ProbOrigTarget.getCompl();
    }
  }

  // Create a new basic block.
  MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction::iterator It = ThisMBB->getIterator();
  MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  // The new block is placed immediately after ThisMBB in the function.
  MF->insert(++It, NewMBB);

  // Move everything after SplitBefore into the new block.
  NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
  NewMBB->transferSuccessors(ThisMBB);
  // The probabilities are only known when MBPI was provided.
  if (!ProbOrigTarget.isUnknown()) {
    auto MBBI = find(NewMBB->successors(), OrigTarget);
    NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
    MBBI = find(NewMBB->successors(), OrigFallThrough);
    NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
  }

  // Add the two successors to ThisMBB.
  ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
  ThisMBB->addSuccessor(NewMBB, ProbFallThrough);

  // Add the branches to ThisMBB.
  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
          TII->get(NewBROpcode))
      .addReg(BSI.SplitCond->getOperand(0).getReg())
      .addMBB(NewBRTarget);
  BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
          TII->get(PPC::B))
      .addMBB(NewMBB);
  if (BSI.MIToDelete)
    BSI.MIToDelete->eraseFromParent();

  // Change the condition on the original branch and invert it if requested.
  auto FirstTerminator = NewMBB->getFirstTerminator();
  if (BSI.NewCond) {
    assert(FirstTerminator->getOperand(0).isReg() &&
           "Can't update condition of unconditional branch.");
    FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
  }
  if (BSI.InvertOrigBranch)
    FirstTerminator->setDesc(TII->get(InvertedOpcode));

  // If any of the PHIs in the successors of NewMBB reference values that
  // now come from NewMBB, they need to be updated.
  for (auto *Succ : NewMBB->successors()) {
    updatePHIs(Succ, ThisMBB, NewMBB, MRI);
  }
  // NewBRTarget is now reached from both ThisMBB and NewMBB, so its PHIs
  // get an extra incoming entry.
  addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);

  LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
  LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
  LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
  return true;
}

/// Returns true if \p MI has exactly three operands (a def and two inputs).
static bool isBinary(MachineInstr &MI) {
  return MI.getNumOperands() == 3;
}

/// Returns true if \p MI has exactly one operand (only the def).
static bool isNullary(MachineInstr &MI) {
  return MI.getNumOperands() == 1;
}

/// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
/// a flag to indicate if the first operand of \p CROp is used as the
/// SplitBefore operand, determines whether either of the branches are to be
/// inverted as well as whether the new target should be the original
/// fall-through block.
266 static void 267 computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1, 268 bool &InvertNewBranch, bool &InvertOrigBranch, 269 bool &TargetIsFallThrough) { 270 // The conditions under which each of the output operands should be [un]set 271 // can certainly be written much more concisely with just 3 if statements or 272 // ternary expressions. However, this provides a much clearer overview to the 273 // reader as to what is set for each <CROp, BROp, OpUsed> combination. 274 if (BROp == PPC::BC || BROp == PPC::BCLR) { 275 // Regular branches. 276 switch (CROp) { 277 default: 278 llvm_unreachable("Don't know how to handle this CR logical."); 279 case PPC::CROR: 280 InvertNewBranch = false; 281 InvertOrigBranch = false; 282 TargetIsFallThrough = false; 283 return; 284 case PPC::CRAND: 285 InvertNewBranch = true; 286 InvertOrigBranch = false; 287 TargetIsFallThrough = true; 288 return; 289 case PPC::CRNAND: 290 InvertNewBranch = true; 291 InvertOrigBranch = true; 292 TargetIsFallThrough = false; 293 return; 294 case PPC::CRNOR: 295 InvertNewBranch = false; 296 InvertOrigBranch = true; 297 TargetIsFallThrough = true; 298 return; 299 case PPC::CRORC: 300 InvertNewBranch = UsingDef1; 301 InvertOrigBranch = !UsingDef1; 302 TargetIsFallThrough = false; 303 return; 304 case PPC::CRANDC: 305 InvertNewBranch = !UsingDef1; 306 InvertOrigBranch = !UsingDef1; 307 TargetIsFallThrough = true; 308 return; 309 } 310 } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) { 311 // Negated branches. 
312 switch (CROp) { 313 default: 314 llvm_unreachable("Don't know how to handle this CR logical."); 315 case PPC::CROR: 316 InvertNewBranch = true; 317 InvertOrigBranch = false; 318 TargetIsFallThrough = true; 319 return; 320 case PPC::CRAND: 321 InvertNewBranch = false; 322 InvertOrigBranch = false; 323 TargetIsFallThrough = false; 324 return; 325 case PPC::CRNAND: 326 InvertNewBranch = false; 327 InvertOrigBranch = true; 328 TargetIsFallThrough = true; 329 return; 330 case PPC::CRNOR: 331 InvertNewBranch = true; 332 InvertOrigBranch = true; 333 TargetIsFallThrough = false; 334 return; 335 case PPC::CRORC: 336 InvertNewBranch = !UsingDef1; 337 InvertOrigBranch = !UsingDef1; 338 TargetIsFallThrough = true; 339 return; 340 case PPC::CRANDC: 341 InvertNewBranch = UsingDef1; 342 InvertOrigBranch = !UsingDef1; 343 TargetIsFallThrough = false; 344 return; 345 } 346 } else 347 llvm_unreachable("Don't know how to handle this branch."); 348 } 349 350 namespace { 351 352 class PPCReduceCRLogicals : public MachineFunctionPass { 353 354 public: 355 static char ID; 356 struct CRLogicalOpInfo { 357 MachineInstr *MI; 358 // FIXME: If chains of copies are to be handled, this should be a vector. 
359 std::pair<MachineInstr*, MachineInstr*> CopyDefs; 360 std::pair<MachineInstr*, MachineInstr*> TrueDefs; 361 unsigned IsBinary : 1; 362 unsigned IsNullary : 1; 363 unsigned ContainedInBlock : 1; 364 unsigned FeedsISEL : 1; 365 unsigned FeedsBR : 1; 366 unsigned FeedsLogical : 1; 367 unsigned SingleUse : 1; 368 unsigned DefsSingleUse : 1; 369 unsigned SubregDef1; 370 unsigned SubregDef2; 371 CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0), 372 ContainedInBlock(0), FeedsISEL(0), FeedsBR(0), 373 FeedsLogical(0), SingleUse(0), DefsSingleUse(1), 374 SubregDef1(0), SubregDef2(0) { } 375 void dump(); 376 }; 377 378 private: 379 const PPCInstrInfo *TII = nullptr; 380 MachineFunction *MF = nullptr; 381 MachineRegisterInfo *MRI = nullptr; 382 const MachineBranchProbabilityInfo *MBPI = nullptr; 383 384 // A vector to contain all the CR logical operations 385 SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps; 386 void initialize(MachineFunction &MFParm); 387 void collectCRLogicals(); 388 bool handleCROp(unsigned Idx); 389 bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI); 390 static bool isCRLogical(MachineInstr &MI) { 391 unsigned Opc = MI.getOpcode(); 392 return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR || 393 Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT || 394 Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC || 395 Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET || 396 Opc == PPC::CR6UNSET; 397 } 398 bool simplifyCode() { 399 bool Changed = false; 400 // Not using a range-based for loop here as the vector may grow while being 401 // operated on. 
402 for (unsigned i = 0; i < AllCRLogicalOps.size(); i++) 403 Changed |= handleCROp(i); 404 return Changed; 405 } 406 407 public: 408 PPCReduceCRLogicals() : MachineFunctionPass(ID) { 409 initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry()); 410 } 411 412 MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg, 413 MachineInstr *&CpDef); 414 bool runOnMachineFunction(MachineFunction &MF) override { 415 if (skipFunction(MF.getFunction())) 416 return false; 417 418 // If the subtarget doesn't use CR bits, there's nothing to do. 419 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); 420 if (!STI.useCRBits()) 421 return false; 422 423 initialize(MF); 424 collectCRLogicals(); 425 return simplifyCode(); 426 } 427 CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI); 428 void getAnalysisUsage(AnalysisUsage &AU) const override { 429 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>(); 430 AU.addRequired<MachineDominatorTreeWrapperPass>(); 431 MachineFunctionPass::getAnalysisUsage(AU); 432 } 433 }; 434 435 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 436 LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() { 437 dbgs() << "CRLogicalOpMI: "; 438 MI->dump(); 439 dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL; 440 dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: "; 441 dbgs() << FeedsLogical << ", SingleUse: " << SingleUse; 442 dbgs() << ", DefsSingleUse: " << DefsSingleUse; 443 dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: "; 444 dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock; 445 if (!IsNullary) { 446 dbgs() << "\nDefs:\n"; 447 TrueDefs.first->dump(); 448 } 449 if (IsBinary) 450 TrueDefs.second->dump(); 451 dbgs() << "\n"; 452 if (CopyDefs.first) { 453 dbgs() << "CopyDef1: "; 454 CopyDefs.first->dump(); 455 } 456 if (CopyDefs.second) { 457 dbgs() << "CopyDef2: "; 458 CopyDefs.second->dump(); 459 } 460 } 461 #endif 462 463 PPCReduceCRLogicals::CRLogicalOpInfo 
464 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) { 465 CRLogicalOpInfo Ret; 466 Ret.MI = &MIParam; 467 // Get the defs 468 if (isNullary(MIParam)) { 469 Ret.IsNullary = 1; 470 Ret.TrueDefs = std::make_pair(nullptr, nullptr); 471 Ret.CopyDefs = std::make_pair(nullptr, nullptr); 472 } else { 473 MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(), 474 Ret.SubregDef1, Ret.CopyDefs.first); 475 assert(Def1 && "Must be able to find a definition of operand 1."); 476 Ret.DefsSingleUse &= 477 MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg()); 478 Ret.DefsSingleUse &= 479 MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg()); 480 if (isBinary(MIParam)) { 481 Ret.IsBinary = 1; 482 MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(), 483 Ret.SubregDef2, 484 Ret.CopyDefs.second); 485 assert(Def2 && "Must be able to find a definition of operand 2."); 486 Ret.DefsSingleUse &= 487 MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg()); 488 Ret.DefsSingleUse &= 489 MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg()); 490 Ret.TrueDefs = std::make_pair(Def1, Def2); 491 } else { 492 Ret.TrueDefs = std::make_pair(Def1, nullptr); 493 Ret.CopyDefs.second = nullptr; 494 } 495 } 496 497 Ret.ContainedInBlock = 1; 498 // Get the uses 499 for (MachineInstr &UseMI : 500 MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) { 501 unsigned Opc = UseMI.getOpcode(); 502 if (Opc == PPC::ISEL || Opc == PPC::ISEL8) 503 Ret.FeedsISEL = 1; 504 if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR || 505 Opc == PPC::BCLRn) 506 Ret.FeedsBR = 1; 507 Ret.FeedsLogical = isCRLogical(UseMI); 508 if (UseMI.getParent() != MIParam.getParent()) 509 Ret.ContainedInBlock = 0; 510 } 511 Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0; 512 513 // We now know whether all the uses of the CR logical are in the same block. 
514 if (!Ret.IsNullary) { 515 Ret.ContainedInBlock &= 516 (MIParam.getParent() == Ret.TrueDefs.first->getParent()); 517 if (Ret.IsBinary) 518 Ret.ContainedInBlock &= 519 (MIParam.getParent() == Ret.TrueDefs.second->getParent()); 520 } 521 LLVM_DEBUG(Ret.dump()); 522 if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) { 523 NumContainedSingleUseBinOps++; 524 if (Ret.FeedsBR && Ret.DefsSingleUse) 525 NumToSplitBlocks++; 526 } 527 return Ret; 528 } 529 530 /// Looks through a COPY instruction to the actual definition of the CR-bit 531 /// register and returns the instruction that defines it. 532 /// FIXME: This currently handles what is by-far the most common case: 533 /// an instruction that defines a CR field followed by a single copy of a bit 534 /// from that field into a virtual register. If chains of copies need to be 535 /// handled, this should have a loop until a non-copy instruction is found. 536 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg, 537 unsigned &Subreg, 538 MachineInstr *&CpDef) { 539 Subreg = -1; 540 if (!Register::isVirtualRegister(Reg)) 541 return nullptr; 542 MachineInstr *Copy = MRI->getVRegDef(Reg); 543 CpDef = Copy; 544 if (!Copy->isCopy()) 545 return Copy; 546 Register CopySrc = Copy->getOperand(1).getReg(); 547 Subreg = Copy->getOperand(1).getSubReg(); 548 if (!CopySrc.isVirtual()) { 549 const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); 550 // Set the Subreg 551 if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ) 552 Subreg = PPC::sub_eq; 553 if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT) 554 Subreg = PPC::sub_lt; 555 if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT) 556 Subreg = PPC::sub_gt; 557 if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN) 558 Subreg = PPC::sub_un; 559 // Loop backwards and return the first MI that modifies the physical CR Reg. 
560 MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin(); 561 while (Me != B) 562 if ((--Me)->modifiesRegister(CopySrc, TRI)) 563 return &*Me; 564 return nullptr; 565 } 566 return MRI->getVRegDef(CopySrc); 567 } 568 569 void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) { 570 MF = &MFParam; 571 MRI = &MF->getRegInfo(); 572 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 573 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(); 574 575 AllCRLogicalOps.clear(); 576 } 577 578 /// Contains all the implemented transformations on CR logical operations. 579 /// For example, a binary CR logical can be used to split a block on its inputs, 580 /// a unary CR logical might be used to change the condition code on a 581 /// comparison feeding it. A nullary CR logical might simply be removable 582 /// if the user of the bit it [un]sets can be transformed. 583 bool PPCReduceCRLogicals::handleCROp(unsigned Idx) { 584 // We can definitely split a block on the inputs to a binary CR operation 585 // whose defs and (single) use are within the same block. 586 bool Changed = false; 587 CRLogicalOpInfo CRI = AllCRLogicalOps[Idx]; 588 if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR && 589 CRI.DefsSingleUse) { 590 Changed = splitBlockOnBinaryCROp(CRI); 591 if (Changed) 592 NumBlocksSplitOnBinaryCROp++; 593 } 594 return Changed; 595 } 596 597 /// Splits a block that contains a CR-logical operation that feeds a branch 598 /// and whose operands are produced within the block. 
599 /// Example: 600 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 601 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 602 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 603 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 604 /// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8 605 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 606 /// Becomes: 607 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2 608 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5 609 /// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6 610 /// 611 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3 612 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7 613 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9 614 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) { 615 if (CRI.CopyDefs.first == CRI.CopyDefs.second) { 616 LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n"); 617 NumNotSplitIdenticalOperands++; 618 return false; 619 } 620 if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() || 621 CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) { 622 LLVM_DEBUG( 623 dbgs() << "Unable to split because one of the operands is a PHI or " 624 "chain of copies.\n"); 625 NumNotSplitChainCopies++; 626 return false; 627 } 628 // Note: keep in sync with computeBranchTargetAndInversion(). 
629 if (CRI.MI->getOpcode() != PPC::CROR && 630 CRI.MI->getOpcode() != PPC::CRAND && 631 CRI.MI->getOpcode() != PPC::CRNOR && 632 CRI.MI->getOpcode() != PPC::CRNAND && 633 CRI.MI->getOpcode() != PPC::CRORC && 634 CRI.MI->getOpcode() != PPC::CRANDC) { 635 LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n"); 636 NumNotSplitWrongOpcode++; 637 return false; 638 } 639 LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump()); 640 MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first; 641 MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second; 642 643 bool UsingDef1 = false; 644 MachineInstr *SplitBefore = &*Def2It; 645 for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) { 646 if (Def1It == Def2It) { // Def2 comes before Def1. 647 SplitBefore = &*Def1It; 648 UsingDef1 = true; 649 break; 650 } 651 } 652 653 LLVM_DEBUG(dbgs() << "We will split the following block:\n";); 654 LLVM_DEBUG(CRI.MI->getParent()->dump()); 655 LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump()); 656 657 // Get the branch instruction. 658 MachineInstr *Branch = 659 MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent(); 660 661 // We want the new block to have no code in it other than the definition 662 // of the input to the CR logical and the CR logical itself. So we move 663 // those to the bottom of the block (just before the branch). Then we 664 // will split before the CR logical. 665 MachineBasicBlock *MBB = SplitBefore->getParent(); 666 auto FirstTerminator = MBB->getFirstTerminator(); 667 MachineBasicBlock::iterator FirstInstrToMove = 668 UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second; 669 MachineBasicBlock::iterator SecondInstrToMove = 670 UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second; 671 672 // The instructions that need to be moved are not guaranteed to be 673 // contiguous. Move them individually. 
674 // FIXME: If one of the operands is a chain of (single use) copies, they 675 // can all be moved and we can still split. 676 MBB->splice(FirstTerminator, MBB, FirstInstrToMove); 677 if (FirstInstrToMove != SecondInstrToMove) 678 MBB->splice(FirstTerminator, MBB, SecondInstrToMove); 679 MBB->splice(FirstTerminator, MBB, CRI.MI); 680 681 unsigned Opc = CRI.MI->getOpcode(); 682 bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough; 683 computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1, 684 InvertNewBranch, InvertOrigBranch, 685 TargetIsFallThrough); 686 MachineInstr *SplitCond = 687 UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first; 688 LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy")); 689 LLVM_DEBUG(dbgs() << " the original branch and the target is the " 690 << (TargetIsFallThrough ? "fallthrough block\n" 691 : "orig. target block\n")); 692 LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump()); 693 BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch, 694 InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI, 695 UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second }; 696 bool Changed = splitMBB(BSI); 697 // If we've split on a CR logical that is fed by a CR logical, 698 // recompute the source CR logical as it may be usable for splitting. 
699 if (Changed) { 700 bool Input1CRlogical = 701 CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first); 702 bool Input2CRlogical = 703 CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second); 704 if (Input1CRlogical) 705 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first)); 706 if (Input2CRlogical) 707 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second)); 708 } 709 return Changed; 710 } 711 712 void PPCReduceCRLogicals::collectCRLogicals() { 713 for (MachineBasicBlock &MBB : *MF) { 714 for (MachineInstr &MI : MBB) { 715 if (isCRLogical(MI)) { 716 AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI)); 717 TotalCRLogicals++; 718 if (AllCRLogicalOps.back().IsNullary) 719 TotalNullaryCRLogicals++; 720 else if (AllCRLogicalOps.back().IsBinary) 721 TotalBinaryCRLogicals++; 722 else 723 TotalUnaryCRLogicals++; 724 } 725 } 726 } 727 } 728 729 } // end anonymous namespace 730 731 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE, 732 "PowerPC Reduce CR logical Operation", false, false) 733 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 734 INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE, 735 "PowerPC Reduce CR logical Operation", false, false) 736 737 char PPCReduceCRLogicals::ID = 0; 738 FunctionPass* 739 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); } 740