xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp (revision 48c35ae6ebfc6d9a2259979d915fd3bc5d6c01db)
1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass aims to reduce the number of logical operations on bits in the CR
10 // register. These instructions have a fairly high latency and only a single
11 // pipeline at their disposal in modern PPC cores. Furthermore, they have a
12 // tendency to occur in fairly small blocks where there's little opportunity
13 // to hide the latency between the CR logical operation and its user.
14 //
15 //===---------------------------------------------------------------------===//
16 
17 #include "PPC.h"
18 #include "PPCInstrInfo.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/Config/llvm-config.h"
27 #include "llvm/Support/Debug.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "ppc-reduce-cr-ops"
32 
33 STATISTIC(NumContainedSingleUseBinOps,
34           "Number of single-use binary CR logical ops contained in a block");
35 STATISTIC(NumToSplitBlocks,
36           "Number of binary CR logical ops that can be used to split blocks");
37 STATISTIC(TotalCRLogicals, "Number of CR logical ops.");
38 STATISTIC(TotalNullaryCRLogicals,
39           "Number of nullary CR logical ops (CRSET/CRUNSET).");
40 STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
41 STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
42 STATISTIC(NumBlocksSplitOnBinaryCROp,
43           "Number of blocks split on CR binary logical ops.");
44 STATISTIC(NumNotSplitIdenticalOperands,
45           "Number of blocks not split due to operands being identical.");
46 STATISTIC(NumNotSplitChainCopies,
47           "Number of blocks not split due to operands being chained copies.");
48 STATISTIC(NumNotSplitWrongOpcode,
49           "Number of blocks not split due to the wrong opcode.");
50 
51 /// Given a basic block \p Successor that potentially contains PHIs, this
52 /// function will look for any incoming values in the PHIs that are supposed to
53 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
54 /// Any such PHIs will be updated to reflect reality.
updatePHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)55 static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB,
56                        MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) {
57   for (auto &MI : Successor->instrs()) {
58     if (!MI.isPHI())
59       continue;
60     // This is a really ugly-looking loop, but it was pillaged directly from
61     // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
62     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
63       MachineOperand &MO = MI.getOperand(i);
64       if (MO.getMBB() == OrigMBB) {
65         // Check if the instruction is actually defined in NewMBB.
66         if (MI.getOperand(i - 1).isReg()) {
67           MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg());
68           if (DefMI->getParent() == NewMBB ||
69               !OrigMBB->isSuccessor(Successor)) {
70             MO.setMBB(NewMBB);
71             break;
72           }
73         }
74       }
75     }
76   }
77 }
78 
79 /// Given a basic block \p Successor that potentially contains PHIs, this
80 /// function will look for PHIs that have an incoming value from \p OrigMBB
81 /// and will add the same incoming value from \p NewMBB.
82 /// NOTE: This should only be used if \p NewMBB is an immediate dominator of
83 /// \p OrigMBB.
addIncomingValuesToPHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)84 static void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
85                                     MachineBasicBlock *OrigMBB,
86                                     MachineBasicBlock *NewMBB,
87                                     MachineRegisterInfo *MRI) {
88   assert(OrigMBB->isSuccessor(NewMBB) &&
89          "NewMBB must be a successor of OrigMBB");
90   for (auto &MI : Successor->instrs()) {
91     if (!MI.isPHI())
92       continue;
93     // This is a really ugly-looking loop, but it was pillaged directly from
94     // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
95     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
96       MachineOperand &MO = MI.getOperand(i);
97       if (MO.getMBB() == OrigMBB) {
98         MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
99         MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
100         break;
101       }
102     }
103   }
104 }
105 
106 namespace {
107 struct BlockSplitInfo {
108   MachineInstr *OrigBranch;
109   MachineInstr *SplitBefore;
110   MachineInstr *SplitCond;
111   unsigned OrigSubreg;
112   unsigned SplitCondSubreg;
113   bool InvertNewBranch;
114   bool InvertOrigBranch;
115   bool BranchToFallThrough;
116   const MachineBranchProbabilityInfo *MBPI;
117   MachineInstr *MIToDelete;
118   MachineInstr *NewCond;
allInstrsInSameMBB__anoncfc979730111::BlockSplitInfo119   bool allInstrsInSameMBB() {
120     if (!OrigBranch || !SplitBefore || !SplitCond)
121       return false;
122     MachineBasicBlock *MBB = OrigBranch->getParent();
123     if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
124       return false;
125     if (MIToDelete && MIToDelete->getParent() != MBB)
126       return false;
127     if (NewCond && NewCond->getParent() != MBB)
128       return false;
129     return true;
130   }
131 };
132 } // end anonymous namespace
133 
134 /// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
135 /// branch is \p OrigBranch. The target of the new branch can either be the same
136 /// as the target of the original branch or the fallthrough successor of the
137 /// original block as determined by \p BranchToFallThrough. The branch
138 /// conditions will be inverted according to \p InvertNewBranch and
139 /// \p InvertOrigBranch. If an instruction that previously fed the branch is to
140 /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
141 /// the branch condition. The branch probabilities will be set if the
142 /// MachineBranchProbabilityInfo isn't null.
splitMBB(BlockSplitInfo & BSI)143 static bool splitMBB(BlockSplitInfo &BSI) {
144   assert(BSI.allInstrsInSameMBB() &&
145          "All instructions must be in the same block.");
146 
147   MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
148   MachineFunction *MF = ThisMBB->getParent();
149   MachineRegisterInfo *MRI = &MF->getRegInfo();
150   assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
151   if (ThisMBB->succ_size() != 2) {
152     LLVM_DEBUG(
153         dbgs() << "Don't know how to handle blocks that don't have exactly"
154                << " two successors.\n");
155     return false;
156   }
157 
158   const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
159   unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
160   unsigned InvertedOpcode =
161       OrigBROpcode == PPC::BC
162           ? PPC::BCn
163           : OrigBROpcode == PPC::BCn
164                 ? PPC::BC
165                 : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
166   unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
167   MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
168   MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
169                                            ? *ThisMBB->succ_rbegin()
170                                            : *ThisMBB->succ_begin();
171   MachineBasicBlock *NewBRTarget =
172       BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
173 
174   // It's impossible to know the precise branch probability after the split.
175   // But it still needs to be reasonable, the whole probability to original
176   // targets should not be changed.
177   // After split NewBRTarget will get two incoming edges. Assume P0 is the
178   // original branch probability to NewBRTarget, P1 and P2 are new branch
179   // probabilies to NewBRTarget after split. If the two edge frequencies are
180   // same, then
181   //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
182   //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
183   BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
184   BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
185   ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
186   ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
187   if (BSI.MBPI) {
188     if (BSI.BranchToFallThrough) {
189       ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
190       ProbFallThrough = ProbToNewTarget.getCompl();
191       ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
192       ProbOrigTarget = ProbOrigFallThrough.getCompl();
193     } else {
194       ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
195       ProbFallThrough = ProbToNewTarget.getCompl();
196       ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
197       ProbOrigFallThrough = ProbOrigTarget.getCompl();
198     }
199   }
200 
201   // Create a new basic block.
202   MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
203   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
204   MachineFunction::iterator It = ThisMBB->getIterator();
205   MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
206   MF->insert(++It, NewMBB);
207 
208   // Move everything after SplitBefore into the new block.
209   NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
210   NewMBB->transferSuccessors(ThisMBB);
211   if (!ProbOrigTarget.isUnknown()) {
212     auto MBBI = find(NewMBB->successors(), OrigTarget);
213     NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
214     MBBI = find(NewMBB->successors(), OrigFallThrough);
215     NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
216   }
217 
218   // Add the two successors to ThisMBB.
219   ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
220   ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
221 
222   // Add the branches to ThisMBB.
223   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
224           TII->get(NewBROpcode))
225       .addReg(BSI.SplitCond->getOperand(0).getReg(), 0, BSI.SplitCondSubreg)
226       .addMBB(NewBRTarget);
227   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
228           TII->get(PPC::B))
229       .addMBB(NewMBB);
230   if (BSI.MIToDelete)
231     BSI.MIToDelete->eraseFromParent();
232 
233   // Change the condition on the original branch and invert it if requested.
234   auto FirstTerminator = NewMBB->getFirstTerminator();
235   if (BSI.NewCond) {
236     assert(FirstTerminator->getOperand(0).isReg() &&
237            "Can't update condition of unconditional branch.");
238     FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
239     FirstTerminator->getOperand(0).setSubReg(BSI.OrigSubreg);
240   }
241   if (BSI.InvertOrigBranch)
242     FirstTerminator->setDesc(TII->get(InvertedOpcode));
243 
244   // If any of the PHIs in the successors of NewMBB reference values that
245   // now come from NewMBB, they need to be updated.
246   for (auto *Succ : NewMBB->successors()) {
247     updatePHIs(Succ, ThisMBB, NewMBB, MRI);
248   }
249   addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);
250 
251   // Set the call frame size on ThisMBB to the new basic blocks.
252   // See https://reviews.llvm.org/D156113.
253   NewMBB->setCallFrameSize(TII->getCallFrameSizeAt(ThisMBB->back()));
254 
255   LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
256   LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
257   LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
258   return true;
259 }
260 
isBinary(MachineInstr & MI)261 static bool isBinary(MachineInstr &MI) {
262   return MI.getNumOperands() == 3;
263 }
264 
isNullary(MachineInstr & MI)265 static bool isNullary(MachineInstr &MI) {
266   return MI.getNumOperands() == 1;
267 }
268 
269 /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
270 /// a flag to indicate if the first operand of \p CROp is used as the
271 /// SplitBefore operand, determines whether either of the branches are to be
272 /// inverted as well as whether the new target should be the original
273 /// fall-through block.
274 static void
computeBranchTargetAndInversion(unsigned CROp,unsigned BROp,bool UsingDef1,bool & InvertNewBranch,bool & InvertOrigBranch,bool & TargetIsFallThrough)275 computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
276                                 bool &InvertNewBranch, bool &InvertOrigBranch,
277                                 bool &TargetIsFallThrough) {
278   // The conditions under which each of the output operands should be [un]set
279   // can certainly be written much more concisely with just 3 if statements or
280   // ternary expressions. However, this provides a much clearer overview to the
281   // reader as to what is set for each <CROp, BROp, OpUsed> combination.
282   if (BROp == PPC::BC || BROp == PPC::BCLR) {
283     // Regular branches.
284     switch (CROp) {
285     default:
286       llvm_unreachable("Don't know how to handle this CR logical.");
287     case PPC::CROR:
288       InvertNewBranch = false;
289       InvertOrigBranch = false;
290       TargetIsFallThrough = false;
291       return;
292     case PPC::CRAND:
293       InvertNewBranch = true;
294       InvertOrigBranch = false;
295       TargetIsFallThrough = true;
296       return;
297     case PPC::CRNAND:
298       InvertNewBranch = true;
299       InvertOrigBranch = true;
300       TargetIsFallThrough = false;
301       return;
302     case PPC::CRNOR:
303       InvertNewBranch = false;
304       InvertOrigBranch = true;
305       TargetIsFallThrough = true;
306       return;
307     case PPC::CRORC:
308       InvertNewBranch = UsingDef1;
309       InvertOrigBranch = !UsingDef1;
310       TargetIsFallThrough = false;
311       return;
312     case PPC::CRANDC:
313       InvertNewBranch = !UsingDef1;
314       InvertOrigBranch = !UsingDef1;
315       TargetIsFallThrough = true;
316       return;
317     }
318   } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
319     // Negated branches.
320     switch (CROp) {
321     default:
322       llvm_unreachable("Don't know how to handle this CR logical.");
323     case PPC::CROR:
324       InvertNewBranch = true;
325       InvertOrigBranch = false;
326       TargetIsFallThrough = true;
327       return;
328     case PPC::CRAND:
329       InvertNewBranch = false;
330       InvertOrigBranch = false;
331       TargetIsFallThrough = false;
332       return;
333     case PPC::CRNAND:
334       InvertNewBranch = false;
335       InvertOrigBranch = true;
336       TargetIsFallThrough = true;
337       return;
338     case PPC::CRNOR:
339       InvertNewBranch = true;
340       InvertOrigBranch = true;
341       TargetIsFallThrough = false;
342       return;
343     case PPC::CRORC:
344       InvertNewBranch = !UsingDef1;
345       InvertOrigBranch = !UsingDef1;
346       TargetIsFallThrough = true;
347       return;
348     case PPC::CRANDC:
349       InvertNewBranch = UsingDef1;
350       InvertOrigBranch = !UsingDef1;
351       TargetIsFallThrough = false;
352       return;
353     }
354   } else
355     llvm_unreachable("Don't know how to handle this branch.");
356 }
357 
358 namespace {
359 
360 class PPCReduceCRLogicals : public MachineFunctionPass {
361 public:
362   static char ID;
363   struct CRLogicalOpInfo {
364     MachineInstr *MI;
365     // FIXME: If chains of copies are to be handled, this should be a vector.
366     std::pair<MachineInstr*, MachineInstr*> CopyDefs;
367     std::pair<MachineInstr*, MachineInstr*> TrueDefs;
368     unsigned IsBinary : 1;
369     unsigned IsNullary : 1;
370     unsigned ContainedInBlock : 1;
371     unsigned FeedsISEL : 1;
372     unsigned FeedsBR : 1;
373     unsigned FeedsLogical : 1;
374     unsigned SingleUse : 1;
375     unsigned DefsSingleUse : 1;
376     unsigned SubregDef1;
377     unsigned SubregDef2;
CRLogicalOpInfo__anoncfc979730211::PPCReduceCRLogicals::CRLogicalOpInfo378     CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
379                         ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
380                         FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
381                         SubregDef1(0), SubregDef2(0) { }
382     void dump();
383   };
384 
385 private:
386   const PPCInstrInfo *TII = nullptr;
387   MachineFunction *MF = nullptr;
388   MachineRegisterInfo *MRI = nullptr;
389   const MachineBranchProbabilityInfo *MBPI = nullptr;
390 
391   // A vector to contain all the CR logical operations
392   SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
393   void initialize(MachineFunction &MFParm);
394   void collectCRLogicals();
395   bool handleCROp(unsigned Idx);
396   bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
isCRLogical(MachineInstr & MI)397   static bool isCRLogical(MachineInstr &MI) {
398     unsigned Opc = MI.getOpcode();
399     return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
400            Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT ||
401            Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC ||
402            Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET ||
403            Opc == PPC::CR6UNSET;
404   }
simplifyCode()405   bool simplifyCode() {
406     bool Changed = false;
407     // Not using a range-based for loop here as the vector may grow while being
408     // operated on.
409     for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
410       Changed |= handleCROp(i);
411     return Changed;
412   }
413 
414 public:
PPCReduceCRLogicals()415   PPCReduceCRLogicals() : MachineFunctionPass(ID) {}
416 
417   MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
418                                   MachineInstr *&CpDef);
runOnMachineFunction(MachineFunction & MF)419   bool runOnMachineFunction(MachineFunction &MF) override {
420     if (skipFunction(MF.getFunction()))
421       return false;
422 
423     // If the subtarget doesn't use CR bits, there's nothing to do.
424     const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
425     if (!STI.useCRBits())
426       return false;
427 
428     initialize(MF);
429     collectCRLogicals();
430     return simplifyCode();
431   }
432   CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
getAnalysisUsage(AnalysisUsage & AU) const433   void getAnalysisUsage(AnalysisUsage &AU) const override {
434     AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
435     AU.addRequired<MachineDominatorTreeWrapperPass>();
436     MachineFunctionPass::getAnalysisUsage(AU);
437   }
438 };
439 } // end anonymous namespace
440 
441 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump()442 LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
443   dbgs() << "CRLogicalOpMI: ";
444   MI->dump();
445   dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
446   dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
447   dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
448   dbgs() << ", DefsSingleUse: " << DefsSingleUse;
449   dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
450   dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
451   if (!IsNullary) {
452     dbgs() << "\nDefs:\n";
453     TrueDefs.first->dump();
454   }
455   if (IsBinary)
456     TrueDefs.second->dump();
457   dbgs() << "\n";
458   if (CopyDefs.first) {
459     dbgs() << "CopyDef1: ";
460     CopyDefs.first->dump();
461   }
462   if (CopyDefs.second) {
463     dbgs() << "CopyDef2: ";
464     CopyDefs.second->dump();
465   }
466 }
467 #endif
468 
469 PPCReduceCRLogicals::CRLogicalOpInfo
createCRLogicalOpInfo(MachineInstr & MIParam)470 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
471   CRLogicalOpInfo Ret;
472   Ret.MI = &MIParam;
473   // Get the defs
474   if (isNullary(MIParam)) {
475     Ret.IsNullary = 1;
476     Ret.TrueDefs = std::make_pair(nullptr, nullptr);
477     Ret.CopyDefs = std::make_pair(nullptr, nullptr);
478   } else {
479     MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
480                                            Ret.SubregDef1, Ret.CopyDefs.first);
481     Ret.SubregDef1 = MIParam.getOperand(1).getSubReg();
482     assert(Def1 && "Must be able to find a definition of operand 1.");
483     Ret.DefsSingleUse &=
484       MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
485     Ret.DefsSingleUse &=
486       MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
487     if (isBinary(MIParam)) {
488       Ret.IsBinary = 1;
489       MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
490                                              Ret.SubregDef2,
491                                              Ret.CopyDefs.second);
492       Ret.SubregDef2 = MIParam.getOperand(2).getSubReg();
493       assert(Def2 && "Must be able to find a definition of operand 2.");
494       Ret.DefsSingleUse &=
495         MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
496       Ret.DefsSingleUse &=
497         MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
498       Ret.TrueDefs = std::make_pair(Def1, Def2);
499     } else {
500       Ret.TrueDefs = std::make_pair(Def1, nullptr);
501       Ret.CopyDefs.second = nullptr;
502     }
503   }
504 
505   Ret.ContainedInBlock = 1;
506   // Get the uses
507   for (MachineInstr &UseMI :
508        MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
509     unsigned Opc = UseMI.getOpcode();
510     if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
511       Ret.FeedsISEL = 1;
512     if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
513         Opc == PPC::BCLRn)
514       Ret.FeedsBR = 1;
515     Ret.FeedsLogical = isCRLogical(UseMI);
516     if (UseMI.getParent() != MIParam.getParent())
517       Ret.ContainedInBlock = 0;
518   }
519   Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;
520 
521   // We now know whether all the uses of the CR logical are in the same block.
522   if (!Ret.IsNullary) {
523     Ret.ContainedInBlock &=
524       (MIParam.getParent() == Ret.TrueDefs.first->getParent());
525     if (Ret.IsBinary)
526       Ret.ContainedInBlock &=
527         (MIParam.getParent() == Ret.TrueDefs.second->getParent());
528   }
529   LLVM_DEBUG(Ret.dump());
530   if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
531     NumContainedSingleUseBinOps++;
532     if (Ret.FeedsBR && Ret.DefsSingleUse)
533       NumToSplitBlocks++;
534   }
535   return Ret;
536 }
537 
538 /// Looks through a COPY instruction to the actual definition of the CR-bit
539 /// register and returns the instruction that defines it.
540 /// FIXME: This currently handles what is by-far the most common case:
541 /// an instruction that defines a CR field followed by a single copy of a bit
542 /// from that field into a virtual register. If chains of copies need to be
543 /// handled, this should have a loop until a non-copy instruction is found.
lookThroughCRCopy(unsigned Reg,unsigned & Subreg,MachineInstr * & CpDef)544 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
545                                                      unsigned &Subreg,
546                                                      MachineInstr *&CpDef) {
547   if (!Register::isVirtualRegister(Reg))
548     return nullptr;
549   MachineInstr *Copy = MRI->getVRegDef(Reg);
550   CpDef = Copy;
551   if (!Copy->isCopy())
552     return Copy;
553   Register CopySrc = Copy->getOperand(1).getReg();
554   if (!CopySrc.isVirtual()) {
555     const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
556     // Loop backwards and return the first MI that modifies the physical CR Reg.
557     MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
558     while (Me != B)
559       if ((--Me)->modifiesRegister(CopySrc, TRI))
560         return &*Me;
561     return nullptr;
562   }
563   return MRI->getVRegDef(CopySrc);
564 }
565 
initialize(MachineFunction & MFParam)566 void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
567   MF = &MFParam;
568   MRI = &MF->getRegInfo();
569   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
570   MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
571 
572   AllCRLogicalOps.clear();
573 }
574 
575 /// Contains all the implemented transformations on CR logical operations.
576 /// For example, a binary CR logical can be used to split a block on its inputs,
577 /// a unary CR logical might be used to change the condition code on a
578 /// comparison feeding it. A nullary CR logical might simply be removable
579 /// if the user of the bit it [un]sets can be transformed.
handleCROp(unsigned Idx)580 bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
581   // We can definitely split a block on the inputs to a binary CR operation
582   // whose defs and (single) use are within the same block.
583   bool Changed = false;
584   CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
585   if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
586       CRI.DefsSingleUse) {
587     Changed = splitBlockOnBinaryCROp(CRI);
588     if (Changed)
589       NumBlocksSplitOnBinaryCROp++;
590   }
591   return Changed;
592 }
593 
594 /// Splits a block that contains a CR-logical operation that feeds a branch
595 /// and whose operands are produced within the block.
596 /// Example:
597 ///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
598 ///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
599 ///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
600 ///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
601 ///    %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
602 ///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
603 /// Becomes:
604 ///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
605 ///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
606 ///    BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
607 ///
608 ///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
609 ///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
610 ///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
splitBlockOnBinaryCROp(CRLogicalOpInfo & CRI)611 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
612   if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
613     LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
614     NumNotSplitIdenticalOperands++;
615     return false;
616   }
617   if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
618       CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
619     LLVM_DEBUG(
620         dbgs() << "Unable to split because one of the operands is a PHI or "
621                   "chain of copies.\n");
622     NumNotSplitChainCopies++;
623     return false;
624   }
625   // Note: keep in sync with computeBranchTargetAndInversion().
626   if (CRI.MI->getOpcode() != PPC::CROR &&
627       CRI.MI->getOpcode() != PPC::CRAND &&
628       CRI.MI->getOpcode() != PPC::CRNOR &&
629       CRI.MI->getOpcode() != PPC::CRNAND &&
630       CRI.MI->getOpcode() != PPC::CRORC &&
631       CRI.MI->getOpcode() != PPC::CRANDC) {
632     LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
633     NumNotSplitWrongOpcode++;
634     return false;
635   }
636   LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
637   MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
638   MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;
639 
640   bool UsingDef1 = false;
641   MachineInstr *SplitBefore = &*Def2It;
642   for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
643     if (Def1It == Def2It) { // Def2 comes before Def1.
644       SplitBefore = &*Def1It;
645       UsingDef1 = true;
646       break;
647     }
648   }
649 
650   LLVM_DEBUG(dbgs() << "We will split the following block:\n";);
651   LLVM_DEBUG(CRI.MI->getParent()->dump());
652   LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());
653 
654   // Get the branch instruction.
655   MachineInstr *Branch =
656     MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();
657 
658   // We want the new block to have no code in it other than the definition
659   // of the input to the CR logical and the CR logical itself. So we move
660   // those to the bottom of the block (just before the branch). Then we
661   // will split before the CR logical.
662   MachineBasicBlock *MBB = SplitBefore->getParent();
663   auto FirstTerminator = MBB->getFirstTerminator();
664   MachineBasicBlock::iterator FirstInstrToMove =
665     UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
666   MachineBasicBlock::iterator SecondInstrToMove =
667     UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;
668 
669   // The instructions that need to be moved are not guaranteed to be
670   // contiguous. Move them individually.
671   // FIXME: If one of the operands is a chain of (single use) copies, they
672   // can all be moved and we can still split.
673   MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
674   if (FirstInstrToMove != SecondInstrToMove)
675     MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
676   MBB->splice(FirstTerminator, MBB, CRI.MI);
677 
678   unsigned Opc = CRI.MI->getOpcode();
679   bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
680   computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
681                                   InvertNewBranch, InvertOrigBranch,
682                                   TargetIsFallThrough);
683   MachineInstr *NewCond = CRI.CopyDefs.first;
684   MachineInstr *SplitCond = CRI.CopyDefs.second;
685   if (!UsingDef1) {
686     std::swap(NewCond, SplitCond);
687     std::swap(CRI.SubregDef1, CRI.SubregDef2);
688   }
689   LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
690   LLVM_DEBUG(dbgs() << " the original branch and the target is the "
691                     << (TargetIsFallThrough ? "fallthrough block\n"
692                                             : "orig. target block\n"));
693   LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
694   BlockSplitInfo BSI{
695       Branch,         SplitBefore,     SplitCond,        CRI.SubregDef1,
696       CRI.SubregDef2, InvertNewBranch, InvertOrigBranch, TargetIsFallThrough,
697       MBPI,           CRI.MI,          NewCond};
698   bool Changed = splitMBB(BSI);
699   // If we've split on a CR logical that is fed by a CR logical,
700   // recompute the source CR logical as it may be usable for splitting.
701   if (Changed) {
702     bool Input1CRlogical =
703       CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
704     bool Input2CRlogical =
705       CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
706     if (Input1CRlogical)
707       AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
708     if (Input2CRlogical)
709       AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
710   }
711   return Changed;
712 }
713 
collectCRLogicals()714 void PPCReduceCRLogicals::collectCRLogicals() {
715   for (MachineBasicBlock &MBB : *MF) {
716     for (MachineInstr &MI : MBB) {
717       if (isCRLogical(MI)) {
718         AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
719         TotalCRLogicals++;
720         if (AllCRLogicalOps.back().IsNullary)
721           TotalNullaryCRLogicals++;
722         else if (AllCRLogicalOps.back().IsBinary)
723           TotalBinaryCRLogicals++;
724         else
725           TotalUnaryCRLogicals++;
726       }
727     }
728   }
729 }
730 
731 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
732                       "PowerPC Reduce CR logical Operation", false, false)
733 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
734 INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
735                     "PowerPC Reduce CR logical Operation", false, false)
736 
737 char PPCReduceCRLogicals::ID = 0;
738 FunctionPass*
createPPCReduceCRLogicalsPass()739 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }
740