1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass aims to reduce the number of logical operations on bits in the CR
10 // register. These instructions have a fairly high latency and only a single
11 // pipeline at their disposal in modern PPC cores. Furthermore, they have a
12 // tendency to occur in fairly small blocks where there's little opportunity
13 // to hide the latency between the CR logical operation and its user.
14 //
15 //===---------------------------------------------------------------------===//
16
17 #include "PPC.h"
18 #include "PPCInstrInfo.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/Config/llvm-config.h"
27 #include "llvm/Support/Debug.h"
28
29 using namespace llvm;
30
31 #define DEBUG_TYPE "ppc-reduce-cr-ops"
32
33 STATISTIC(NumContainedSingleUseBinOps,
34 "Number of single-use binary CR logical ops contained in a block");
35 STATISTIC(NumToSplitBlocks,
36 "Number of binary CR logical ops that can be used to split blocks");
37 STATISTIC(TotalCRLogicals, "Number of CR logical ops.");
38 STATISTIC(TotalNullaryCRLogicals,
39 "Number of nullary CR logical ops (CRSET/CRUNSET).");
40 STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
41 STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
42 STATISTIC(NumBlocksSplitOnBinaryCROp,
43 "Number of blocks split on CR binary logical ops.");
44 STATISTIC(NumNotSplitIdenticalOperands,
45 "Number of blocks not split due to operands being identical.");
46 STATISTIC(NumNotSplitChainCopies,
47 "Number of blocks not split due to operands being chained copies.");
48 STATISTIC(NumNotSplitWrongOpcode,
49 "Number of blocks not split due to the wrong opcode.");
50
51 /// Given a basic block \p Successor that potentially contains PHIs, this
52 /// function will look for any incoming values in the PHIs that are supposed to
53 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
54 /// Any such PHIs will be updated to reflect reality.
updatePHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)55 static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB,
56 MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) {
57 for (auto &MI : Successor->instrs()) {
58 if (!MI.isPHI())
59 continue;
60 // This is a really ugly-looking loop, but it was pillaged directly from
61 // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
62 for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
63 MachineOperand &MO = MI.getOperand(i);
64 if (MO.getMBB() == OrigMBB) {
65 // Check if the instruction is actually defined in NewMBB.
66 if (MI.getOperand(i - 1).isReg()) {
67 MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg());
68 if (DefMI->getParent() == NewMBB ||
69 !OrigMBB->isSuccessor(Successor)) {
70 MO.setMBB(NewMBB);
71 break;
72 }
73 }
74 }
75 }
76 }
77 }
78
79 /// Given a basic block \p Successor that potentially contains PHIs, this
80 /// function will look for PHIs that have an incoming value from \p OrigMBB
81 /// and will add the same incoming value from \p NewMBB.
82 /// NOTE: This should only be used if \p NewMBB is an immediate dominator of
83 /// \p OrigMBB.
addIncomingValuesToPHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)84 static void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
85 MachineBasicBlock *OrigMBB,
86 MachineBasicBlock *NewMBB,
87 MachineRegisterInfo *MRI) {
88 assert(OrigMBB->isSuccessor(NewMBB) &&
89 "NewMBB must be a successor of OrigMBB");
90 for (auto &MI : Successor->instrs()) {
91 if (!MI.isPHI())
92 continue;
93 // This is a really ugly-looking loop, but it was pillaged directly from
94 // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
95 for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
96 MachineOperand &MO = MI.getOperand(i);
97 if (MO.getMBB() == OrigMBB) {
98 MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
99 MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
100 break;
101 }
102 }
103 }
104 }
105
106 namespace {
107 struct BlockSplitInfo {
108 MachineInstr *OrigBranch;
109 MachineInstr *SplitBefore;
110 MachineInstr *SplitCond;
111 unsigned OrigSubreg;
112 unsigned SplitCondSubreg;
113 bool InvertNewBranch;
114 bool InvertOrigBranch;
115 bool BranchToFallThrough;
116 const MachineBranchProbabilityInfo *MBPI;
117 MachineInstr *MIToDelete;
118 MachineInstr *NewCond;
allInstrsInSameMBB__anoncfc979730111::BlockSplitInfo119 bool allInstrsInSameMBB() {
120 if (!OrigBranch || !SplitBefore || !SplitCond)
121 return false;
122 MachineBasicBlock *MBB = OrigBranch->getParent();
123 if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
124 return false;
125 if (MIToDelete && MIToDelete->getParent() != MBB)
126 return false;
127 if (NewCond && NewCond->getParent() != MBB)
128 return false;
129 return true;
130 }
131 };
132 } // end anonymous namespace
133
134 /// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
135 /// branch is \p OrigBranch. The target of the new branch can either be the same
136 /// as the target of the original branch or the fallthrough successor of the
137 /// original block as determined by \p BranchToFallThrough. The branch
138 /// conditions will be inverted according to \p InvertNewBranch and
139 /// \p InvertOrigBranch. If an instruction that previously fed the branch is to
140 /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
141 /// the branch condition. The branch probabilities will be set if the
142 /// MachineBranchProbabilityInfo isn't null.
splitMBB(BlockSplitInfo & BSI)143 static bool splitMBB(BlockSplitInfo &BSI) {
144 assert(BSI.allInstrsInSameMBB() &&
145 "All instructions must be in the same block.");
146
147 MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
148 MachineFunction *MF = ThisMBB->getParent();
149 MachineRegisterInfo *MRI = &MF->getRegInfo();
150 assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
151 if (ThisMBB->succ_size() != 2) {
152 LLVM_DEBUG(
153 dbgs() << "Don't know how to handle blocks that don't have exactly"
154 << " two successors.\n");
155 return false;
156 }
157
158 const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
159 unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
160 unsigned InvertedOpcode =
161 OrigBROpcode == PPC::BC
162 ? PPC::BCn
163 : OrigBROpcode == PPC::BCn
164 ? PPC::BC
165 : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
166 unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
167 MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
168 MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
169 ? *ThisMBB->succ_rbegin()
170 : *ThisMBB->succ_begin();
171 MachineBasicBlock *NewBRTarget =
172 BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
173
174 // It's impossible to know the precise branch probability after the split.
175 // But it still needs to be reasonable, the whole probability to original
176 // targets should not be changed.
177 // After split NewBRTarget will get two incoming edges. Assume P0 is the
178 // original branch probability to NewBRTarget, P1 and P2 are new branch
179 // probabilies to NewBRTarget after split. If the two edge frequencies are
180 // same, then
181 // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
182 // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
183 BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
184 BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
185 ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
186 ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
187 if (BSI.MBPI) {
188 if (BSI.BranchToFallThrough) {
189 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
190 ProbFallThrough = ProbToNewTarget.getCompl();
191 ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
192 ProbOrigTarget = ProbOrigFallThrough.getCompl();
193 } else {
194 ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
195 ProbFallThrough = ProbToNewTarget.getCompl();
196 ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
197 ProbOrigFallThrough = ProbOrigTarget.getCompl();
198 }
199 }
200
201 // Create a new basic block.
202 MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
203 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
204 MachineFunction::iterator It = ThisMBB->getIterator();
205 MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
206 MF->insert(++It, NewMBB);
207
208 // Move everything after SplitBefore into the new block.
209 NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
210 NewMBB->transferSuccessors(ThisMBB);
211 if (!ProbOrigTarget.isUnknown()) {
212 auto MBBI = find(NewMBB->successors(), OrigTarget);
213 NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
214 MBBI = find(NewMBB->successors(), OrigFallThrough);
215 NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
216 }
217
218 // Add the two successors to ThisMBB.
219 ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
220 ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
221
222 // Add the branches to ThisMBB.
223 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
224 TII->get(NewBROpcode))
225 .addReg(BSI.SplitCond->getOperand(0).getReg(), 0, BSI.SplitCondSubreg)
226 .addMBB(NewBRTarget);
227 BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
228 TII->get(PPC::B))
229 .addMBB(NewMBB);
230 if (BSI.MIToDelete)
231 BSI.MIToDelete->eraseFromParent();
232
233 // Change the condition on the original branch and invert it if requested.
234 auto FirstTerminator = NewMBB->getFirstTerminator();
235 if (BSI.NewCond) {
236 assert(FirstTerminator->getOperand(0).isReg() &&
237 "Can't update condition of unconditional branch.");
238 FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
239 FirstTerminator->getOperand(0).setSubReg(BSI.OrigSubreg);
240 }
241 if (BSI.InvertOrigBranch)
242 FirstTerminator->setDesc(TII->get(InvertedOpcode));
243
244 // If any of the PHIs in the successors of NewMBB reference values that
245 // now come from NewMBB, they need to be updated.
246 for (auto *Succ : NewMBB->successors()) {
247 updatePHIs(Succ, ThisMBB, NewMBB, MRI);
248 }
249 addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);
250
251 // Set the call frame size on ThisMBB to the new basic blocks.
252 // See https://reviews.llvm.org/D156113.
253 NewMBB->setCallFrameSize(TII->getCallFrameSizeAt(ThisMBB->back()));
254
255 LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
256 LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
257 LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
258 return true;
259 }
260
isBinary(MachineInstr & MI)261 static bool isBinary(MachineInstr &MI) {
262 return MI.getNumOperands() == 3;
263 }
264
isNullary(MachineInstr & MI)265 static bool isNullary(MachineInstr &MI) {
266 return MI.getNumOperands() == 1;
267 }
268
269 /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
270 /// a flag to indicate if the first operand of \p CROp is used as the
271 /// SplitBefore operand, determines whether either of the branches are to be
272 /// inverted as well as whether the new target should be the original
273 /// fall-through block.
274 static void
computeBranchTargetAndInversion(unsigned CROp,unsigned BROp,bool UsingDef1,bool & InvertNewBranch,bool & InvertOrigBranch,bool & TargetIsFallThrough)275 computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
276 bool &InvertNewBranch, bool &InvertOrigBranch,
277 bool &TargetIsFallThrough) {
278 // The conditions under which each of the output operands should be [un]set
279 // can certainly be written much more concisely with just 3 if statements or
280 // ternary expressions. However, this provides a much clearer overview to the
281 // reader as to what is set for each <CROp, BROp, OpUsed> combination.
282 if (BROp == PPC::BC || BROp == PPC::BCLR) {
283 // Regular branches.
284 switch (CROp) {
285 default:
286 llvm_unreachable("Don't know how to handle this CR logical.");
287 case PPC::CROR:
288 InvertNewBranch = false;
289 InvertOrigBranch = false;
290 TargetIsFallThrough = false;
291 return;
292 case PPC::CRAND:
293 InvertNewBranch = true;
294 InvertOrigBranch = false;
295 TargetIsFallThrough = true;
296 return;
297 case PPC::CRNAND:
298 InvertNewBranch = true;
299 InvertOrigBranch = true;
300 TargetIsFallThrough = false;
301 return;
302 case PPC::CRNOR:
303 InvertNewBranch = false;
304 InvertOrigBranch = true;
305 TargetIsFallThrough = true;
306 return;
307 case PPC::CRORC:
308 InvertNewBranch = UsingDef1;
309 InvertOrigBranch = !UsingDef1;
310 TargetIsFallThrough = false;
311 return;
312 case PPC::CRANDC:
313 InvertNewBranch = !UsingDef1;
314 InvertOrigBranch = !UsingDef1;
315 TargetIsFallThrough = true;
316 return;
317 }
318 } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
319 // Negated branches.
320 switch (CROp) {
321 default:
322 llvm_unreachable("Don't know how to handle this CR logical.");
323 case PPC::CROR:
324 InvertNewBranch = true;
325 InvertOrigBranch = false;
326 TargetIsFallThrough = true;
327 return;
328 case PPC::CRAND:
329 InvertNewBranch = false;
330 InvertOrigBranch = false;
331 TargetIsFallThrough = false;
332 return;
333 case PPC::CRNAND:
334 InvertNewBranch = false;
335 InvertOrigBranch = true;
336 TargetIsFallThrough = true;
337 return;
338 case PPC::CRNOR:
339 InvertNewBranch = true;
340 InvertOrigBranch = true;
341 TargetIsFallThrough = false;
342 return;
343 case PPC::CRORC:
344 InvertNewBranch = !UsingDef1;
345 InvertOrigBranch = !UsingDef1;
346 TargetIsFallThrough = true;
347 return;
348 case PPC::CRANDC:
349 InvertNewBranch = UsingDef1;
350 InvertOrigBranch = !UsingDef1;
351 TargetIsFallThrough = false;
352 return;
353 }
354 } else
355 llvm_unreachable("Don't know how to handle this branch.");
356 }
357
358 namespace {
359
360 class PPCReduceCRLogicals : public MachineFunctionPass {
361 public:
362 static char ID;
363 struct CRLogicalOpInfo {
364 MachineInstr *MI;
365 // FIXME: If chains of copies are to be handled, this should be a vector.
366 std::pair<MachineInstr*, MachineInstr*> CopyDefs;
367 std::pair<MachineInstr*, MachineInstr*> TrueDefs;
368 unsigned IsBinary : 1;
369 unsigned IsNullary : 1;
370 unsigned ContainedInBlock : 1;
371 unsigned FeedsISEL : 1;
372 unsigned FeedsBR : 1;
373 unsigned FeedsLogical : 1;
374 unsigned SingleUse : 1;
375 unsigned DefsSingleUse : 1;
376 unsigned SubregDef1;
377 unsigned SubregDef2;
CRLogicalOpInfo__anoncfc979730211::PPCReduceCRLogicals::CRLogicalOpInfo378 CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
379 ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
380 FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
381 SubregDef1(0), SubregDef2(0) { }
382 void dump();
383 };
384
385 private:
386 const PPCInstrInfo *TII = nullptr;
387 MachineFunction *MF = nullptr;
388 MachineRegisterInfo *MRI = nullptr;
389 const MachineBranchProbabilityInfo *MBPI = nullptr;
390
391 // A vector to contain all the CR logical operations
392 SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
393 void initialize(MachineFunction &MFParm);
394 void collectCRLogicals();
395 bool handleCROp(unsigned Idx);
396 bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
isCRLogical(MachineInstr & MI)397 static bool isCRLogical(MachineInstr &MI) {
398 unsigned Opc = MI.getOpcode();
399 return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
400 Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT ||
401 Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC ||
402 Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET ||
403 Opc == PPC::CR6UNSET;
404 }
simplifyCode()405 bool simplifyCode() {
406 bool Changed = false;
407 // Not using a range-based for loop here as the vector may grow while being
408 // operated on.
409 for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
410 Changed |= handleCROp(i);
411 return Changed;
412 }
413
414 public:
PPCReduceCRLogicals()415 PPCReduceCRLogicals() : MachineFunctionPass(ID) {}
416
417 MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
418 MachineInstr *&CpDef);
runOnMachineFunction(MachineFunction & MF)419 bool runOnMachineFunction(MachineFunction &MF) override {
420 if (skipFunction(MF.getFunction()))
421 return false;
422
423 // If the subtarget doesn't use CR bits, there's nothing to do.
424 const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
425 if (!STI.useCRBits())
426 return false;
427
428 initialize(MF);
429 collectCRLogicals();
430 return simplifyCode();
431 }
432 CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
getAnalysisUsage(AnalysisUsage & AU) const433 void getAnalysisUsage(AnalysisUsage &AU) const override {
434 AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
435 AU.addRequired<MachineDominatorTreeWrapperPass>();
436 MachineFunctionPass::getAnalysisUsage(AU);
437 }
438 };
439 } // end anonymous namespace
440
441 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump()442 LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
443 dbgs() << "CRLogicalOpMI: ";
444 MI->dump();
445 dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
446 dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
447 dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
448 dbgs() << ", DefsSingleUse: " << DefsSingleUse;
449 dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
450 dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
451 if (!IsNullary) {
452 dbgs() << "\nDefs:\n";
453 TrueDefs.first->dump();
454 }
455 if (IsBinary)
456 TrueDefs.second->dump();
457 dbgs() << "\n";
458 if (CopyDefs.first) {
459 dbgs() << "CopyDef1: ";
460 CopyDefs.first->dump();
461 }
462 if (CopyDefs.second) {
463 dbgs() << "CopyDef2: ";
464 CopyDefs.second->dump();
465 }
466 }
467 #endif
468
469 PPCReduceCRLogicals::CRLogicalOpInfo
createCRLogicalOpInfo(MachineInstr & MIParam)470 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
471 CRLogicalOpInfo Ret;
472 Ret.MI = &MIParam;
473 // Get the defs
474 if (isNullary(MIParam)) {
475 Ret.IsNullary = 1;
476 Ret.TrueDefs = std::make_pair(nullptr, nullptr);
477 Ret.CopyDefs = std::make_pair(nullptr, nullptr);
478 } else {
479 MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
480 Ret.SubregDef1, Ret.CopyDefs.first);
481 Ret.SubregDef1 = MIParam.getOperand(1).getSubReg();
482 assert(Def1 && "Must be able to find a definition of operand 1.");
483 Ret.DefsSingleUse &=
484 MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
485 Ret.DefsSingleUse &=
486 MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
487 if (isBinary(MIParam)) {
488 Ret.IsBinary = 1;
489 MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
490 Ret.SubregDef2,
491 Ret.CopyDefs.second);
492 Ret.SubregDef2 = MIParam.getOperand(2).getSubReg();
493 assert(Def2 && "Must be able to find a definition of operand 2.");
494 Ret.DefsSingleUse &=
495 MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
496 Ret.DefsSingleUse &=
497 MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
498 Ret.TrueDefs = std::make_pair(Def1, Def2);
499 } else {
500 Ret.TrueDefs = std::make_pair(Def1, nullptr);
501 Ret.CopyDefs.second = nullptr;
502 }
503 }
504
505 Ret.ContainedInBlock = 1;
506 // Get the uses
507 for (MachineInstr &UseMI :
508 MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
509 unsigned Opc = UseMI.getOpcode();
510 if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
511 Ret.FeedsISEL = 1;
512 if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
513 Opc == PPC::BCLRn)
514 Ret.FeedsBR = 1;
515 Ret.FeedsLogical = isCRLogical(UseMI);
516 if (UseMI.getParent() != MIParam.getParent())
517 Ret.ContainedInBlock = 0;
518 }
519 Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;
520
521 // We now know whether all the uses of the CR logical are in the same block.
522 if (!Ret.IsNullary) {
523 Ret.ContainedInBlock &=
524 (MIParam.getParent() == Ret.TrueDefs.first->getParent());
525 if (Ret.IsBinary)
526 Ret.ContainedInBlock &=
527 (MIParam.getParent() == Ret.TrueDefs.second->getParent());
528 }
529 LLVM_DEBUG(Ret.dump());
530 if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
531 NumContainedSingleUseBinOps++;
532 if (Ret.FeedsBR && Ret.DefsSingleUse)
533 NumToSplitBlocks++;
534 }
535 return Ret;
536 }
537
538 /// Looks through a COPY instruction to the actual definition of the CR-bit
539 /// register and returns the instruction that defines it.
540 /// FIXME: This currently handles what is by-far the most common case:
541 /// an instruction that defines a CR field followed by a single copy of a bit
542 /// from that field into a virtual register. If chains of copies need to be
543 /// handled, this should have a loop until a non-copy instruction is found.
lookThroughCRCopy(unsigned Reg,unsigned & Subreg,MachineInstr * & CpDef)544 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
545 unsigned &Subreg,
546 MachineInstr *&CpDef) {
547 if (!Register::isVirtualRegister(Reg))
548 return nullptr;
549 MachineInstr *Copy = MRI->getVRegDef(Reg);
550 CpDef = Copy;
551 if (!Copy->isCopy())
552 return Copy;
553 Register CopySrc = Copy->getOperand(1).getReg();
554 if (!CopySrc.isVirtual()) {
555 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
556 // Loop backwards and return the first MI that modifies the physical CR Reg.
557 MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
558 while (Me != B)
559 if ((--Me)->modifiesRegister(CopySrc, TRI))
560 return &*Me;
561 return nullptr;
562 }
563 return MRI->getVRegDef(CopySrc);
564 }
565
initialize(MachineFunction & MFParam)566 void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
567 MF = &MFParam;
568 MRI = &MF->getRegInfo();
569 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
570 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
571
572 AllCRLogicalOps.clear();
573 }
574
575 /// Contains all the implemented transformations on CR logical operations.
576 /// For example, a binary CR logical can be used to split a block on its inputs,
577 /// a unary CR logical might be used to change the condition code on a
578 /// comparison feeding it. A nullary CR logical might simply be removable
579 /// if the user of the bit it [un]sets can be transformed.
handleCROp(unsigned Idx)580 bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
581 // We can definitely split a block on the inputs to a binary CR operation
582 // whose defs and (single) use are within the same block.
583 bool Changed = false;
584 CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
585 if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
586 CRI.DefsSingleUse) {
587 Changed = splitBlockOnBinaryCROp(CRI);
588 if (Changed)
589 NumBlocksSplitOnBinaryCROp++;
590 }
591 return Changed;
592 }
593
594 /// Splits a block that contains a CR-logical operation that feeds a branch
595 /// and whose operands are produced within the block.
596 /// Example:
597 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
598 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
599 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
600 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
601 /// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
602 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
603 /// Becomes:
604 /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
605 /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
606 /// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
607 ///
608 /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
609 /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
610 /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
splitBlockOnBinaryCROp(CRLogicalOpInfo & CRI)611 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
612 if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
613 LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
614 NumNotSplitIdenticalOperands++;
615 return false;
616 }
617 if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
618 CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
619 LLVM_DEBUG(
620 dbgs() << "Unable to split because one of the operands is a PHI or "
621 "chain of copies.\n");
622 NumNotSplitChainCopies++;
623 return false;
624 }
625 // Note: keep in sync with computeBranchTargetAndInversion().
626 if (CRI.MI->getOpcode() != PPC::CROR &&
627 CRI.MI->getOpcode() != PPC::CRAND &&
628 CRI.MI->getOpcode() != PPC::CRNOR &&
629 CRI.MI->getOpcode() != PPC::CRNAND &&
630 CRI.MI->getOpcode() != PPC::CRORC &&
631 CRI.MI->getOpcode() != PPC::CRANDC) {
632 LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
633 NumNotSplitWrongOpcode++;
634 return false;
635 }
636 LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
637 MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
638 MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;
639
640 bool UsingDef1 = false;
641 MachineInstr *SplitBefore = &*Def2It;
642 for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
643 if (Def1It == Def2It) { // Def2 comes before Def1.
644 SplitBefore = &*Def1It;
645 UsingDef1 = true;
646 break;
647 }
648 }
649
650 LLVM_DEBUG(dbgs() << "We will split the following block:\n";);
651 LLVM_DEBUG(CRI.MI->getParent()->dump());
652 LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());
653
654 // Get the branch instruction.
655 MachineInstr *Branch =
656 MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();
657
658 // We want the new block to have no code in it other than the definition
659 // of the input to the CR logical and the CR logical itself. So we move
660 // those to the bottom of the block (just before the branch). Then we
661 // will split before the CR logical.
662 MachineBasicBlock *MBB = SplitBefore->getParent();
663 auto FirstTerminator = MBB->getFirstTerminator();
664 MachineBasicBlock::iterator FirstInstrToMove =
665 UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
666 MachineBasicBlock::iterator SecondInstrToMove =
667 UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;
668
669 // The instructions that need to be moved are not guaranteed to be
670 // contiguous. Move them individually.
671 // FIXME: If one of the operands is a chain of (single use) copies, they
672 // can all be moved and we can still split.
673 MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
674 if (FirstInstrToMove != SecondInstrToMove)
675 MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
676 MBB->splice(FirstTerminator, MBB, CRI.MI);
677
678 unsigned Opc = CRI.MI->getOpcode();
679 bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
680 computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
681 InvertNewBranch, InvertOrigBranch,
682 TargetIsFallThrough);
683 MachineInstr *NewCond = CRI.CopyDefs.first;
684 MachineInstr *SplitCond = CRI.CopyDefs.second;
685 if (!UsingDef1) {
686 std::swap(NewCond, SplitCond);
687 std::swap(CRI.SubregDef1, CRI.SubregDef2);
688 }
689 LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
690 LLVM_DEBUG(dbgs() << " the original branch and the target is the "
691 << (TargetIsFallThrough ? "fallthrough block\n"
692 : "orig. target block\n"));
693 LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
694 BlockSplitInfo BSI{
695 Branch, SplitBefore, SplitCond, CRI.SubregDef1,
696 CRI.SubregDef2, InvertNewBranch, InvertOrigBranch, TargetIsFallThrough,
697 MBPI, CRI.MI, NewCond};
698 bool Changed = splitMBB(BSI);
699 // If we've split on a CR logical that is fed by a CR logical,
700 // recompute the source CR logical as it may be usable for splitting.
701 if (Changed) {
702 bool Input1CRlogical =
703 CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
704 bool Input2CRlogical =
705 CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
706 if (Input1CRlogical)
707 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
708 if (Input2CRlogical)
709 AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
710 }
711 return Changed;
712 }
713
collectCRLogicals()714 void PPCReduceCRLogicals::collectCRLogicals() {
715 for (MachineBasicBlock &MBB : *MF) {
716 for (MachineInstr &MI : MBB) {
717 if (isCRLogical(MI)) {
718 AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
719 TotalCRLogicals++;
720 if (AllCRLogicalOps.back().IsNullary)
721 TotalNullaryCRLogicals++;
722 else if (AllCRLogicalOps.back().IsBinary)
723 TotalBinaryCRLogicals++;
724 else
725 TotalUnaryCRLogicals++;
726 }
727 }
728 }
729 }
730
731 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
732 "PowerPC Reduce CR logical Operation", false, false)
733 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
734 INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
735 "PowerPC Reduce CR logical Operation", false, false)
736
737 char PPCReduceCRLogicals::ID = 0;
738 FunctionPass*
createPPCReduceCRLogicalsPass()739 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }
740