xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A pre-emit peephole for catching opportunities introduced by late passes such
10 // as MachineBlockPlacement.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPC.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/CodeGen/LivePhysRegs.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/RegisterScavenging.h"
24 #include "llvm/MC/MCContext.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "ppc-pre-emit-peephole"
31 
32 STATISTIC(NumRRConvertedInPreEmit,
33           "Number of r+r instructions converted to r+i in pre-emit peephole");
34 STATISTIC(NumRemovedInPreEmit,
35           "Number of instructions deleted in pre-emit peephole");
36 STATISTIC(NumberOfSelfCopies,
37           "Number of self copy instructions eliminated");
38 STATISTIC(NumFrameOffFoldInPreEmit,
39           "Number of folding frame offset by using r+r in pre-emit peephole");
40 STATISTIC(NumCmpsInPreEmit,
41           "Number of compares eliminated in pre-emit peephole");
42 
43 static cl::opt<bool>
44 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
45                      cl::desc("enable PC Relative linker optimization"));
46 
47 static cl::opt<bool>
48 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
49                    cl::desc("Run pre-emit peephole optimizations."));
50 
51 static cl::opt<uint64_t>
52 DSCRValue("ppc-set-dscr", cl::Hidden,
53           cl::desc("Set the Data Stream Control Register."));
54 
55 namespace {
56 
hasPCRelativeForm(MachineInstr & Use)57 static bool hasPCRelativeForm(MachineInstr &Use) {
58   switch (Use.getOpcode()) {
59   default:
60     return false;
61   case PPC::LBZ:
62   case PPC::LBZ8:
63   case PPC::LHA:
64   case PPC::LHA8:
65   case PPC::LHZ:
66   case PPC::LHZ8:
67   case PPC::LWZ:
68   case PPC::LWZ8:
69   case PPC::STB:
70   case PPC::STB8:
71   case PPC::STH:
72   case PPC::STH8:
73   case PPC::STW:
74   case PPC::STW8:
75   case PPC::LD:
76   case PPC::STD:
77   case PPC::LWA:
78   case PPC::LXSD:
79   case PPC::LXSSP:
80   case PPC::LXV:
81   case PPC::STXSD:
82   case PPC::STXSSP:
83   case PPC::STXV:
84   case PPC::LFD:
85   case PPC::LFS:
86   case PPC::STFD:
87   case PPC::STFS:
88   case PPC::DFLOADf32:
89   case PPC::DFLOADf64:
90   case PPC::DFSTOREf32:
91   case PPC::DFSTOREf64:
92     return true;
93   }
94 }
95 
96   class PPCPreEmitPeephole : public MachineFunctionPass {
97   public:
98     static char ID;
PPCPreEmitPeephole()99     PPCPreEmitPeephole() : MachineFunctionPass(ID) {
100       initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
101     }
102 
getAnalysisUsage(AnalysisUsage & AU) const103     void getAnalysisUsage(AnalysisUsage &AU) const override {
104       MachineFunctionPass::getAnalysisUsage(AU);
105     }
106 
getRequiredProperties() const107     MachineFunctionProperties getRequiredProperties() const override {
108       return MachineFunctionProperties().set(
109           MachineFunctionProperties::Property::NoVRegs);
110     }
111 
112     // This function removes any redundant load immediates. It has two level
113     // loops - The outer loop finds the load immediates BBI that could be used
114     // to replace following redundancy. The inner loop scans instructions that
115     // after BBI to find redundancy and update kill/dead flags accordingly. If
116     // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
117     // that modify the def register of BBI would break the scanning.
118     // DeadOrKillToUnset is a pointer to the previous operand that had the
119     // kill/dead flag set. It keeps track of the def register of BBI, the use
120     // registers of AfterBBIs and the def registers of AfterBBIs.
removeRedundantLIs(MachineBasicBlock & MBB,const TargetRegisterInfo * TRI)121     bool removeRedundantLIs(MachineBasicBlock &MBB,
122                             const TargetRegisterInfo *TRI) {
123       LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
124                  MBB.dump(); dbgs() << "\n");
125 
126       DenseSet<MachineInstr *> InstrsToErase;
127       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
128         // Skip load immediate that is marked to be erased later because it
129         // cannot be used to replace any other instructions.
130         if (InstrsToErase.contains(&*BBI))
131           continue;
132         // Skip non-load immediate.
133         unsigned Opc = BBI->getOpcode();
134         if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
135             Opc != PPC::LIS8)
136           continue;
137         // Skip load immediate, where the operand is a relocation (e.g., $r3 =
138         // LI target-flags(ppc-lo) %const.0).
139         if (!BBI->getOperand(1).isImm())
140           continue;
141         assert(BBI->getOperand(0).isReg() &&
142                "Expected a register for the first operand");
143 
144         LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
145 
146         Register Reg = BBI->getOperand(0).getReg();
147         int64_t Imm = BBI->getOperand(1).getImm();
148         MachineOperand *DeadOrKillToUnset = nullptr;
149         if (BBI->getOperand(0).isDead()) {
150           DeadOrKillToUnset = &BBI->getOperand(0);
151           LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
152                             << " from load immediate " << *BBI
153                             << " is a unsetting candidate\n");
154         }
155         // This loop scans instructions after BBI to see if there is any
156         // redundant load immediate.
157         for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
158              ++AfterBBI) {
159           // Track the operand that kill Reg. We would unset the kill flag of
160           // the operand if there is a following redundant load immediate.
161           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, TRI, true);
162 
163           // We can't just clear implicit kills, so if we encounter one, stop
164           // looking further.
165           if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
166             LLVM_DEBUG(dbgs()
167                        << "Encountered an implicit kill, cannot proceed: ");
168             LLVM_DEBUG(AfterBBI->dump());
169             break;
170           }
171 
172           if (KillIdx != -1) {
173             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
174             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
175             LLVM_DEBUG(dbgs()
176                        << " Kill flag of " << *DeadOrKillToUnset << " from "
177                        << *AfterBBI << " is a unsetting candidate\n");
178           }
179 
180           if (!AfterBBI->modifiesRegister(Reg, TRI))
181             continue;
182           // Finish scanning because Reg is overwritten by a non-load
183           // instruction.
184           if (AfterBBI->getOpcode() != Opc)
185             break;
186           assert(AfterBBI->getOperand(0).isReg() &&
187                  "Expected a register for the first operand");
188           // Finish scanning because Reg is overwritten by a relocation or a
189           // different value.
190           if (!AfterBBI->getOperand(1).isImm() ||
191               AfterBBI->getOperand(1).getImm() != Imm)
192             break;
193 
194           // It loads same immediate value to the same Reg, which is redundant.
195           // We would unset kill flag in previous Reg usage to extend live range
196           // of Reg first, then remove the redundancy.
197           if (DeadOrKillToUnset) {
198             LLVM_DEBUG(dbgs()
199                        << " Unset dead/kill flag of " << *DeadOrKillToUnset
200                        << " from " << *DeadOrKillToUnset->getParent());
201             if (DeadOrKillToUnset->isDef())
202               DeadOrKillToUnset->setIsDead(false);
203             else
204               DeadOrKillToUnset->setIsKill(false);
205           }
206           DeadOrKillToUnset =
207               AfterBBI->findRegisterDefOperand(Reg, TRI, true, true);
208           if (DeadOrKillToUnset)
209             LLVM_DEBUG(dbgs()
210                        << " Dead flag of " << *DeadOrKillToUnset << " from "
211                        << *AfterBBI << " is a unsetting candidate\n");
212           InstrsToErase.insert(&*AfterBBI);
213           LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
214                      AfterBBI->dump());
215         }
216       }
217 
218       for (MachineInstr *MI : InstrsToErase) {
219         MI->eraseFromParent();
220       }
221       NumRemovedInPreEmit += InstrsToErase.size();
222       return !InstrsToErase.empty();
223     }
224 
225     // Check if this instruction is a PLDpc that is part of a GOT indirect
226     // access.
isGOTPLDpc(MachineInstr & Instr)227     bool isGOTPLDpc(MachineInstr &Instr) {
228       if (Instr.getOpcode() != PPC::PLDpc)
229         return false;
230 
231       // The result must be a register.
232       const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
233       if (!LoadedAddressReg.isReg())
234         return false;
235 
236       // Make sure that this is a global symbol.
237       const MachineOperand &SymbolOp = Instr.getOperand(1);
238       if (!SymbolOp.isGlobal())
239         return false;
240 
241       // Finally return true only if the GOT flag is present.
242       return PPCInstrInfo::hasGOTFlag(SymbolOp.getTargetFlags());
243     }
244 
addLinkerOpt(MachineBasicBlock & MBB,const TargetRegisterInfo * TRI)245     bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
246       MachineFunction *MF = MBB.getParent();
247       // If the linker opt is disabled then just return.
248       if (!EnablePCRelLinkerOpt)
249         return false;
250 
251       // Add this linker opt only if we are using PC Relative memops.
252       if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
253         return false;
254 
255       // Struct to keep track of one def/use pair for a GOT indirect access.
256       struct GOTDefUsePair {
257         MachineBasicBlock::iterator DefInst;
258         MachineBasicBlock::iterator UseInst;
259         Register DefReg;
260         Register UseReg;
261         bool StillValid;
262       };
263       // Vector of def/ues pairs in this basic block.
264       SmallVector<GOTDefUsePair, 4> CandPairs;
265       SmallVector<GOTDefUsePair, 4> ValidPairs;
266       bool MadeChange = false;
267 
268       // Run through all of the instructions in the basic block and try to
269       // collect potential pairs of GOT indirect access instructions.
270       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
271         // Look for the initial GOT indirect load.
272         if (isGOTPLDpc(*BBI)) {
273           GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
274                                     BBI->getOperand(0).getReg(),
275                                     PPC::NoRegister, true};
276           CandPairs.push_back(CurrentPair);
277           continue;
278         }
279 
280         // We haven't encountered any new PLD instructions, nothing to check.
281         if (CandPairs.empty())
282           continue;
283 
284         // Run through the candidate pairs and see if any of the registers
285         // defined in the PLD instructions are used by this instruction.
286         // Note: the size of CandPairs can change in the loop.
287         for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
288           GOTDefUsePair &Pair = CandPairs[Idx];
289           // The instruction does not use or modify this PLD's def reg,
290           // ignore it.
291           if (!BBI->readsRegister(Pair.DefReg, TRI) &&
292               !BBI->modifiesRegister(Pair.DefReg, TRI))
293             continue;
294 
295           // The use needs to be used in the address computation and not
296           // as the register being stored for a store.
297           const MachineOperand *UseOp =
298               hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
299 
300           // Check for a valid use.
301           if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
302               UseOp->isUse() && UseOp->isKill()) {
303             Pair.UseInst = BBI;
304             Pair.UseReg = BBI->getOperand(0).getReg();
305             ValidPairs.push_back(Pair);
306           }
307           CandPairs.erase(CandPairs.begin() + Idx);
308         }
309       }
310 
311       // Go through all of the pairs and check for any more valid uses.
312       for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
313         // We shouldn't be here if we don't have a valid pair.
314         assert(Pair->UseInst.isValid() && Pair->StillValid &&
315                "Kept an invalid def/use pair for GOT PCRel opt");
316         // We have found a potential pair. Search through the instructions
317         // between the def and the use to see if it is valid to mark this as a
318         // linker opt.
319         MachineBasicBlock::iterator BBI = Pair->DefInst;
320         ++BBI;
321         for (; BBI != Pair->UseInst; ++BBI) {
322           if (BBI->readsRegister(Pair->UseReg, TRI) ||
323               BBI->modifiesRegister(Pair->UseReg, TRI)) {
324             Pair->StillValid = false;
325             break;
326           }
327         }
328 
329         if (!Pair->StillValid)
330           continue;
331 
332         // The load/store instruction that uses the address from the PLD will
333         // either use a register (for a store) or define a register (for the
334         // load). That register will be added as an implicit def to the PLD
335         // and as an implicit use on the second memory op. This is a precaution
336         // to prevent future passes from using that register between the two
337         // instructions.
338         MachineOperand ImplDef =
339             MachineOperand::CreateReg(Pair->UseReg, true, true);
340         MachineOperand ImplUse =
341             MachineOperand::CreateReg(Pair->UseReg, false, true);
342         Pair->DefInst->addOperand(ImplDef);
343         Pair->UseInst->addOperand(ImplUse);
344 
345         // Create the symbol.
346         MCContext &Context = MF->getContext();
347         MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
348         MachineOperand PCRelLabel =
349             MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
350         Pair->DefInst->addOperand(*MF, PCRelLabel);
351         Pair->UseInst->addOperand(*MF, PCRelLabel);
352         MadeChange |= true;
353       }
354       return MadeChange;
355     }
356 
357     // This function removes redundant pairs of accumulator prime/unprime
358     // instructions. In some situations, it's possible the compiler inserts an
359     // accumulator prime instruction followed by an unprime instruction (e.g.
360     // when we store an accumulator after restoring it from a spill). If the
361     // accumulator is not used between the two, they can be removed. This
362     // function removes these redundant pairs from basic blocks.
363     // The algorithm is quite straightforward - every time we encounter a prime
364     // instruction, the primed register is added to a candidate set. Any use
365     // other than a prime removes the candidate from the set and any de-prime
366     // of a current candidate marks both the prime and de-prime for removal.
367     // This way we ensure we only remove prime/de-prime *pairs* with no
368     // intervening uses.
removeAccPrimeUnprime(MachineBasicBlock & MBB)369     bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
370       DenseSet<MachineInstr *> InstrsToErase;
371       // Initially, none of the acc registers are candidates.
372       SmallVector<MachineInstr *, 8> Candidates(
373           PPC::UACCRCRegClass.getNumRegs(), nullptr);
374 
375       for (MachineInstr &BBI : MBB.instrs()) {
376         unsigned Opc = BBI.getOpcode();
377         // If we are visiting a xxmtacc instruction, we add it and its operand
378         // register to the candidate set.
379         if (Opc == PPC::XXMTACC) {
380           Register Acc = BBI.getOperand(0).getReg();
381           assert(PPC::ACCRCRegClass.contains(Acc) &&
382                  "Unexpected register for XXMTACC");
383           Candidates[Acc - PPC::ACC0] = &BBI;
384         }
385         // If we are visiting a xxmfacc instruction and its operand register is
386         // in the candidate set, we mark the two instructions for removal.
387         else if (Opc == PPC::XXMFACC) {
388           Register Acc = BBI.getOperand(0).getReg();
389           assert(PPC::ACCRCRegClass.contains(Acc) &&
390                  "Unexpected register for XXMFACC");
391           if (!Candidates[Acc - PPC::ACC0])
392             continue;
393           InstrsToErase.insert(&BBI);
394           InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
395         }
396         // If we are visiting an instruction using an accumulator register
397         // as operand, we remove it from the candidate set.
398         else {
399           for (MachineOperand &Operand : BBI.operands()) {
400             if (!Operand.isReg())
401               continue;
402             Register Reg = Operand.getReg();
403             if (PPC::ACCRCRegClass.contains(Reg))
404               Candidates[Reg - PPC::ACC0] = nullptr;
405           }
406         }
407       }
408 
409       for (MachineInstr *MI : InstrsToErase)
410         MI->eraseFromParent();
411       NumRemovedInPreEmit += InstrsToErase.size();
412       return !InstrsToErase.empty();
413     }
414 
runOnMachineFunction(MachineFunction & MF)415     bool runOnMachineFunction(MachineFunction &MF) override {
416       // If the user wants to set the DSCR using command-line options,
417       // load in the specified value at the start of main.
418       if (DSCRValue.getNumOccurrences() > 0 && MF.getName() == "main" &&
419           MF.getFunction().hasExternalLinkage()) {
420         DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
421         RegScavenger RS;
422         MachineBasicBlock &MBB = MF.front();
423         // Find an unused GPR according to register liveness
424         RS.enterBasicBlock(MBB);
425         unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass);
426         if (InDSCR) {
427           const PPCInstrInfo *TII =
428               MF.getSubtarget<PPCSubtarget>().getInstrInfo();
429           DebugLoc dl;
430           MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point
431           // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and
432           // ORI, then move to DSCR. If the requested DSCR value is contained
433           // in a 16-bit signed number, we can emit a single `LI`, but the
434           // impact of saving one instruction in one function does not warrant
435           // any additional complexity in the logic here.
436           BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR)
437               .addImm(DSCRValue >> 16);
438           BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR)
439               .addReg(InDSCR)
440               .addImm(DSCRValue & 0xFFFF);
441           BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR))
442               .addReg(InDSCR, RegState::Kill);
443         } else
444           errs() << "Warning: Ran out of registers - Unable to set DSCR as "
445                     "requested";
446       }
447 
448       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
449         // Remove UNENCODED_NOP even when this pass is disabled.
450         // This needs to be done unconditionally so we don't emit zeros
451         // in the instruction stream.
452         SmallVector<MachineInstr *, 4> InstrsToErase;
453         for (MachineBasicBlock &MBB : MF)
454           for (MachineInstr &MI : MBB)
455             if (MI.getOpcode() == PPC::UNENCODED_NOP)
456               InstrsToErase.push_back(&MI);
457         for (MachineInstr *MI : InstrsToErase)
458           MI->eraseFromParent();
459         return false;
460       }
461       bool Changed = false;
462       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
463       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
464       SmallVector<MachineInstr *, 4> InstrsToErase;
465       for (MachineBasicBlock &MBB : MF) {
466         Changed |= removeRedundantLIs(MBB, TRI);
467         Changed |= addLinkerOpt(MBB, TRI);
468         Changed |= removeAccPrimeUnprime(MBB);
469         for (MachineInstr &MI : MBB) {
470           unsigned Opc = MI.getOpcode();
471           if (Opc == PPC::UNENCODED_NOP) {
472             InstrsToErase.push_back(&MI);
473             continue;
474           }
475           // Detect self copies - these can result from running AADB.
476           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
477             const MCInstrDesc &MCID = TII->get(Opc);
478             if (MCID.getNumOperands() == 3 &&
479                 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
480                 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
481               NumberOfSelfCopies++;
482               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
483               LLVM_DEBUG(MI.dump());
484               InstrsToErase.push_back(&MI);
485               continue;
486             }
487             else if (MCID.getNumOperands() == 2 &&
488                      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
489               NumberOfSelfCopies++;
490               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
491               LLVM_DEBUG(MI.dump());
492               InstrsToErase.push_back(&MI);
493               continue;
494             }
495           }
496           MachineInstr *DefMIToErase = nullptr;
497           SmallSet<Register, 4> UpdatedRegs;
498           if (TII->convertToImmediateForm(MI, UpdatedRegs, &DefMIToErase)) {
499             Changed = true;
500             NumRRConvertedInPreEmit++;
501             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
502             LLVM_DEBUG(MI.dump());
503             if (DefMIToErase) {
504               InstrsToErase.push_back(DefMIToErase);
505             }
506           }
507           if (TII->foldFrameOffset(MI)) {
508             Changed = true;
509             NumFrameOffFoldInPreEmit++;
510             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
511             LLVM_DEBUG(MI.dump());
512           }
513           if (TII->optimizeCmpPostRA(MI)) {
514             Changed = true;
515             NumCmpsInPreEmit++;
516             LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
517             LLVM_DEBUG(MI.dump());
518             InstrsToErase.push_back(&MI);
519           }
520         }
521 
522         // Eliminate conditional branch based on a constant CR bit by
523         // CRSET or CRUNSET. We eliminate the conditional branch or
524         // convert it into an unconditional branch. Also, if the CR bit
525         // is not used by other instructions, we eliminate CRSET as well.
526         auto I = MBB.getFirstInstrTerminator();
527         if (I == MBB.instr_end())
528           continue;
529         MachineInstr *Br = &*I;
530         if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
531           continue;
532         MachineInstr *CRSetMI = nullptr;
533         Register CRBit = Br->getOperand(0).getReg();
534         unsigned CRReg = getCRFromCRBit(CRBit);
535         bool SeenUse = false;
536         MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
537         for (It++; It != Er; It++) {
538           if (It->modifiesRegister(CRBit, TRI)) {
539             if ((It->getOpcode() == PPC::CRUNSET ||
540                  It->getOpcode() == PPC::CRSET) &&
541                 It->getOperand(0).getReg() == CRBit)
542               CRSetMI = &*It;
543             break;
544           }
545           if (It->readsRegister(CRBit, TRI))
546             SeenUse = true;
547         }
548         if (!CRSetMI) continue;
549 
550         unsigned CRSetOp = CRSetMI->getOpcode();
551         if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
552             (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
553           // Remove this branch since it cannot be taken.
554           InstrsToErase.push_back(Br);
555           MBB.removeSuccessor(Br->getOperand(1).getMBB());
556         }
557         else {
558           // This conditional branch is always taken. So, remove all branches
559           // and insert an unconditional branch to the destination of this.
560           MachineBasicBlock::iterator It = Br, Er = MBB.end();
561           for (; It != Er; It++) {
562             if (It->isDebugInstr()) continue;
563             assert(It->isTerminator() && "Non-terminator after a terminator");
564             InstrsToErase.push_back(&*It);
565           }
566           if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
567             ArrayRef<MachineOperand> NoCond;
568             TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
569                               NoCond, Br->getDebugLoc());
570           }
571           for (auto &Succ : MBB.successors())
572             if (Succ != Br->getOperand(1).getMBB()) {
573               MBB.removeSuccessor(Succ);
574               break;
575             }
576         }
577 
578         // If the CRBit is not used by another instruction, we can eliminate
579         // CRSET/CRUNSET instruction.
580         if (!SeenUse) {
581           // We need to check use of the CRBit in successors.
582           for (auto &SuccMBB : MBB.successors())
583             if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
584               SeenUse = true;
585               break;
586             }
587           if (!SeenUse)
588             InstrsToErase.push_back(CRSetMI);
589         }
590       }
591       for (MachineInstr *MI : InstrsToErase) {
592         LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
593         LLVM_DEBUG(MI->dump());
594         MI->eraseFromParent();
595         NumRemovedInPreEmit++;
596       }
597       return Changed;
598     }
599   };
600 }
601 
602 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
603                 false, false)
604 char PPCPreEmitPeephole::ID = 0;
605 
createPPCPreEmitPeepholePass()606 FunctionPass *llvm::createPPCPreEmitPeepholePass() {
607   return new PPCPreEmitPeephole();
608 }
609