xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A pre-emit peephole for catching opportunities introduced by late passes such
10 // as MachineBlockPlacement.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPC.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/CodeGen/LivePhysRegs.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/RegisterScavenging.h"
23 #include "llvm/MC/MCContext.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Debug.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "ppc-pre-emit-peephole"
30 
31 STATISTIC(NumRRConvertedInPreEmit,
32           "Number of r+r instructions converted to r+i in pre-emit peephole");
33 STATISTIC(NumRemovedInPreEmit,
34           "Number of instructions deleted in pre-emit peephole");
35 STATISTIC(NumberOfSelfCopies,
36           "Number of self copy instructions eliminated");
37 STATISTIC(NumFrameOffFoldInPreEmit,
38           "Number of folding frame offset by using r+r in pre-emit peephole");
39 STATISTIC(NumCmpsInPreEmit,
40           "Number of compares eliminated in pre-emit peephole");
41 
42 static cl::opt<bool>
43 EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
44                      cl::desc("enable PC Relative linker optimization"));
45 
46 static cl::opt<bool>
47 RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
48                    cl::desc("Run pre-emit peephole optimizations."));
49 
50 static cl::opt<uint64_t>
51 DSCRValue("ppc-set-dscr", cl::Hidden,
52           cl::desc("Set the Data Stream Control Register."));
53 
54 namespace {
55 
hasPCRelativeForm(MachineInstr & Use)56 static bool hasPCRelativeForm(MachineInstr &Use) {
57   switch (Use.getOpcode()) {
58   default:
59     return false;
60   case PPC::LBZ:
61   case PPC::LBZ8:
62   case PPC::LHA:
63   case PPC::LHA8:
64   case PPC::LHZ:
65   case PPC::LHZ8:
66   case PPC::LWZ:
67   case PPC::LWZ8:
68   case PPC::STB:
69   case PPC::STB8:
70   case PPC::STH:
71   case PPC::STH8:
72   case PPC::STW:
73   case PPC::STW8:
74   case PPC::LD:
75   case PPC::STD:
76   case PPC::LWA:
77   case PPC::LXSD:
78   case PPC::LXSSP:
79   case PPC::LXV:
80   case PPC::STXSD:
81   case PPC::STXSSP:
82   case PPC::STXV:
83   case PPC::LFD:
84   case PPC::LFS:
85   case PPC::STFD:
86   case PPC::STFS:
87   case PPC::DFLOADf32:
88   case PPC::DFLOADf64:
89   case PPC::DFSTOREf32:
90   case PPC::DFSTOREf64:
91     return true;
92   }
93 }
94 
95   class PPCPreEmitPeephole : public MachineFunctionPass {
96   public:
97     static char ID;
PPCPreEmitPeephole()98     PPCPreEmitPeephole() : MachineFunctionPass(ID) {}
99 
getAnalysisUsage(AnalysisUsage & AU) const100     void getAnalysisUsage(AnalysisUsage &AU) const override {
101       MachineFunctionPass::getAnalysisUsage(AU);
102     }
103 
getRequiredProperties() const104     MachineFunctionProperties getRequiredProperties() const override {
105       return MachineFunctionProperties().setNoVRegs();
106     }
107 
    // This function removes any redundant load immediates. It has two nested
    // loops: the outer loop finds a load immediate (BBI) that could make later
    // identical load immediates redundant, and the inner loop scans the
    // instructions after BBI (AfterBBI) to find that redundancy and update
    // kill/dead flags accordingly. If AfterBBI is the same as BBI, it is
    // redundant; otherwise, any instruction that modifies the def register of
    // BBI ends the scan.
    // DeadOrKillToUnset is a pointer to the previous operand that had the
    // kill/dead flag set. It keeps track of the def register of BBI, the use
    // registers of AfterBBIs and the def registers of AfterBBIs.
removeRedundantLIs(MachineBasicBlock & MBB,const TargetRegisterInfo * TRI)117     bool removeRedundantLIs(MachineBasicBlock &MBB,
118                             const TargetRegisterInfo *TRI) {
119       LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
120                  MBB.dump(); dbgs() << "\n");
121 
122       DenseSet<MachineInstr *> InstrsToErase;
123       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
124         // Skip load immediate that is marked to be erased later because it
125         // cannot be used to replace any other instructions.
126         if (InstrsToErase.contains(&*BBI))
127           continue;
128         // Skip non-load immediate.
129         unsigned Opc = BBI->getOpcode();
130         if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
131             Opc != PPC::LIS8)
132           continue;
133         // Skip load immediate, where the operand is a relocation (e.g., $r3 =
134         // LI target-flags(ppc-lo) %const.0).
135         if (!BBI->getOperand(1).isImm())
136           continue;
137         assert(BBI->getOperand(0).isReg() &&
138                "Expected a register for the first operand");
139 
140         LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
141 
142         Register Reg = BBI->getOperand(0).getReg();
143         int64_t Imm = BBI->getOperand(1).getImm();
144         MachineOperand *DeadOrKillToUnset = nullptr;
145         if (BBI->getOperand(0).isDead()) {
146           DeadOrKillToUnset = &BBI->getOperand(0);
147           LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
148                             << " from load immediate " << *BBI
149                             << " is a unsetting candidate\n");
150         }
151         // This loop scans instructions after BBI to see if there is any
152         // redundant load immediate.
153         for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
154              ++AfterBBI) {
155           // Track the operand that kill Reg. We would unset the kill flag of
156           // the operand if there is a following redundant load immediate.
157           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, TRI, true);
158 
159           // We can't just clear implicit kills, so if we encounter one, stop
160           // looking further.
161           if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
162             LLVM_DEBUG(dbgs()
163                        << "Encountered an implicit kill, cannot proceed: ");
164             LLVM_DEBUG(AfterBBI->dump());
165             break;
166           }
167 
168           if (KillIdx != -1) {
169             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
170             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
171             LLVM_DEBUG(dbgs()
172                        << " Kill flag of " << *DeadOrKillToUnset << " from "
173                        << *AfterBBI << " is a unsetting candidate\n");
174           }
175 
176           if (!AfterBBI->modifiesRegister(Reg, TRI))
177             continue;
178           // Finish scanning because Reg is overwritten by a non-load
179           // instruction.
180           if (AfterBBI->getOpcode() != Opc)
181             break;
182           assert(AfterBBI->getOperand(0).isReg() &&
183                  "Expected a register for the first operand");
184           // Finish scanning because Reg is overwritten by a relocation or a
185           // different value.
186           if (!AfterBBI->getOperand(1).isImm() ||
187               AfterBBI->getOperand(1).getImm() != Imm)
188             break;
189 
190           // It loads same immediate value to the same Reg, which is redundant.
191           // We would unset kill flag in previous Reg usage to extend live range
192           // of Reg first, then remove the redundancy.
193           if (DeadOrKillToUnset) {
194             LLVM_DEBUG(dbgs()
195                        << " Unset dead/kill flag of " << *DeadOrKillToUnset
196                        << " from " << *DeadOrKillToUnset->getParent());
197             if (DeadOrKillToUnset->isDef())
198               DeadOrKillToUnset->setIsDead(false);
199             else
200               DeadOrKillToUnset->setIsKill(false);
201           }
202           DeadOrKillToUnset =
203               AfterBBI->findRegisterDefOperand(Reg, TRI, true, true);
204           if (DeadOrKillToUnset)
205             LLVM_DEBUG(dbgs()
206                        << " Dead flag of " << *DeadOrKillToUnset << " from "
207                        << *AfterBBI << " is a unsetting candidate\n");
208           InstrsToErase.insert(&*AfterBBI);
209           LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
210                      AfterBBI->dump());
211         }
212       }
213 
214       for (MachineInstr *MI : InstrsToErase) {
215         MI->eraseFromParent();
216       }
217       NumRemovedInPreEmit += InstrsToErase.size();
218       return !InstrsToErase.empty();
219     }
220 
221     // Check if this instruction is a PLDpc that is part of a GOT indirect
222     // access.
isGOTPLDpc(MachineInstr & Instr)223     bool isGOTPLDpc(MachineInstr &Instr) {
224       if (Instr.getOpcode() != PPC::PLDpc)
225         return false;
226 
227       // The result must be a register.
228       const MachineOperand &LoadedAddressReg = Instr.getOperand(0);
229       if (!LoadedAddressReg.isReg())
230         return false;
231 
232       // Make sure that this is a global symbol.
233       const MachineOperand &SymbolOp = Instr.getOperand(1);
234       if (!SymbolOp.isGlobal())
235         return false;
236 
237       // Finally return true only if the GOT flag is present.
238       return PPCInstrInfo::hasGOTFlag(SymbolOp.getTargetFlags());
239     }
240 
addLinkerOpt(MachineBasicBlock & MBB,const TargetRegisterInfo * TRI)241     bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) {
242       MachineFunction *MF = MBB.getParent();
243       // If the linker opt is disabled then just return.
244       if (!EnablePCRelLinkerOpt)
245         return false;
246 
247       // Add this linker opt only if we are using PC Relative memops.
248       if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls())
249         return false;
250 
251       // Struct to keep track of one def/use pair for a GOT indirect access.
252       struct GOTDefUsePair {
253         MachineBasicBlock::iterator DefInst;
254         MachineBasicBlock::iterator UseInst;
255         Register DefReg;
256         Register UseReg;
257         bool StillValid;
258       };
259       // Vector of def/ues pairs in this basic block.
260       SmallVector<GOTDefUsePair, 4> CandPairs;
261       SmallVector<GOTDefUsePair, 4> ValidPairs;
262       bool MadeChange = false;
263 
264       // Run through all of the instructions in the basic block and try to
265       // collect potential pairs of GOT indirect access instructions.
266       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
267         // Look for the initial GOT indirect load.
268         if (isGOTPLDpc(*BBI)) {
269           GOTDefUsePair CurrentPair{BBI, MachineBasicBlock::iterator(),
270                                     BBI->getOperand(0).getReg(),
271                                     PPC::NoRegister, true};
272           CandPairs.push_back(CurrentPair);
273           continue;
274         }
275 
276         // We haven't encountered any new PLD instructions, nothing to check.
277         if (CandPairs.empty())
278           continue;
279 
280         // Run through the candidate pairs and see if any of the registers
281         // defined in the PLD instructions are used by this instruction.
282         // Note: the size of CandPairs can change in the loop.
283         for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) {
284           GOTDefUsePair &Pair = CandPairs[Idx];
285           // The instruction does not use or modify this PLD's def reg,
286           // ignore it.
287           if (!BBI->readsRegister(Pair.DefReg, TRI) &&
288               !BBI->modifiesRegister(Pair.DefReg, TRI))
289             continue;
290 
291           // The use needs to be used in the address computation and not
292           // as the register being stored for a store.
293           const MachineOperand *UseOp =
294               hasPCRelativeForm(*BBI) ? &BBI->getOperand(2) : nullptr;
295 
296           // Check for a valid use.
297           if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg &&
298               UseOp->isUse() && UseOp->isKill()) {
299             Pair.UseInst = BBI;
300             Pair.UseReg = BBI->getOperand(0).getReg();
301             ValidPairs.push_back(Pair);
302           }
303           CandPairs.erase(CandPairs.begin() + Idx);
304         }
305       }
306 
307       // Go through all of the pairs and check for any more valid uses.
308       for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) {
309         // We shouldn't be here if we don't have a valid pair.
310         assert(Pair->UseInst.isValid() && Pair->StillValid &&
311                "Kept an invalid def/use pair for GOT PCRel opt");
312         // We have found a potential pair. Search through the instructions
313         // between the def and the use to see if it is valid to mark this as a
314         // linker opt.
315         MachineBasicBlock::iterator BBI = Pair->DefInst;
316         ++BBI;
317         for (; BBI != Pair->UseInst; ++BBI) {
318           if (BBI->readsRegister(Pair->UseReg, TRI) ||
319               BBI->modifiesRegister(Pair->UseReg, TRI)) {
320             Pair->StillValid = false;
321             break;
322           }
323         }
324 
325         if (!Pair->StillValid)
326           continue;
327 
328         // The load/store instruction that uses the address from the PLD will
329         // either use a register (for a store) or define a register (for the
330         // load). That register will be added as an implicit def to the PLD
331         // and as an implicit use on the second memory op. This is a precaution
332         // to prevent future passes from using that register between the two
333         // instructions.
334         MachineOperand ImplDef =
335             MachineOperand::CreateReg(Pair->UseReg, true, true);
336         MachineOperand ImplUse =
337             MachineOperand::CreateReg(Pair->UseReg, false, true);
338         Pair->DefInst->addOperand(ImplDef);
339         Pair->UseInst->addOperand(ImplUse);
340 
341         // Create the symbol.
342         MCContext &Context = MF->getContext();
343         MCSymbol *Symbol = Context.createNamedTempSymbol("pcrel");
344         MachineOperand PCRelLabel =
345             MachineOperand::CreateMCSymbol(Symbol, PPCII::MO_PCREL_OPT_FLAG);
346         Pair->DefInst->addOperand(*MF, PCRelLabel);
347         Pair->UseInst->addOperand(*MF, PCRelLabel);
348         MadeChange |= true;
349       }
350       return MadeChange;
351     }
352 
353     // This function removes redundant pairs of accumulator prime/unprime
354     // instructions. In some situations, it's possible the compiler inserts an
355     // accumulator prime instruction followed by an unprime instruction (e.g.
356     // when we store an accumulator after restoring it from a spill). If the
357     // accumulator is not used between the two, they can be removed. This
358     // function removes these redundant pairs from basic blocks.
359     // The algorithm is quite straightforward - every time we encounter a prime
360     // instruction, the primed register is added to a candidate set. Any use
361     // other than a prime removes the candidate from the set and any de-prime
362     // of a current candidate marks both the prime and de-prime for removal.
363     // This way we ensure we only remove prime/de-prime *pairs* with no
364     // intervening uses.
removeAccPrimeUnprime(MachineBasicBlock & MBB)365     bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
366       DenseSet<MachineInstr *> InstrsToErase;
367       // Initially, none of the acc registers are candidates.
368       SmallVector<MachineInstr *, 8> Candidates(
369           PPC::UACCRCRegClass.getNumRegs(), nullptr);
370 
371       for (MachineInstr &BBI : MBB.instrs()) {
372         unsigned Opc = BBI.getOpcode();
373         // If we are visiting a xxmtacc instruction, we add it and its operand
374         // register to the candidate set.
375         if (Opc == PPC::XXMTACC) {
376           Register Acc = BBI.getOperand(0).getReg();
377           assert(PPC::ACCRCRegClass.contains(Acc) &&
378                  "Unexpected register for XXMTACC");
379           Candidates[Acc - PPC::ACC0] = &BBI;
380         }
381         // If we are visiting a xxmfacc instruction and its operand register is
382         // in the candidate set, we mark the two instructions for removal.
383         else if (Opc == PPC::XXMFACC) {
384           Register Acc = BBI.getOperand(0).getReg();
385           assert(PPC::ACCRCRegClass.contains(Acc) &&
386                  "Unexpected register for XXMFACC");
387           if (!Candidates[Acc - PPC::ACC0])
388             continue;
389           InstrsToErase.insert(&BBI);
390           InstrsToErase.insert(Candidates[Acc - PPC::ACC0]);
391         }
392         // If we are visiting an instruction using an accumulator register
393         // as operand, we remove it from the candidate set.
394         else {
395           for (MachineOperand &Operand : BBI.operands()) {
396             if (!Operand.isReg())
397               continue;
398             Register Reg = Operand.getReg();
399             if (PPC::ACCRCRegClass.contains(Reg))
400               Candidates[Reg - PPC::ACC0] = nullptr;
401           }
402         }
403       }
404 
405       for (MachineInstr *MI : InstrsToErase)
406         MI->eraseFromParent();
407       NumRemovedInPreEmit += InstrsToErase.size();
408       return !InstrsToErase.empty();
409     }
410 
runOnMachineFunction(MachineFunction & MF)411     bool runOnMachineFunction(MachineFunction &MF) override {
412       // If the user wants to set the DSCR using command-line options,
413       // load in the specified value at the start of main.
414       if (DSCRValue.getNumOccurrences() > 0 && MF.getName() == "main" &&
415           MF.getFunction().hasExternalLinkage()) {
416         DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask
417         RegScavenger RS;
418         MachineBasicBlock &MBB = MF.front();
419         // Find an unused GPR according to register liveness
420         RS.enterBasicBlock(MBB);
421         unsigned InDSCR = RS.FindUnusedReg(&PPC::GPRCRegClass);
422         if (InDSCR) {
423           const PPCInstrInfo *TII =
424               MF.getSubtarget<PPCSubtarget>().getInstrInfo();
425           DebugLoc dl;
426           MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point
427           // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and
428           // ORI, then move to DSCR. If the requested DSCR value is contained
429           // in a 16-bit signed number, we can emit a single `LI`, but the
430           // impact of saving one instruction in one function does not warrant
431           // any additional complexity in the logic here.
432           BuildMI(MBB, IP, dl, TII->get(PPC::LIS), InDSCR)
433               .addImm(DSCRValue >> 16);
434           BuildMI(MBB, IP, dl, TII->get(PPC::ORI), InDSCR)
435               .addReg(InDSCR)
436               .addImm(DSCRValue & 0xFFFF);
437           BuildMI(MBB, IP, dl, TII->get(PPC::MTUDSCR))
438               .addReg(InDSCR, RegState::Kill);
439         } else
440           errs() << "Warning: Ran out of registers - Unable to set DSCR as "
441                     "requested";
442       }
443 
444       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
445         // Remove UNENCODED_NOP even when this pass is disabled.
446         // This needs to be done unconditionally so we don't emit zeros
447         // in the instruction stream.
448         SmallVector<MachineInstr *, 4> InstrsToErase;
449         for (MachineBasicBlock &MBB : MF)
450           for (MachineInstr &MI : MBB)
451             if (MI.getOpcode() == PPC::UNENCODED_NOP)
452               InstrsToErase.push_back(&MI);
453         for (MachineInstr *MI : InstrsToErase)
454           MI->eraseFromParent();
455         return false;
456       }
457       bool Changed = false;
458       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
459       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
460       SmallVector<MachineInstr *, 4> InstrsToErase;
461       for (MachineBasicBlock &MBB : MF) {
462         Changed |= removeRedundantLIs(MBB, TRI);
463         Changed |= addLinkerOpt(MBB, TRI);
464         Changed |= removeAccPrimeUnprime(MBB);
465         for (MachineInstr &MI : MBB) {
466           unsigned Opc = MI.getOpcode();
467           if (Opc == PPC::UNENCODED_NOP) {
468             InstrsToErase.push_back(&MI);
469             continue;
470           }
471           // Detect self copies - these can result from running AADB.
472           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
473             const MCInstrDesc &MCID = TII->get(Opc);
474             if (MCID.getNumOperands() == 3 &&
475                 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
476                 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
477               NumberOfSelfCopies++;
478               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
479               LLVM_DEBUG(MI.dump());
480               InstrsToErase.push_back(&MI);
481               continue;
482             }
483             else if (MCID.getNumOperands() == 2 &&
484                      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
485               NumberOfSelfCopies++;
486               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
487               LLVM_DEBUG(MI.dump());
488               InstrsToErase.push_back(&MI);
489               continue;
490             }
491           }
492           MachineInstr *DefMIToErase = nullptr;
493           SmallSet<Register, 4> UpdatedRegs;
494           if (TII->convertToImmediateForm(MI, UpdatedRegs, &DefMIToErase)) {
495             Changed = true;
496             NumRRConvertedInPreEmit++;
497             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
498             LLVM_DEBUG(MI.dump());
499             if (DefMIToErase) {
500               InstrsToErase.push_back(DefMIToErase);
501             }
502           }
503           if (TII->foldFrameOffset(MI)) {
504             Changed = true;
505             NumFrameOffFoldInPreEmit++;
506             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
507             LLVM_DEBUG(MI.dump());
508           }
509           if (TII->optimizeCmpPostRA(MI)) {
510             Changed = true;
511             NumCmpsInPreEmit++;
512             LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
513             LLVM_DEBUG(MI.dump());
514             InstrsToErase.push_back(&MI);
515           }
516         }
517 
518         // Eliminate conditional branch based on a constant CR bit by
519         // CRSET or CRUNSET. We eliminate the conditional branch or
520         // convert it into an unconditional branch. Also, if the CR bit
521         // is not used by other instructions, we eliminate CRSET as well.
522         auto I = MBB.getFirstInstrTerminator();
523         if (I == MBB.instr_end())
524           continue;
525         MachineInstr *Br = &*I;
526         if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
527           continue;
528         MachineInstr *CRSetMI = nullptr;
529         Register CRBit = Br->getOperand(0).getReg();
530         unsigned CRReg = getCRFromCRBit(CRBit);
531         bool SeenUse = false;
532         MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
533         for (It++; It != Er; It++) {
534           if (It->modifiesRegister(CRBit, TRI)) {
535             if ((It->getOpcode() == PPC::CRUNSET ||
536                  It->getOpcode() == PPC::CRSET) &&
537                 It->getOperand(0).getReg() == CRBit)
538               CRSetMI = &*It;
539             break;
540           }
541           if (It->readsRegister(CRBit, TRI))
542             SeenUse = true;
543         }
544         if (!CRSetMI) continue;
545 
546         unsigned CRSetOp = CRSetMI->getOpcode();
547         if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
548             (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
549           // Remove this branch since it cannot be taken.
550           InstrsToErase.push_back(Br);
551           MBB.removeSuccessor(Br->getOperand(1).getMBB());
552         }
553         else {
554           // This conditional branch is always taken. So, remove all branches
555           // and insert an unconditional branch to the destination of this.
556           MachineBasicBlock::iterator It = Br, Er = MBB.end();
557           for (; It != Er; It++) {
558             if (It->isDebugInstr()) continue;
559             assert(It->isTerminator() && "Non-terminator after a terminator");
560             InstrsToErase.push_back(&*It);
561           }
562           if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
563             ArrayRef<MachineOperand> NoCond;
564             TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
565                               NoCond, Br->getDebugLoc());
566           }
567           for (auto &Succ : MBB.successors())
568             if (Succ != Br->getOperand(1).getMBB()) {
569               MBB.removeSuccessor(Succ);
570               break;
571             }
572         }
573 
574         // If the CRBit is not used by another instruction, we can eliminate
575         // CRSET/CRUNSET instruction.
576         if (!SeenUse) {
577           // We need to check use of the CRBit in successors.
578           for (auto &SuccMBB : MBB.successors())
579             if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
580               SeenUse = true;
581               break;
582             }
583           if (!SeenUse)
584             InstrsToErase.push_back(CRSetMI);
585         }
586       }
587       for (MachineInstr *MI : InstrsToErase) {
588         LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
589         LLVM_DEBUG(MI->dump());
590         MI->eraseFromParent();
591         NumRemovedInPreEmit++;
592       }
593       return Changed;
594     }
595   };
596 }
597 
598 INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
599                 false, false)
600 char PPCPreEmitPeephole::ID = 0;
601 
createPPCPreEmitPeepholePass()602 FunctionPass *llvm::createPPCPreEmitPeepholePass() {
603   return new PPCPreEmitPeephole();
604 }
605