xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include <cassert>
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "ppc-ctrloops"
49 
50 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
51 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
52 
53 namespace {
54 class PPCCTRLoops : public MachineFunctionPass {
55 public:
56   static char ID;
57 
PPCCTRLoops()58   PPCCTRLoops() : MachineFunctionPass(ID) {}
59 
getAnalysisUsage(AnalysisUsage & AU) const60   void getAnalysisUsage(AnalysisUsage &AU) const override {
61     AU.addRequired<MachineLoopInfoWrapperPass>();
62     MachineFunctionPass::getAnalysisUsage(AU);
63   }
64 
65   bool runOnMachineFunction(MachineFunction &MF) override;
66 
67 private:
68   const PPCInstrInfo *TII = nullptr;
69   MachineRegisterInfo *MRI = nullptr;
70 
71   bool processLoop(MachineLoop *ML);
72   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
73   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
74                          MachineInstr *Dec);
75   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
76 };
77 } // namespace
78 
79 char PPCCTRLoops::ID = 0;
80 
81 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
82                       false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)83 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
84 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
85                     false, false)
86 
87 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
88 
runOnMachineFunction(MachineFunction & MF)89 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
90   bool Changed = false;
91 
92   auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
93   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
94   MRI = &MF.getRegInfo();
95 
96   for (auto *ML : MLI) {
97     if (ML->isOutermost())
98       Changed |= processLoop(ML);
99   }
100 
101 #ifndef NDEBUG
102   for (const MachineBasicBlock &BB : MF) {
103     for (const MachineInstr &I : BB)
104       assert((I.getOpcode() != PPC::DecreaseCTRloop &&
105               I.getOpcode() != PPC::DecreaseCTR8loop) &&
106              "CTR loop pseudo is not expanded!");
107   }
108 #endif
109 
110   return Changed;
111 }
112 
isCTRClobber(MachineInstr * MI,bool CheckReads) const113 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
114   if (!CheckReads) {
115     // If we are only checking for defs, that is we are going to find
116     // definitions before MTCTRloop, for this case:
117     // CTR defination inside the callee of a call instruction will not impact
118     // the defination of MTCTRloop, so we can use definesRegister() for the
119     // check, no need to check the regmask.
120     return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
121            MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
122   }
123 
124   if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
125       MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
126     return true;
127 
128   if (MI->getDesc().isCall())
129     return true;
130 
131   // We define the CTR in the loop preheader, so if there is any CTR reader in
132   // the loop, we also can not use CTR loop form.
133   if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
134       MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
135     return true;
136 
137   return false;
138 }
139 
processLoop(MachineLoop * ML)140 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
141   bool Changed = false;
142 
143   // Align with HardwareLoop pass, process inner loops first.
144   for (MachineLoop *I : *ML)
145     Changed |= processLoop(I);
146 
147   // If any inner loop is changed, outter loop must be without hardware loop
148   // intrinsics.
149   if (Changed)
150     return true;
151 
152   auto IsLoopStart = [](MachineInstr &MI) {
153     return MI.getOpcode() == PPC::MTCTRloop ||
154            MI.getOpcode() == PPC::MTCTR8loop;
155   };
156 
157   auto SearchForStart =
158       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
159     for (auto &MI : *MBB) {
160       if (IsLoopStart(MI))
161         return &MI;
162     }
163     return nullptr;
164   };
165 
166   MachineInstr *Start = nullptr;
167   MachineInstr *Dec = nullptr;
168   bool InvalidCTRLoop = false;
169 
170   MachineBasicBlock *Preheader = ML->getLoopPreheader();
171   // If there is no preheader for this loop, there must be no MTCTRloop
172   // either.
173   if (!Preheader)
174     return false;
175 
176   Start = SearchForStart(Preheader);
177   // This is not a CTR loop candidate.
178   if (!Start)
179     return false;
180 
181   // If CTR is live to the preheader, we can not redefine the CTR register.
182   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
183     InvalidCTRLoop = true;
184 
185   // Make sure there is also no CTR clobber in the block preheader between the
186   // begin and MTCTR.
187   for (MachineBasicBlock::reverse_instr_iterator I =
188            std::next(Start->getReverseIterator());
189        I != Preheader->instr_rend(); ++I)
190     // Only check the definitions of CTR. If there is non-dead definition for
191     // the CTR, we conservatively don't generate a CTR loop.
192     if (isCTRClobber(&*I, /* CheckReads */ false)) {
193       InvalidCTRLoop = true;
194       break;
195     }
196 
197   // Make sure there is also no CTR clobber/user in the block preheader between
198   // MTCTR and the end.
199   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
200        I != Preheader->instr_end(); ++I)
201     if (isCTRClobber(&*I, /* CheckReads */ true)) {
202       InvalidCTRLoop = true;
203       break;
204     }
205 
206   // Find the CTR loop components and decide whether or not to fall back to a
207   // normal loop.
208   for (auto *MBB : reverse(ML->getBlocks())) {
209     for (auto &MI : *MBB) {
210       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
211           MI.getOpcode() == PPC::DecreaseCTR8loop)
212         Dec = &MI;
213       else if (!InvalidCTRLoop)
214         // If any instruction clobber CTR, then we can not generate a CTR loop.
215         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
216     }
217     if (Dec && InvalidCTRLoop)
218       break;
219   }
220 
221   assert(Dec && "CTR loop is not complete!");
222 
223   if (InvalidCTRLoop) {
224     expandNormalLoops(ML, Start, Dec);
225     ++NumNormalLoops;
226   }
227   else {
228     expandCTRLoops(ML, Start, Dec);
229     ++NumCTRLoops;
230   }
231   return true;
232 }
233 
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)234 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
235                                     MachineInstr *Dec) {
236   bool Is64Bit =
237       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
238 
239   MachineBasicBlock *Preheader = Start->getParent();
240   MachineBasicBlock *Exiting = Dec->getParent();
241   assert((Preheader && Exiting) &&
242          "Preheader and exiting should exist for CTR loop!");
243 
244   assert(Dec->getOperand(1).getImm() == 1 &&
245          "Loop decrement stride must be 1");
246 
247   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
248   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
249 
250   Register PHIDef =
251       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
252                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
253 
254   Start->getParent()->getParent()->getProperties().resetNoPHIs();
255 
256   // Generate "PHI" in the header block.
257   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
258                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
259   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
260 
261   Register ADDIDef =
262       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
263                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
264   // Generate "addi -1" in the exiting block.
265   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
266       .addReg(PHIDef)
267       .addImm(-1);
268 
269   // Add other inputs for the PHI node.
270   if (ML->isLoopLatch(Exiting)) {
271     // There must be only two predecessors for the loop header, one is the
272     // Preheader and the other one is loop latch Exiting. In hardware loop
273     // insertion pass, the block containing DecreaseCTRloop must dominate all
274     // loop latches. So there must be only one latch.
275     assert(ML->getHeader()->pred_size() == 2 &&
276            "Loop header predecessor is not right!");
277     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
278   } else {
279     // If the block containing DecreaseCTRloop is not a loop latch, we can use
280     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
281     // insertion pass, the block containing DecreaseCTRloop must dominate all
282     // loop latches.
283     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
284       if (ML->contains(P)) {
285         assert(ML->isLoopLatch(P) &&
286                "Loop's header in-loop predecessor is not loop latch!");
287         PHIMIB.addReg(ADDIDef).addMBB(P);
288       } else
289         assert(P == Preheader &&
290                "CTR loop should not be generated for irreducible loop!");
291     }
292   }
293 
294   // Generate the compare in the exiting block.
295   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
296   auto CMPMIB =
297       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
298           .addReg(ADDIDef)
299           .addImm(0);
300 
301   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
302           Dec->getOperand(0).getReg())
303       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
304 
305   // Remove the pseudo instructions.
306   Start->eraseFromParent();
307   Dec->eraseFromParent();
308 }
309 
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)310 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
311                                  MachineInstr *Dec) {
312   bool Is64Bit =
313       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
314 
315   MachineBasicBlock *Preheader = Start->getParent();
316   MachineBasicBlock *Exiting = Dec->getParent();
317 
318   (void)Preheader;
319   assert((Preheader && Exiting) &&
320          "Preheader and exiting should exist for CTR loop!");
321 
322   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
323 
324   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
325   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
326   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
327   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
328          "There should be only one user for loop decrement pseudo!");
329 
330   unsigned Opcode = 0;
331   switch (BrInstr->getOpcode()) {
332   case PPC::BC:
333     Opcode = BDNZOpcode;
334     (void) ML;
335     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
336            "Invalid ctr loop!");
337     break;
338   case PPC::BCn:
339     Opcode = BDZOpcode;
340     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
341            "Invalid ctr loop!");
342     break;
343   default:
344     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
345   }
346 
347   // Generate "bdnz/bdz" in the exiting block just before the terminator.
348   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
349       .addMBB(BrInstr->getOperand(1).getMBB());
350 
351   // Remove the pseudo instructions.
352   BrInstr->eraseFromParent();
353   Dec->eraseFromParent();
354 }
355