xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include <cassert>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "ppc-ctrloops"
53 
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56 
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60   static char ID;
61 
PPCCTRLoops()62   PPCCTRLoops() : MachineFunctionPass(ID) {
63     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64   }
65 
getAnalysisUsage(AnalysisUsage & AU) const66   void getAnalysisUsage(AnalysisUsage &AU) const override {
67     AU.addRequired<MachineLoopInfoWrapperPass>();
68     MachineFunctionPass::getAnalysisUsage(AU);
69   }
70 
71   bool runOnMachineFunction(MachineFunction &MF) override;
72 
73 private:
74   const PPCInstrInfo *TII = nullptr;
75   MachineRegisterInfo *MRI = nullptr;
76 
77   bool processLoop(MachineLoop *ML);
78   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80                          MachineInstr *Dec);
81   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84 
85 char PPCCTRLoops::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88                       false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91                     false, false)
92 
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94 
runOnMachineFunction(MachineFunction & MF)95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96   bool Changed = false;
97 
98   auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
99   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100   MRI = &MF.getRegInfo();
101 
102   for (auto *ML : MLI) {
103     if (ML->isOutermost())
104       Changed |= processLoop(ML);
105   }
106 
107 #ifndef NDEBUG
108   for (const MachineBasicBlock &BB : MF) {
109     for (const MachineInstr &I : BB)
110       assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111               I.getOpcode() != PPC::DecreaseCTR8loop) &&
112              "CTR loop pseudo is not expanded!");
113   }
114 #endif
115 
116   return Changed;
117 }
118 
isCTRClobber(MachineInstr * MI,bool CheckReads) const119 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120   if (!CheckReads) {
121     // If we are only checking for defs, that is we are going to find
122     // definitions before MTCTRloop, for this case:
123     // CTR defination inside the callee of a call instruction will not impact
124     // the defination of MTCTRloop, so we can use definesRegister() for the
125     // check, no need to check the regmask.
126     return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
127            MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
128   }
129 
130   if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
131       MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
132     return true;
133 
134   if (MI->getDesc().isCall())
135     return true;
136 
137   // We define the CTR in the loop preheader, so if there is any CTR reader in
138   // the loop, we also can not use CTR loop form.
139   if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
140       MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
141     return true;
142 
143   return false;
144 }
145 
processLoop(MachineLoop * ML)146 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
147   bool Changed = false;
148 
149   // Align with HardwareLoop pass, process inner loops first.
150   for (MachineLoop *I : *ML)
151     Changed |= processLoop(I);
152 
153   // If any inner loop is changed, outter loop must be without hardware loop
154   // intrinsics.
155   if (Changed)
156     return true;
157 
158   auto IsLoopStart = [](MachineInstr &MI) {
159     return MI.getOpcode() == PPC::MTCTRloop ||
160            MI.getOpcode() == PPC::MTCTR8loop;
161   };
162 
163   auto SearchForStart =
164       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
165     for (auto &MI : *MBB) {
166       if (IsLoopStart(MI))
167         return &MI;
168     }
169     return nullptr;
170   };
171 
172   MachineInstr *Start = nullptr;
173   MachineInstr *Dec = nullptr;
174   bool InvalidCTRLoop = false;
175 
176   MachineBasicBlock *Preheader = ML->getLoopPreheader();
177   // If there is no preheader for this loop, there must be no MTCTRloop
178   // either.
179   if (!Preheader)
180     return false;
181 
182   Start = SearchForStart(Preheader);
183   // This is not a CTR loop candidate.
184   if (!Start)
185     return false;
186 
187   // If CTR is live to the preheader, we can not redefine the CTR register.
188   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
189     InvalidCTRLoop = true;
190 
191   // Make sure there is also no CTR clobber in the block preheader between the
192   // begin and MTCTR.
193   for (MachineBasicBlock::reverse_instr_iterator I =
194            std::next(Start->getReverseIterator());
195        I != Preheader->instr_rend(); ++I)
196     // Only check the definitions of CTR. If there is non-dead definition for
197     // the CTR, we conservatively don't generate a CTR loop.
198     if (isCTRClobber(&*I, /* CheckReads */ false)) {
199       InvalidCTRLoop = true;
200       break;
201     }
202 
203   // Make sure there is also no CTR clobber/user in the block preheader between
204   // MTCTR and the end.
205   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
206        I != Preheader->instr_end(); ++I)
207     if (isCTRClobber(&*I, /* CheckReads */ true)) {
208       InvalidCTRLoop = true;
209       break;
210     }
211 
212   // Find the CTR loop components and decide whether or not to fall back to a
213   // normal loop.
214   for (auto *MBB : reverse(ML->getBlocks())) {
215     for (auto &MI : *MBB) {
216       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
217           MI.getOpcode() == PPC::DecreaseCTR8loop)
218         Dec = &MI;
219       else if (!InvalidCTRLoop)
220         // If any instruction clobber CTR, then we can not generate a CTR loop.
221         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
222     }
223     if (Dec && InvalidCTRLoop)
224       break;
225   }
226 
227   assert(Dec && "CTR loop is not complete!");
228 
229   if (InvalidCTRLoop) {
230     expandNormalLoops(ML, Start, Dec);
231     ++NumNormalLoops;
232   }
233   else {
234     expandCTRLoops(ML, Start, Dec);
235     ++NumCTRLoops;
236   }
237   return true;
238 }
239 
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)240 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
241                                     MachineInstr *Dec) {
242   bool Is64Bit =
243       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
244 
245   MachineBasicBlock *Preheader = Start->getParent();
246   MachineBasicBlock *Exiting = Dec->getParent();
247   assert((Preheader && Exiting) &&
248          "Preheader and exiting should exist for CTR loop!");
249 
250   assert(Dec->getOperand(1).getImm() == 1 &&
251          "Loop decrement stride must be 1");
252 
253   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
254   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
255 
256   Register PHIDef =
257       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
258                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
259 
260   Start->getParent()->getParent()->getProperties().reset(
261       MachineFunctionProperties::Property::NoPHIs);
262 
263   // Generate "PHI" in the header block.
264   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
265                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
266   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
267 
268   Register ADDIDef =
269       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
270                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
271   // Generate "addi -1" in the exiting block.
272   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
273       .addReg(PHIDef)
274       .addImm(-1);
275 
276   // Add other inputs for the PHI node.
277   if (ML->isLoopLatch(Exiting)) {
278     // There must be only two predecessors for the loop header, one is the
279     // Preheader and the other one is loop latch Exiting. In hardware loop
280     // insertion pass, the block containing DecreaseCTRloop must dominate all
281     // loop latches. So there must be only one latch.
282     assert(ML->getHeader()->pred_size() == 2 &&
283            "Loop header predecessor is not right!");
284     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
285   } else {
286     // If the block containing DecreaseCTRloop is not a loop latch, we can use
287     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
288     // insertion pass, the block containing DecreaseCTRloop must dominate all
289     // loop latches.
290     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
291       if (ML->contains(P)) {
292         assert(ML->isLoopLatch(P) &&
293                "Loop's header in-loop predecessor is not loop latch!");
294         PHIMIB.addReg(ADDIDef).addMBB(P);
295       } else
296         assert(P == Preheader &&
297                "CTR loop should not be generated for irreducible loop!");
298     }
299   }
300 
301   // Generate the compare in the exiting block.
302   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
303   auto CMPMIB =
304       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
305           .addReg(ADDIDef)
306           .addImm(0);
307 
308   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
309           Dec->getOperand(0).getReg())
310       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
311 
312   // Remove the pseudo instructions.
313   Start->eraseFromParent();
314   Dec->eraseFromParent();
315 }
316 
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)317 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
318                                  MachineInstr *Dec) {
319   bool Is64Bit =
320       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
321 
322   MachineBasicBlock *Preheader = Start->getParent();
323   MachineBasicBlock *Exiting = Dec->getParent();
324 
325   (void)Preheader;
326   assert((Preheader && Exiting) &&
327          "Preheader and exiting should exist for CTR loop!");
328 
329   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
330 
331   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
332   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
333   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
334   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
335          "There should be only one user for loop decrement pseudo!");
336 
337   unsigned Opcode = 0;
338   switch (BrInstr->getOpcode()) {
339   case PPC::BC:
340     Opcode = BDNZOpcode;
341     (void) ML;
342     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
343            "Invalid ctr loop!");
344     break;
345   case PPC::BCn:
346     Opcode = BDZOpcode;
347     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
348            "Invalid ctr loop!");
349     break;
350   default:
351     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
352   }
353 
354   // Generate "bdnz/bdz" in the exiting block just before the terminator.
355   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
356       .addMBB(BrInstr->getOperand(1).getMBB());
357 
358   // Remove the pseudo instructions.
359   BrInstr->eraseFromParent();
360   Dec->eraseFromParent();
361 }
362