1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 // its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 // a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include <cassert>
45
46 using namespace llvm;
47
48 #define DEBUG_TYPE "ppc-ctrloops"
49
50 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
51 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
52
53 namespace {
54 class PPCCTRLoops : public MachineFunctionPass {
55 public:
56 static char ID;
57
PPCCTRLoops()58 PPCCTRLoops() : MachineFunctionPass(ID) {}
59
getAnalysisUsage(AnalysisUsage & AU) const60 void getAnalysisUsage(AnalysisUsage &AU) const override {
61 AU.addRequired<MachineLoopInfoWrapperPass>();
62 MachineFunctionPass::getAnalysisUsage(AU);
63 }
64
65 bool runOnMachineFunction(MachineFunction &MF) override;
66
67 private:
68 const PPCInstrInfo *TII = nullptr;
69 MachineRegisterInfo *MRI = nullptr;
70
71 bool processLoop(MachineLoop *ML);
72 bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
73 void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
74 MachineInstr *Dec);
75 void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
76 };
77 } // namespace
78
79 char PPCCTRLoops::ID = 0;
80
81 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
82 false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)83 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
84 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
85 false, false)
86
87 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
88
runOnMachineFunction(MachineFunction & MF)89 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
90 bool Changed = false;
91
92 auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
93 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
94 MRI = &MF.getRegInfo();
95
96 for (auto *ML : MLI) {
97 if (ML->isOutermost())
98 Changed |= processLoop(ML);
99 }
100
101 #ifndef NDEBUG
102 for (const MachineBasicBlock &BB : MF) {
103 for (const MachineInstr &I : BB)
104 assert((I.getOpcode() != PPC::DecreaseCTRloop &&
105 I.getOpcode() != PPC::DecreaseCTR8loop) &&
106 "CTR loop pseudo is not expanded!");
107 }
108 #endif
109
110 return Changed;
111 }
112
isCTRClobber(MachineInstr * MI,bool CheckReads) const113 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
114 if (!CheckReads) {
115 // If we are only checking for defs, that is we are going to find
116 // definitions before MTCTRloop, for this case:
117 // CTR defination inside the callee of a call instruction will not impact
118 // the defination of MTCTRloop, so we can use definesRegister() for the
119 // check, no need to check the regmask.
120 return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||
121 MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);
122 }
123
124 if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||
125 MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))
126 return true;
127
128 if (MI->getDesc().isCall())
129 return true;
130
131 // We define the CTR in the loop preheader, so if there is any CTR reader in
132 // the loop, we also can not use CTR loop form.
133 if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||
134 MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))
135 return true;
136
137 return false;
138 }
139
processLoop(MachineLoop * ML)140 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
141 bool Changed = false;
142
143 // Align with HardwareLoop pass, process inner loops first.
144 for (MachineLoop *I : *ML)
145 Changed |= processLoop(I);
146
147 // If any inner loop is changed, outter loop must be without hardware loop
148 // intrinsics.
149 if (Changed)
150 return true;
151
152 auto IsLoopStart = [](MachineInstr &MI) {
153 return MI.getOpcode() == PPC::MTCTRloop ||
154 MI.getOpcode() == PPC::MTCTR8loop;
155 };
156
157 auto SearchForStart =
158 [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
159 for (auto &MI : *MBB) {
160 if (IsLoopStart(MI))
161 return &MI;
162 }
163 return nullptr;
164 };
165
166 MachineInstr *Start = nullptr;
167 MachineInstr *Dec = nullptr;
168 bool InvalidCTRLoop = false;
169
170 MachineBasicBlock *Preheader = ML->getLoopPreheader();
171 // If there is no preheader for this loop, there must be no MTCTRloop
172 // either.
173 if (!Preheader)
174 return false;
175
176 Start = SearchForStart(Preheader);
177 // This is not a CTR loop candidate.
178 if (!Start)
179 return false;
180
181 // If CTR is live to the preheader, we can not redefine the CTR register.
182 if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
183 InvalidCTRLoop = true;
184
185 // Make sure there is also no CTR clobber in the block preheader between the
186 // begin and MTCTR.
187 for (MachineBasicBlock::reverse_instr_iterator I =
188 std::next(Start->getReverseIterator());
189 I != Preheader->instr_rend(); ++I)
190 // Only check the definitions of CTR. If there is non-dead definition for
191 // the CTR, we conservatively don't generate a CTR loop.
192 if (isCTRClobber(&*I, /* CheckReads */ false)) {
193 InvalidCTRLoop = true;
194 break;
195 }
196
197 // Make sure there is also no CTR clobber/user in the block preheader between
198 // MTCTR and the end.
199 for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
200 I != Preheader->instr_end(); ++I)
201 if (isCTRClobber(&*I, /* CheckReads */ true)) {
202 InvalidCTRLoop = true;
203 break;
204 }
205
206 // Find the CTR loop components and decide whether or not to fall back to a
207 // normal loop.
208 for (auto *MBB : reverse(ML->getBlocks())) {
209 for (auto &MI : *MBB) {
210 if (MI.getOpcode() == PPC::DecreaseCTRloop ||
211 MI.getOpcode() == PPC::DecreaseCTR8loop)
212 Dec = &MI;
213 else if (!InvalidCTRLoop)
214 // If any instruction clobber CTR, then we can not generate a CTR loop.
215 InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
216 }
217 if (Dec && InvalidCTRLoop)
218 break;
219 }
220
221 assert(Dec && "CTR loop is not complete!");
222
223 if (InvalidCTRLoop) {
224 expandNormalLoops(ML, Start, Dec);
225 ++NumNormalLoops;
226 }
227 else {
228 expandCTRLoops(ML, Start, Dec);
229 ++NumCTRLoops;
230 }
231 return true;
232 }
233
expandNormalLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)234 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
235 MachineInstr *Dec) {
236 bool Is64Bit =
237 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
238
239 MachineBasicBlock *Preheader = Start->getParent();
240 MachineBasicBlock *Exiting = Dec->getParent();
241 assert((Preheader && Exiting) &&
242 "Preheader and exiting should exist for CTR loop!");
243
244 assert(Dec->getOperand(1).getImm() == 1 &&
245 "Loop decrement stride must be 1");
246
247 unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
248 unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
249
250 Register PHIDef =
251 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
252 : &PPC::GPRC_and_GPRC_NOR0RegClass);
253
254 Start->getParent()->getParent()->getProperties().resetNoPHIs();
255
256 // Generate "PHI" in the header block.
257 auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
258 DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
259 PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
260
261 Register ADDIDef =
262 MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
263 : &PPC::GPRC_and_GPRC_NOR0RegClass);
264 // Generate "addi -1" in the exiting block.
265 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
266 .addReg(PHIDef)
267 .addImm(-1);
268
269 // Add other inputs for the PHI node.
270 if (ML->isLoopLatch(Exiting)) {
271 // There must be only two predecessors for the loop header, one is the
272 // Preheader and the other one is loop latch Exiting. In hardware loop
273 // insertion pass, the block containing DecreaseCTRloop must dominate all
274 // loop latches. So there must be only one latch.
275 assert(ML->getHeader()->pred_size() == 2 &&
276 "Loop header predecessor is not right!");
277 PHIMIB.addReg(ADDIDef).addMBB(Exiting);
278 } else {
279 // If the block containing DecreaseCTRloop is not a loop latch, we can use
280 // ADDIDef as the value for all other blocks for the PHI. In hardware loop
281 // insertion pass, the block containing DecreaseCTRloop must dominate all
282 // loop latches.
283 for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
284 if (ML->contains(P)) {
285 assert(ML->isLoopLatch(P) &&
286 "Loop's header in-loop predecessor is not loop latch!");
287 PHIMIB.addReg(ADDIDef).addMBB(P);
288 } else
289 assert(P == Preheader &&
290 "CTR loop should not be generated for irreducible loop!");
291 }
292 }
293
294 // Generate the compare in the exiting block.
295 Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
296 auto CMPMIB =
297 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
298 .addReg(ADDIDef)
299 .addImm(0);
300
301 BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
302 Dec->getOperand(0).getReg())
303 .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
304
305 // Remove the pseudo instructions.
306 Start->eraseFromParent();
307 Dec->eraseFromParent();
308 }
309
expandCTRLoops(MachineLoop * ML,MachineInstr * Start,MachineInstr * Dec)310 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
311 MachineInstr *Dec) {
312 bool Is64Bit =
313 Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
314
315 MachineBasicBlock *Preheader = Start->getParent();
316 MachineBasicBlock *Exiting = Dec->getParent();
317
318 (void)Preheader;
319 assert((Preheader && Exiting) &&
320 "Preheader and exiting should exist for CTR loop!");
321
322 assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
323
324 unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
325 unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
326 auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
327 assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
328 "There should be only one user for loop decrement pseudo!");
329
330 unsigned Opcode = 0;
331 switch (BrInstr->getOpcode()) {
332 case PPC::BC:
333 Opcode = BDNZOpcode;
334 (void) ML;
335 assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
336 "Invalid ctr loop!");
337 break;
338 case PPC::BCn:
339 Opcode = BDZOpcode;
340 assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
341 "Invalid ctr loop!");
342 break;
343 default:
344 llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
345 }
346
347 // Generate "bdnz/bdz" in the exiting block just before the terminator.
348 BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
349 .addMBB(BrInstr->getOperand(1).getMBB());
350
351 // Remove the pseudo instructions.
352 BrInstr->eraseFromParent();
353 Dec->eraseFromParent();
354 }
355