xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp (revision 9dba64be9536c28e4800e06512b7f29b43ade345)
1 //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// Finalize v8.1-m low-overhead loops by converting the associated pseudo
10 /// instructions into machine operations.
11 /// The expectation is that the loop contains three pseudo instructions:
12 /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop
13 ///   form should be in the preheader, whereas the while form should be in the
14 ///   preheaders only predecessor.
15 /// - t2LoopDec - placed within in the loop body.
16 /// - t2LoopEnd - the loop latch terminator.
17 ///
18 //===----------------------------------------------------------------------===//
19 
20 #include "ARM.h"
21 #include "ARMBaseInstrInfo.h"
22 #include "ARMBaseRegisterInfo.h"
23 #include "ARMBasicBlockInfo.h"
24 #include "ARMSubtarget.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineLoopInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "arm-low-overhead-loops"
32 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
33 
34 namespace {
35 
36   class ARMLowOverheadLoops : public MachineFunctionPass {
37     MachineFunction           *MF = nullptr;
38     const ARMBaseInstrInfo    *TII = nullptr;
39     MachineRegisterInfo       *MRI = nullptr;
40     std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
41 
42   public:
43     static char ID;
44 
45     ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
46 
47     void getAnalysisUsage(AnalysisUsage &AU) const override {
48       AU.setPreservesCFG();
49       AU.addRequired<MachineLoopInfo>();
50       MachineFunctionPass::getAnalysisUsage(AU);
51     }
52 
53     bool runOnMachineFunction(MachineFunction &MF) override;
54 
55     MachineFunctionProperties getRequiredProperties() const override {
56       return MachineFunctionProperties().set(
57           MachineFunctionProperties::Property::NoVRegs);
58     }
59 
60     StringRef getPassName() const override {
61       return ARM_LOW_OVERHEAD_LOOPS_NAME;
62     }
63 
64   private:
65     bool ProcessLoop(MachineLoop *ML);
66 
67     MachineInstr * IsSafeToDefineLR(MachineInstr *MI);
68 
69     bool RevertNonLoops();
70 
71     void RevertWhile(MachineInstr *MI) const;
72 
73     bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const;
74 
75     void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const;
76 
77     void Expand(MachineLoop *ML, MachineInstr *Start,
78                 MachineInstr *InsertPt, MachineInstr *Dec,
79                 MachineInstr *End, bool Revert);
80 
81   };
82 }
83 
84 char ARMLowOverheadLoops::ID = 0;
85 
86 INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
87                 false, false)
88 
89 bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
90   const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(mf.getSubtarget());
91   if (!ST.hasLOB())
92     return false;
93 
94   MF = &mf;
95   LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n");
96 
97   auto &MLI = getAnalysis<MachineLoopInfo>();
98   MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
99   MRI = &MF->getRegInfo();
100   TII = static_cast<const ARMBaseInstrInfo*>(ST.getInstrInfo());
101   BBUtils = std::unique_ptr<ARMBasicBlockUtils>(new ARMBasicBlockUtils(*MF));
102   BBUtils->computeAllBlockSizes();
103   BBUtils->adjustBBOffsetsAfter(&MF->front());
104 
105   bool Changed = false;
106   for (auto ML : MLI) {
107     if (!ML->getParentLoop())
108       Changed |= ProcessLoop(ML);
109   }
110   Changed |= RevertNonLoops();
111   return Changed;
112 }
113 
114 static bool IsLoopStart(MachineInstr &MI) {
115   return MI.getOpcode() == ARM::t2DoLoopStart ||
116          MI.getOpcode() == ARM::t2WhileLoopStart;
117 }
118 
119 template<typename T>
120 static MachineInstr* SearchForDef(MachineInstr *Begin, T End, unsigned Reg) {
121   for(auto &MI : make_range(T(Begin), End)) {
122     for (auto &MO : MI.operands()) {
123       if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
124         continue;
125       return &MI;
126     }
127   }
128   return nullptr;
129 }
130 
131 static MachineInstr* SearchForUse(MachineInstr *Begin,
132                                   MachineBasicBlock::iterator End,
133                                   unsigned Reg) {
134   for(auto &MI : make_range(MachineBasicBlock::iterator(Begin), End)) {
135     for (auto &MO : MI.operands()) {
136       if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
137         continue;
138       return &MI;
139     }
140   }
141   return nullptr;
142 }
143 
144 // Is it safe to define LR with DLS/WLS?
145 // LR can defined if it is the operand to start, because it's the same value,
146 // or if it's going to be equivalent to the operand to Start.
147 MachineInstr *ARMLowOverheadLoops::IsSafeToDefineLR(MachineInstr *Start) {
148 
149   auto IsMoveLR = [](MachineInstr *MI, unsigned Reg) {
150     return MI->getOpcode() == ARM::tMOVr &&
151            MI->getOperand(0).getReg() == ARM::LR &&
152            MI->getOperand(1).getReg() == Reg &&
153            MI->getOperand(2).getImm() == ARMCC::AL;
154    };
155 
156   MachineBasicBlock *MBB = Start->getParent();
157   unsigned CountReg = Start->getOperand(0).getReg();
158   // Walk forward and backward in the block to find the closest instructions
159   // that define LR. Then also filter them out if they're not a mov lr.
160   MachineInstr *PredLRDef = SearchForDef(Start, MBB->rend(), ARM::LR);
161   if (PredLRDef && !IsMoveLR(PredLRDef, CountReg))
162     PredLRDef = nullptr;
163 
164   MachineInstr *SuccLRDef = SearchForDef(Start, MBB->end(), ARM::LR);
165   if (SuccLRDef && !IsMoveLR(SuccLRDef, CountReg))
166     SuccLRDef = nullptr;
167 
168   // We've either found one, two or none mov lr instructions... Now figure out
169   // if they are performing the equilvant mov that the Start instruction will.
170   // Do this by scanning forward and backward to see if there's a def of the
171   // register holding the count value. If we find a suitable def, return it as
172   // the insert point. Later, if InsertPt != Start, then we can remove the
173   // redundant instruction.
174   if (SuccLRDef) {
175     MachineBasicBlock::iterator End(SuccLRDef);
176     if (!SearchForDef(Start, End, CountReg)) {
177       return SuccLRDef;
178     } else
179       SuccLRDef = nullptr;
180   }
181   if (PredLRDef) {
182     MachineBasicBlock::reverse_iterator End(PredLRDef);
183     if (!SearchForDef(Start, End, CountReg)) {
184       return PredLRDef;
185     } else
186       PredLRDef = nullptr;
187   }
188 
189   // We can define LR because LR already contains the same value.
190   if (Start->getOperand(0).getReg() == ARM::LR)
191     return Start;
192 
193   // We've found no suitable LR def and Start doesn't use LR directly. Can we
194   // just define LR anyway?
195   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
196   LivePhysRegs LiveRegs(*TRI);
197   LiveRegs.addLiveOuts(*MBB);
198 
199   // Not if we've haven't found a suitable mov and LR is live out.
200   if (LiveRegs.contains(ARM::LR))
201     return nullptr;
202 
203   // If LR is not live out, we can insert the instruction if nothing else
204   // uses LR after it.
205   if (!SearchForUse(Start, MBB->end(), ARM::LR))
206     return Start;
207 
208   LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find suitable insertion point for"
209              << " LR\n");
210   return nullptr;
211 }
212 
213 bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
214 
215   bool Changed = false;
216 
217   // Process inner loops first.
218   for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
219     Changed |= ProcessLoop(*I);
220 
221   LLVM_DEBUG(dbgs() << "ARM Loops: Processing " << *ML);
222 
223   // Search the given block for a loop start instruction. If one isn't found,
224   // and there's only one predecessor block, search that one too.
225   std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
226     [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* {
227     for (auto &MI : *MBB) {
228       if (IsLoopStart(MI))
229         return &MI;
230     }
231     if (MBB->pred_size() == 1)
232       return SearchForStart(*MBB->pred_begin());
233     return nullptr;
234   };
235 
236   MachineInstr *Start = nullptr;
237   MachineInstr *Dec = nullptr;
238   MachineInstr *End = nullptr;
239   bool Revert = false;
240 
241   // Search the preheader for the start intrinsic, or look through the
242   // predecessors of the header to find exactly one set.iterations intrinsic.
243   // FIXME: I don't see why we shouldn't be supporting multiple predecessors
244   // with potentially multiple set.loop.iterations, so we need to enable this.
245   if (auto *Preheader = ML->getLoopPreheader()) {
246     Start = SearchForStart(Preheader);
247   } else {
248     LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find loop preheader!\n"
249                << " - Performing manual predecessor search.\n");
250     MachineBasicBlock *Pred = nullptr;
251     for (auto *MBB : ML->getHeader()->predecessors()) {
252       if (!ML->contains(MBB)) {
253         if (Pred) {
254           LLVM_DEBUG(dbgs() << " - Found multiple out-of-loop preds.\n");
255           Start = nullptr;
256           break;
257         }
258         Pred = MBB;
259         Start = SearchForStart(MBB);
260       }
261     }
262   }
263 
264   // Find the low-overhead loop components and decide whether or not to fall
265   // back to a normal loop.
266   for (auto *MBB : reverse(ML->getBlocks())) {
267     for (auto &MI : *MBB) {
268       if (MI.getOpcode() == ARM::t2LoopDec)
269         Dec = &MI;
270       else if (MI.getOpcode() == ARM::t2LoopEnd)
271         End = &MI;
272       else if (IsLoopStart(MI))
273         Start = &MI;
274       else if (MI.getDesc().isCall()) {
275         // TODO: Though the call will require LE to execute again, does this
276         // mean we should revert? Always executing LE hopefully should be
277         // faster than performing a sub,cmp,br or even subs,br.
278         Revert = true;
279         LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n");
280       }
281 
282       if (!Dec || End)
283         continue;
284 
285       // If we find that LR has been written or read between LoopDec and
286       // LoopEnd, expect that the decremented value is being used else where.
287       // Because this value isn't actually going to be produced until the
288       // latch, by LE, we would need to generate a real sub. The value is also
289       // likely to be copied/reloaded for use of LoopEnd - in which in case
290       // we'd need to perform an add because it gets subtracted again by LE!
291       // The other option is to then generate the other form of LE which doesn't
292       // perform the sub.
293       for (auto &MO : MI.operands()) {
294         if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() &&
295             MO.getReg() == ARM::LR) {
296           LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI);
297           Revert = true;
298           break;
299         }
300       }
301     }
302 
303     if (Dec && End && Revert)
304       break;
305   }
306 
307   LLVM_DEBUG(if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start;
308              if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec;
309              if (End) dbgs() << "ARM Loops: Found Loop End: " << *End;);
310 
311   if (!Start && !Dec && !End) {
312     LLVM_DEBUG(dbgs() << "ARM Loops: Not a low-overhead loop.\n");
313     return Changed;
314   } else if (!(Start && Dec && End)) {
315     LLVM_DEBUG(dbgs() << "ARM Loops: Failed to find all loop components.\n");
316     return false;
317   }
318 
319   if (!End->getOperand(1).isMBB())
320     report_fatal_error("Expected LoopEnd to target basic block");
321 
322   // TODO Maybe there's cases where the target doesn't have to be the header,
323   // but for now be safe and revert.
324   if (End->getOperand(1).getMBB() != ML->getHeader()) {
325     LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targetting header.\n");
326     Revert = true;
327   }
328 
329   // The WLS and LE instructions have 12-bits for the label offset. WLS
330   // requires a positive offset, while LE uses negative.
331   if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML->getHeader()) ||
332       !BBUtils->isBBInRange(End, ML->getHeader(), 4094)) {
333     LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n");
334     Revert = true;
335   }
336   if (Start->getOpcode() == ARM::t2WhileLoopStart &&
337       (BBUtils->getOffsetOf(Start) >
338        BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) ||
339        !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) {
340     LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n");
341     Revert = true;
342   }
343 
344   MachineInstr *InsertPt = Revert ? nullptr : IsSafeToDefineLR(Start);
345   if (!InsertPt) {
346     LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
347     Revert = true;
348   } else
349     LLVM_DEBUG(dbgs() << "ARM Loops: Start insertion point: " << *InsertPt);
350 
351   Expand(ML, Start, InsertPt, Dec, End, Revert);
352   return true;
353 }
354 
355 // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a
356 // beq that branches to the exit branch.
357 // TODO: We could also try to generate a cbz if the value in LR is also in
358 // another low register.
359 void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const {
360   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI);
361   MachineBasicBlock *MBB = MI->getParent();
362   MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
363                                     TII->get(ARM::t2CMPri));
364   MIB.add(MI->getOperand(0));
365   MIB.addImm(0);
366   MIB.addImm(ARMCC::AL);
367   MIB.addReg(ARM::NoRegister);
368 
369   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
370   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
371     ARM::tBcc : ARM::t2Bcc;
372 
373   MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
374   MIB.add(MI->getOperand(1));   // branch target
375   MIB.addImm(ARMCC::EQ);        // condition code
376   MIB.addReg(ARM::CPSR);
377   MI->eraseFromParent();
378 }
379 
380 bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI,
381                                         bool AllowFlags) const {
382   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI);
383   MachineBasicBlock *MBB = MI->getParent();
384 
385   // If nothing uses or defines CPSR between LoopDec and LoopEnd, use a t2SUBS.
386   bool SetFlags = false;
387   if (AllowFlags) {
388     if (auto *Def = SearchForDef(MI, MBB->end(), ARM::CPSR)) {
389       if (!SearchForUse(MI, MBB->end(), ARM::CPSR) &&
390           Def->getOpcode() == ARM::t2LoopEnd)
391         SetFlags = true;
392     }
393   }
394 
395   MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
396                                     TII->get(ARM::t2SUBri));
397   MIB.addDef(ARM::LR);
398   MIB.add(MI->getOperand(1));
399   MIB.add(MI->getOperand(2));
400   MIB.addImm(ARMCC::AL);
401   MIB.addReg(0);
402 
403   if (SetFlags) {
404     MIB.addReg(ARM::CPSR);
405     MIB->getOperand(5).setIsDef(true);
406   } else
407     MIB.addReg(0);
408 
409   MI->eraseFromParent();
410   return SetFlags;
411 }
412 
413 // Generate a subs, or sub and cmp, and a branch instead of an LE.
414 void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const {
415   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI);
416 
417   MachineBasicBlock *MBB = MI->getParent();
418   // Create cmp
419   if (!SkipCmp) {
420     MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
421                                       TII->get(ARM::t2CMPri));
422     MIB.addReg(ARM::LR);
423     MIB.addImm(0);
424     MIB.addImm(ARMCC::AL);
425     MIB.addReg(ARM::NoRegister);
426   }
427 
428   MachineBasicBlock *DestBB = MI->getOperand(1).getMBB();
429   unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ?
430     ARM::tBcc : ARM::t2Bcc;
431 
432   // Create bne
433   MachineInstrBuilder MIB =
434     BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc));
435   MIB.add(MI->getOperand(1));   // branch target
436   MIB.addImm(ARMCC::NE);        // condition code
437   MIB.addReg(ARM::CPSR);
438   MI->eraseFromParent();
439 }
440 
441 void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start,
442                                  MachineInstr *InsertPt,
443                                  MachineInstr *Dec, MachineInstr *End,
444                                  bool Revert) {
445 
446   auto ExpandLoopStart = [this](MachineLoop *ML, MachineInstr *Start,
447                                 MachineInstr *InsertPt) {
448     MachineBasicBlock *MBB = InsertPt->getParent();
449     unsigned Opc = Start->getOpcode() == ARM::t2DoLoopStart ?
450       ARM::t2DLS : ARM::t2WLS;
451     MachineInstrBuilder MIB =
452       BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));
453 
454     MIB.addDef(ARM::LR);
455     MIB.add(Start->getOperand(0));
456     if (Opc == ARM::t2WLS)
457       MIB.add(Start->getOperand(1));
458 
459     if (InsertPt != Start)
460       InsertPt->eraseFromParent();
461     Start->eraseFromParent();
462     LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
463     return &*MIB;
464   };
465 
466   // Combine the LoopDec and LoopEnd instructions into LE(TP).
467   auto ExpandLoopEnd = [this](MachineLoop *ML, MachineInstr *Dec,
468                               MachineInstr *End) {
469     MachineBasicBlock *MBB = End->getParent();
470     MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(),
471                                       TII->get(ARM::t2LEUpdate));
472     MIB.addDef(ARM::LR);
473     MIB.add(End->getOperand(0));
474     MIB.add(End->getOperand(1));
475     LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB);
476 
477     End->eraseFromParent();
478     Dec->eraseFromParent();
479     return &*MIB;
480   };
481 
482   // TODO: We should be able to automatically remove these branches before we
483   // get here - probably by teaching analyzeBranch about the pseudo
484   // instructions.
485   // If there is an unconditional branch, after I, that just branches to the
486   // next block, remove it.
487   auto RemoveDeadBranch = [](MachineInstr *I) {
488     MachineBasicBlock *BB = I->getParent();
489     MachineInstr *Terminator = &BB->instr_back();
490     if (Terminator->isUnconditionalBranch() && I != Terminator) {
491       MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB();
492       if (BB->isLayoutSuccessor(Succ)) {
493         LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator);
494         Terminator->eraseFromParent();
495       }
496     }
497   };
498 
499   if (Revert) {
500     if (Start->getOpcode() == ARM::t2WhileLoopStart)
501       RevertWhile(Start);
502     else
503       Start->eraseFromParent();
504     bool FlagsAlreadySet = RevertLoopDec(Dec, true);
505     RevertLoopEnd(End, FlagsAlreadySet);
506   } else {
507     Start = ExpandLoopStart(ML, Start, InsertPt);
508     RemoveDeadBranch(Start);
509     End = ExpandLoopEnd(ML, Dec, End);
510     RemoveDeadBranch(End);
511   }
512 }
513 
514 bool ARMLowOverheadLoops::RevertNonLoops() {
515   LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n");
516   bool Changed = false;
517 
518   for (auto &MBB : *MF) {
519     SmallVector<MachineInstr*, 4> Starts;
520     SmallVector<MachineInstr*, 4> Decs;
521     SmallVector<MachineInstr*, 4> Ends;
522 
523     for (auto &I : MBB) {
524       if (IsLoopStart(I))
525         Starts.push_back(&I);
526       else if (I.getOpcode() == ARM::t2LoopDec)
527         Decs.push_back(&I);
528       else if (I.getOpcode() == ARM::t2LoopEnd)
529         Ends.push_back(&I);
530     }
531 
532     if (Starts.empty() && Decs.empty() && Ends.empty())
533       continue;
534 
535     Changed = true;
536 
537     for (auto *Start : Starts) {
538       if (Start->getOpcode() == ARM::t2WhileLoopStart)
539         RevertWhile(Start);
540       else
541         Start->eraseFromParent();
542     }
543     for (auto *Dec : Decs)
544       RevertLoopDec(Dec);
545 
546     for (auto *End : Ends)
547       RevertLoopEnd(End);
548   }
549   return Changed;
550 }
551 
552 FunctionPass *llvm::createARMLowOverheadLoopsPass() {
553   return new ARMLowOverheadLoops();
554 }
555