xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
2*0b57cec5SDimitry Andric //
3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0b57cec5SDimitry Andric //
7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
8*0b57cec5SDimitry Andric /// \file
9*0b57cec5SDimitry Andric /// Insert hardware loop intrinsics into loops which are deemed profitable by
10*0b57cec5SDimitry Andric /// the target, by querying TargetTransformInfo. A hardware loop comprises of
11*0b57cec5SDimitry Andric /// two intrinsics: one, outside the loop, to set the loop iteration count and
12*0b57cec5SDimitry Andric /// another, in the exit block, to decrement the counter. The decremented value
13*0b57cec5SDimitry Andric /// can either be carried through the loop via a phi or handled in some opaque
14*0b57cec5SDimitry Andric /// way by the target.
15*0b57cec5SDimitry Andric ///
16*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
17*0b57cec5SDimitry Andric 
18*0b57cec5SDimitry Andric #include "llvm/Pass.h"
19*0b57cec5SDimitry Andric #include "llvm/PassRegistry.h"
20*0b57cec5SDimitry Andric #include "llvm/PassSupport.h"
21*0b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
22*0b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
23*0b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
24*0b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
25*0b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpander.h"
26*0b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
27*0b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
28*0b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
29*0b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
30*0b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
31*0b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
32*0b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
33*0b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
34*0b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
35*0b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
36*0b57cec5SDimitry Andric #include "llvm/IR/Value.h"
37*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
38*0b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
39*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
40*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
41*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
42*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
43*0b57cec5SDimitry Andric 
44*0b57cec5SDimitry Andric #define DEBUG_TYPE "hardware-loops"
45*0b57cec5SDimitry Andric 
46*0b57cec5SDimitry Andric #define HW_LOOPS_NAME "Hardware Loop Insertion"
47*0b57cec5SDimitry Andric 
48*0b57cec5SDimitry Andric using namespace llvm;
49*0b57cec5SDimitry Andric 
50*0b57cec5SDimitry Andric static cl::opt<bool>
51*0b57cec5SDimitry Andric ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
52*0b57cec5SDimitry Andric                    cl::desc("Force hardware loops intrinsics to be inserted"));
53*0b57cec5SDimitry Andric 
54*0b57cec5SDimitry Andric static cl::opt<bool>
55*0b57cec5SDimitry Andric ForceHardwareLoopPHI(
56*0b57cec5SDimitry Andric   "force-hardware-loop-phi", cl::Hidden, cl::init(false),
57*0b57cec5SDimitry Andric   cl::desc("Force hardware loop counter to be updated through a phi"));
58*0b57cec5SDimitry Andric 
59*0b57cec5SDimitry Andric static cl::opt<bool>
60*0b57cec5SDimitry Andric ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
61*0b57cec5SDimitry Andric                 cl::desc("Force allowance of nested hardware loops"));
62*0b57cec5SDimitry Andric 
63*0b57cec5SDimitry Andric static cl::opt<unsigned>
64*0b57cec5SDimitry Andric LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
65*0b57cec5SDimitry Andric             cl::desc("Set the loop decrement value"));
66*0b57cec5SDimitry Andric 
67*0b57cec5SDimitry Andric static cl::opt<unsigned>
68*0b57cec5SDimitry Andric CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
69*0b57cec5SDimitry Andric                 cl::desc("Set the loop counter bitwidth"));
70*0b57cec5SDimitry Andric 
71*0b57cec5SDimitry Andric static cl::opt<bool>
72*0b57cec5SDimitry Andric ForceGuardLoopEntry(
73*0b57cec5SDimitry Andric   "force-hardware-loop-guard", cl::Hidden, cl::init(false),
74*0b57cec5SDimitry Andric   cl::desc("Force generation of loop guard intrinsic"));
75*0b57cec5SDimitry Andric 
76*0b57cec5SDimitry Andric STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
77*0b57cec5SDimitry Andric 
78*0b57cec5SDimitry Andric namespace {
79*0b57cec5SDimitry Andric 
80*0b57cec5SDimitry Andric   using TTI = TargetTransformInfo;
81*0b57cec5SDimitry Andric 
82*0b57cec5SDimitry Andric   class HardwareLoops : public FunctionPass {
83*0b57cec5SDimitry Andric   public:
84*0b57cec5SDimitry Andric     static char ID;
85*0b57cec5SDimitry Andric 
86*0b57cec5SDimitry Andric     HardwareLoops() : FunctionPass(ID) {
87*0b57cec5SDimitry Andric       initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
88*0b57cec5SDimitry Andric     }
89*0b57cec5SDimitry Andric 
90*0b57cec5SDimitry Andric     bool runOnFunction(Function &F) override;
91*0b57cec5SDimitry Andric 
92*0b57cec5SDimitry Andric     void getAnalysisUsage(AnalysisUsage &AU) const override {
93*0b57cec5SDimitry Andric       AU.addRequired<LoopInfoWrapperPass>();
94*0b57cec5SDimitry Andric       AU.addPreserved<LoopInfoWrapperPass>();
95*0b57cec5SDimitry Andric       AU.addRequired<DominatorTreeWrapperPass>();
96*0b57cec5SDimitry Andric       AU.addPreserved<DominatorTreeWrapperPass>();
97*0b57cec5SDimitry Andric       AU.addRequired<ScalarEvolutionWrapperPass>();
98*0b57cec5SDimitry Andric       AU.addRequired<AssumptionCacheTracker>();
99*0b57cec5SDimitry Andric       AU.addRequired<TargetTransformInfoWrapperPass>();
100*0b57cec5SDimitry Andric     }
101*0b57cec5SDimitry Andric 
102*0b57cec5SDimitry Andric     // Try to convert the given Loop into a hardware loop.
103*0b57cec5SDimitry Andric     bool TryConvertLoop(Loop *L);
104*0b57cec5SDimitry Andric 
105*0b57cec5SDimitry Andric     // Given that the target believes the loop to be profitable, try to
106*0b57cec5SDimitry Andric     // convert it.
107*0b57cec5SDimitry Andric     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
108*0b57cec5SDimitry Andric 
109*0b57cec5SDimitry Andric   private:
110*0b57cec5SDimitry Andric     ScalarEvolution *SE = nullptr;
111*0b57cec5SDimitry Andric     LoopInfo *LI = nullptr;
112*0b57cec5SDimitry Andric     const DataLayout *DL = nullptr;
113*0b57cec5SDimitry Andric     const TargetTransformInfo *TTI = nullptr;
114*0b57cec5SDimitry Andric     DominatorTree *DT = nullptr;
115*0b57cec5SDimitry Andric     bool PreserveLCSSA = false;
116*0b57cec5SDimitry Andric     AssumptionCache *AC = nullptr;
117*0b57cec5SDimitry Andric     TargetLibraryInfo *LibInfo = nullptr;
118*0b57cec5SDimitry Andric     Module *M = nullptr;
119*0b57cec5SDimitry Andric     bool MadeChange = false;
120*0b57cec5SDimitry Andric   };
121*0b57cec5SDimitry Andric 
122*0b57cec5SDimitry Andric   class HardwareLoop {
123*0b57cec5SDimitry Andric     // Expand the trip count scev into a value that we can use.
124*0b57cec5SDimitry Andric     Value *InitLoopCount();
125*0b57cec5SDimitry Andric 
126*0b57cec5SDimitry Andric     // Insert the set_loop_iteration intrinsic.
127*0b57cec5SDimitry Andric     void InsertIterationSetup(Value *LoopCountInit);
128*0b57cec5SDimitry Andric 
129*0b57cec5SDimitry Andric     // Insert the loop_decrement intrinsic.
130*0b57cec5SDimitry Andric     void InsertLoopDec();
131*0b57cec5SDimitry Andric 
132*0b57cec5SDimitry Andric     // Insert the loop_decrement_reg intrinsic.
133*0b57cec5SDimitry Andric     Instruction *InsertLoopRegDec(Value *EltsRem);
134*0b57cec5SDimitry Andric 
135*0b57cec5SDimitry Andric     // If the target requires the counter value to be updated in the loop,
136*0b57cec5SDimitry Andric     // insert a phi to hold the value. The intended purpose is for use by
137*0b57cec5SDimitry Andric     // loop_decrement_reg.
138*0b57cec5SDimitry Andric     PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
139*0b57cec5SDimitry Andric 
140*0b57cec5SDimitry Andric     // Create a new cmp, that checks the returned value of loop_decrement*,
141*0b57cec5SDimitry Andric     // and update the exit branch to use it.
142*0b57cec5SDimitry Andric     void UpdateBranch(Value *EltsRem);
143*0b57cec5SDimitry Andric 
144*0b57cec5SDimitry Andric   public:
145*0b57cec5SDimitry Andric     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
146*0b57cec5SDimitry Andric                  const DataLayout &DL) :
147*0b57cec5SDimitry Andric       SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
148*0b57cec5SDimitry Andric       ExitCount(Info.ExitCount),
149*0b57cec5SDimitry Andric       CountType(Info.CountType),
150*0b57cec5SDimitry Andric       ExitBranch(Info.ExitBranch),
151*0b57cec5SDimitry Andric       LoopDecrement(Info.LoopDecrement),
152*0b57cec5SDimitry Andric       UsePHICounter(Info.CounterInReg),
153*0b57cec5SDimitry Andric       UseLoopGuard(Info.PerformEntryTest) { }
154*0b57cec5SDimitry Andric 
155*0b57cec5SDimitry Andric     void Create();
156*0b57cec5SDimitry Andric 
157*0b57cec5SDimitry Andric   private:
158*0b57cec5SDimitry Andric     ScalarEvolution &SE;
159*0b57cec5SDimitry Andric     const DataLayout &DL;
160*0b57cec5SDimitry Andric     Loop *L                 = nullptr;
161*0b57cec5SDimitry Andric     Module *M               = nullptr;
162*0b57cec5SDimitry Andric     const SCEV *ExitCount   = nullptr;
163*0b57cec5SDimitry Andric     Type *CountType         = nullptr;
164*0b57cec5SDimitry Andric     BranchInst *ExitBranch  = nullptr;
165*0b57cec5SDimitry Andric     Value *LoopDecrement    = nullptr;
166*0b57cec5SDimitry Andric     bool UsePHICounter      = false;
167*0b57cec5SDimitry Andric     bool UseLoopGuard       = false;
168*0b57cec5SDimitry Andric     BasicBlock *BeginBB     = nullptr;
169*0b57cec5SDimitry Andric   };
170*0b57cec5SDimitry Andric }
171*0b57cec5SDimitry Andric 
172*0b57cec5SDimitry Andric char HardwareLoops::ID = 0;
173*0b57cec5SDimitry Andric 
174*0b57cec5SDimitry Andric bool HardwareLoops::runOnFunction(Function &F) {
175*0b57cec5SDimitry Andric   if (skipFunction(F))
176*0b57cec5SDimitry Andric     return false;
177*0b57cec5SDimitry Andric 
178*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
179*0b57cec5SDimitry Andric 
180*0b57cec5SDimitry Andric   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
181*0b57cec5SDimitry Andric   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
182*0b57cec5SDimitry Andric   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
183*0b57cec5SDimitry Andric   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
184*0b57cec5SDimitry Andric   DL = &F.getParent()->getDataLayout();
185*0b57cec5SDimitry Andric   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
186*0b57cec5SDimitry Andric   LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
187*0b57cec5SDimitry Andric   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
188*0b57cec5SDimitry Andric   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
189*0b57cec5SDimitry Andric   M = F.getParent();
190*0b57cec5SDimitry Andric 
191*0b57cec5SDimitry Andric   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
192*0b57cec5SDimitry Andric     Loop *L = *I;
193*0b57cec5SDimitry Andric     if (!L->getParentLoop())
194*0b57cec5SDimitry Andric       TryConvertLoop(L);
195*0b57cec5SDimitry Andric   }
196*0b57cec5SDimitry Andric 
197*0b57cec5SDimitry Andric   return MadeChange;
198*0b57cec5SDimitry Andric }
199*0b57cec5SDimitry Andric 
200*0b57cec5SDimitry Andric // Return true if the search should stop, which will be when an inner loop is
201*0b57cec5SDimitry Andric // converted and the parent loop doesn't support containing a hardware loop.
202*0b57cec5SDimitry Andric bool HardwareLoops::TryConvertLoop(Loop *L) {
203*0b57cec5SDimitry Andric   // Process nested loops first.
204*0b57cec5SDimitry Andric   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
205*0b57cec5SDimitry Andric     if (TryConvertLoop(*I))
206*0b57cec5SDimitry Andric       return true; // Stop search.
207*0b57cec5SDimitry Andric 
208*0b57cec5SDimitry Andric   HardwareLoopInfo HWLoopInfo(L);
209*0b57cec5SDimitry Andric   if (!HWLoopInfo.canAnalyze(*LI))
210*0b57cec5SDimitry Andric     return false;
211*0b57cec5SDimitry Andric 
212*0b57cec5SDimitry Andric   if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
213*0b57cec5SDimitry Andric       ForceHardwareLoops) {
214*0b57cec5SDimitry Andric 
215*0b57cec5SDimitry Andric     // Allow overriding of the counter width and loop decrement value.
216*0b57cec5SDimitry Andric     if (CounterBitWidth.getNumOccurrences())
217*0b57cec5SDimitry Andric       HWLoopInfo.CountType =
218*0b57cec5SDimitry Andric         IntegerType::get(M->getContext(), CounterBitWidth);
219*0b57cec5SDimitry Andric 
220*0b57cec5SDimitry Andric     if (LoopDecrement.getNumOccurrences())
221*0b57cec5SDimitry Andric       HWLoopInfo.LoopDecrement =
222*0b57cec5SDimitry Andric         ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
223*0b57cec5SDimitry Andric 
224*0b57cec5SDimitry Andric     MadeChange |= TryConvertLoop(HWLoopInfo);
225*0b57cec5SDimitry Andric     return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
226*0b57cec5SDimitry Andric   }
227*0b57cec5SDimitry Andric 
228*0b57cec5SDimitry Andric   return false;
229*0b57cec5SDimitry Andric }
230*0b57cec5SDimitry Andric 
231*0b57cec5SDimitry Andric bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
232*0b57cec5SDimitry Andric 
233*0b57cec5SDimitry Andric   Loop *L = HWLoopInfo.L;
234*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
235*0b57cec5SDimitry Andric 
236*0b57cec5SDimitry Andric   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
237*0b57cec5SDimitry Andric                                           ForceHardwareLoopPHI))
238*0b57cec5SDimitry Andric     return false;
239*0b57cec5SDimitry Andric 
240*0b57cec5SDimitry Andric   assert(
241*0b57cec5SDimitry Andric       (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
242*0b57cec5SDimitry Andric       "Hardware Loop must have set exit info.");
243*0b57cec5SDimitry Andric 
244*0b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
245*0b57cec5SDimitry Andric 
246*0b57cec5SDimitry Andric   // If we don't have a preheader, then insert one.
247*0b57cec5SDimitry Andric   if (!Preheader)
248*0b57cec5SDimitry Andric     Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
249*0b57cec5SDimitry Andric   if (!Preheader)
250*0b57cec5SDimitry Andric     return false;
251*0b57cec5SDimitry Andric 
252*0b57cec5SDimitry Andric   HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
253*0b57cec5SDimitry Andric   HWLoop.Create();
254*0b57cec5SDimitry Andric   ++NumHWLoops;
255*0b57cec5SDimitry Andric   return true;
256*0b57cec5SDimitry Andric }
257*0b57cec5SDimitry Andric 
258*0b57cec5SDimitry Andric void HardwareLoop::Create() {
259*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
260*0b57cec5SDimitry Andric 
261*0b57cec5SDimitry Andric   Value *LoopCountInit = InitLoopCount();
262*0b57cec5SDimitry Andric   if (!LoopCountInit)
263*0b57cec5SDimitry Andric     return;
264*0b57cec5SDimitry Andric 
265*0b57cec5SDimitry Andric   InsertIterationSetup(LoopCountInit);
266*0b57cec5SDimitry Andric 
267*0b57cec5SDimitry Andric   if (UsePHICounter || ForceHardwareLoopPHI) {
268*0b57cec5SDimitry Andric     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
269*0b57cec5SDimitry Andric     Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
270*0b57cec5SDimitry Andric     LoopDec->setOperand(0, EltsRem);
271*0b57cec5SDimitry Andric     UpdateBranch(LoopDec);
272*0b57cec5SDimitry Andric   } else
273*0b57cec5SDimitry Andric     InsertLoopDec();
274*0b57cec5SDimitry Andric 
275*0b57cec5SDimitry Andric   // Run through the basic blocks of the loop and see if any of them have dead
276*0b57cec5SDimitry Andric   // PHIs that can be removed.
277*0b57cec5SDimitry Andric   for (auto I : L->blocks())
278*0b57cec5SDimitry Andric     DeleteDeadPHIs(I);
279*0b57cec5SDimitry Andric }
280*0b57cec5SDimitry Andric 
281*0b57cec5SDimitry Andric static bool CanGenerateTest(Loop *L, Value *Count) {
282*0b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
283*0b57cec5SDimitry Andric   if (!Preheader->getSinglePredecessor())
284*0b57cec5SDimitry Andric     return false;
285*0b57cec5SDimitry Andric 
286*0b57cec5SDimitry Andric   BasicBlock *Pred = Preheader->getSinglePredecessor();
287*0b57cec5SDimitry Andric   if (!isa<BranchInst>(Pred->getTerminator()))
288*0b57cec5SDimitry Andric     return false;
289*0b57cec5SDimitry Andric 
290*0b57cec5SDimitry Andric   auto *BI = cast<BranchInst>(Pred->getTerminator());
291*0b57cec5SDimitry Andric   if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
292*0b57cec5SDimitry Andric     return false;
293*0b57cec5SDimitry Andric 
294*0b57cec5SDimitry Andric   // Check that the icmp is checking for equality of Count and zero and that
295*0b57cec5SDimitry Andric   // a non-zero value results in entering the loop.
296*0b57cec5SDimitry Andric   auto ICmp = cast<ICmpInst>(BI->getCondition());
297*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
298*0b57cec5SDimitry Andric   if (!ICmp->isEquality())
299*0b57cec5SDimitry Andric     return false;
300*0b57cec5SDimitry Andric 
301*0b57cec5SDimitry Andric   auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
302*0b57cec5SDimitry Andric     if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
303*0b57cec5SDimitry Andric       return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
304*0b57cec5SDimitry Andric     return false;
305*0b57cec5SDimitry Andric   };
306*0b57cec5SDimitry Andric 
307*0b57cec5SDimitry Andric   if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
308*0b57cec5SDimitry Andric     return false;
309*0b57cec5SDimitry Andric 
310*0b57cec5SDimitry Andric   unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
311*0b57cec5SDimitry Andric   if (BI->getSuccessor(SuccIdx) != Preheader)
312*0b57cec5SDimitry Andric     return false;
313*0b57cec5SDimitry Andric 
314*0b57cec5SDimitry Andric   return true;
315*0b57cec5SDimitry Andric }
316*0b57cec5SDimitry Andric 
317*0b57cec5SDimitry Andric Value *HardwareLoop::InitLoopCount() {
318*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
319*0b57cec5SDimitry Andric   // Can we replace a conditional branch with an intrinsic that sets the
320*0b57cec5SDimitry Andric   // loop counter and tests that is not zero?
321*0b57cec5SDimitry Andric 
322*0b57cec5SDimitry Andric   SCEVExpander SCEVE(SE, DL, "loopcnt");
323*0b57cec5SDimitry Andric   if (!ExitCount->getType()->isPointerTy() &&
324*0b57cec5SDimitry Andric       ExitCount->getType() != CountType)
325*0b57cec5SDimitry Andric     ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
326*0b57cec5SDimitry Andric 
327*0b57cec5SDimitry Andric   ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
328*0b57cec5SDimitry Andric 
329*0b57cec5SDimitry Andric   // If we're trying to use the 'test and set' form of the intrinsic, we need
330*0b57cec5SDimitry Andric   // to replace a conditional branch that is controlling entry to the loop. It
331*0b57cec5SDimitry Andric   // is likely (guaranteed?) that the preheader has an unconditional branch to
332*0b57cec5SDimitry Andric   // the loop header, so also check if it has a single predecessor.
333*0b57cec5SDimitry Andric   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
334*0b57cec5SDimitry Andric                                   SE.getZero(ExitCount->getType()))) {
335*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
336*0b57cec5SDimitry Andric     UseLoopGuard |= ForceGuardLoopEntry;
337*0b57cec5SDimitry Andric   } else
338*0b57cec5SDimitry Andric     UseLoopGuard = false;
339*0b57cec5SDimitry Andric 
340*0b57cec5SDimitry Andric   BasicBlock *BB = L->getLoopPreheader();
341*0b57cec5SDimitry Andric   if (UseLoopGuard && BB->getSinglePredecessor() &&
342*0b57cec5SDimitry Andric       cast<BranchInst>(BB->getTerminator())->isUnconditional())
343*0b57cec5SDimitry Andric     BB = BB->getSinglePredecessor();
344*0b57cec5SDimitry Andric 
345*0b57cec5SDimitry Andric   if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
346*0b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
347*0b57cec5SDimitry Andric                << *ExitCount << "\n");
348*0b57cec5SDimitry Andric     return nullptr;
349*0b57cec5SDimitry Andric   }
350*0b57cec5SDimitry Andric 
351*0b57cec5SDimitry Andric   Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
352*0b57cec5SDimitry Andric                                      BB->getTerminator());
353*0b57cec5SDimitry Andric 
354*0b57cec5SDimitry Andric   // FIXME: We've expanded Count where we hope to insert the counter setting
355*0b57cec5SDimitry Andric   // intrinsic. But, in the case of the 'test and set' form, we may fallback to
356*0b57cec5SDimitry Andric   // the just 'set' form and in which case the insertion block is most likely
357*0b57cec5SDimitry Andric   // different. It means there will be instruction(s) in a block that possibly
358*0b57cec5SDimitry Andric   // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
359*0b57cec5SDimitry Andric   // but it's doesn't appear to work in all cases.
360*0b57cec5SDimitry Andric 
361*0b57cec5SDimitry Andric   UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
362*0b57cec5SDimitry Andric   BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
363*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
364*0b57cec5SDimitry Andric              << " - Expanded Count in " << BB->getName() << "\n"
365*0b57cec5SDimitry Andric              << " - Will insert set counter intrinsic into: "
366*0b57cec5SDimitry Andric              << BeginBB->getName() << "\n");
367*0b57cec5SDimitry Andric   return Count;
368*0b57cec5SDimitry Andric }
369*0b57cec5SDimitry Andric 
370*0b57cec5SDimitry Andric void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
371*0b57cec5SDimitry Andric   IRBuilder<> Builder(BeginBB->getTerminator());
372*0b57cec5SDimitry Andric   Type *Ty = LoopCountInit->getType();
373*0b57cec5SDimitry Andric   Intrinsic::ID ID = UseLoopGuard ?
374*0b57cec5SDimitry Andric     Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
375*0b57cec5SDimitry Andric   Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
376*0b57cec5SDimitry Andric   Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
377*0b57cec5SDimitry Andric 
378*0b57cec5SDimitry Andric   // Use the return value of the intrinsic to control the entry of the loop.
379*0b57cec5SDimitry Andric   if (UseLoopGuard) {
380*0b57cec5SDimitry Andric     assert((isa<BranchInst>(BeginBB->getTerminator()) &&
381*0b57cec5SDimitry Andric             cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
382*0b57cec5SDimitry Andric            "Expected conditional branch");
383*0b57cec5SDimitry Andric     auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
384*0b57cec5SDimitry Andric     LoopGuard->setCondition(SetCount);
385*0b57cec5SDimitry Andric     if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
386*0b57cec5SDimitry Andric       LoopGuard->swapSuccessors();
387*0b57cec5SDimitry Andric   }
388*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
389*0b57cec5SDimitry Andric              << *SetCount << "\n");
390*0b57cec5SDimitry Andric }
391*0b57cec5SDimitry Andric 
392*0b57cec5SDimitry Andric void HardwareLoop::InsertLoopDec() {
393*0b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
394*0b57cec5SDimitry Andric 
395*0b57cec5SDimitry Andric   Function *DecFunc =
396*0b57cec5SDimitry Andric     Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
397*0b57cec5SDimitry Andric                               LoopDecrement->getType());
398*0b57cec5SDimitry Andric   Value *Ops[] = { LoopDecrement };
399*0b57cec5SDimitry Andric   Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
400*0b57cec5SDimitry Andric   Value *OldCond = ExitBranch->getCondition();
401*0b57cec5SDimitry Andric   ExitBranch->setCondition(NewCond);
402*0b57cec5SDimitry Andric 
403*0b57cec5SDimitry Andric   // The false branch must exit the loop.
404*0b57cec5SDimitry Andric   if (!L->contains(ExitBranch->getSuccessor(0)))
405*0b57cec5SDimitry Andric     ExitBranch->swapSuccessors();
406*0b57cec5SDimitry Andric 
407*0b57cec5SDimitry Andric   // The old condition may be dead now, and may have even created a dead PHI
408*0b57cec5SDimitry Andric   // (the original induction variable).
409*0b57cec5SDimitry Andric   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
410*0b57cec5SDimitry Andric 
411*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
412*0b57cec5SDimitry Andric }
413*0b57cec5SDimitry Andric 
414*0b57cec5SDimitry Andric Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
415*0b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
416*0b57cec5SDimitry Andric 
417*0b57cec5SDimitry Andric   Function *DecFunc =
418*0b57cec5SDimitry Andric       Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
419*0b57cec5SDimitry Andric                                 { EltsRem->getType(), EltsRem->getType(),
420*0b57cec5SDimitry Andric                                   LoopDecrement->getType()
421*0b57cec5SDimitry Andric                                 });
422*0b57cec5SDimitry Andric   Value *Ops[] = { EltsRem, LoopDecrement };
423*0b57cec5SDimitry Andric   Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
424*0b57cec5SDimitry Andric 
425*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
426*0b57cec5SDimitry Andric   return cast<Instruction>(Call);
427*0b57cec5SDimitry Andric }
428*0b57cec5SDimitry Andric 
429*0b57cec5SDimitry Andric PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
430*0b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
431*0b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
432*0b57cec5SDimitry Andric   BasicBlock *Latch = ExitBranch->getParent();
433*0b57cec5SDimitry Andric   IRBuilder<> Builder(Header->getFirstNonPHI());
434*0b57cec5SDimitry Andric   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
435*0b57cec5SDimitry Andric   Index->addIncoming(NumElts, Preheader);
436*0b57cec5SDimitry Andric   Index->addIncoming(EltsRem, Latch);
437*0b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
438*0b57cec5SDimitry Andric   return Index;
439*0b57cec5SDimitry Andric }
440*0b57cec5SDimitry Andric 
441*0b57cec5SDimitry Andric void HardwareLoop::UpdateBranch(Value *EltsRem) {
442*0b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
443*0b57cec5SDimitry Andric   Value *NewCond =
444*0b57cec5SDimitry Andric     CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
445*0b57cec5SDimitry Andric   Value *OldCond = ExitBranch->getCondition();
446*0b57cec5SDimitry Andric   ExitBranch->setCondition(NewCond);
447*0b57cec5SDimitry Andric 
448*0b57cec5SDimitry Andric   // The false branch must exit the loop.
449*0b57cec5SDimitry Andric   if (!L->contains(ExitBranch->getSuccessor(0)))
450*0b57cec5SDimitry Andric     ExitBranch->swapSuccessors();
451*0b57cec5SDimitry Andric 
452*0b57cec5SDimitry Andric   // The old condition may be dead now, and may have even created a dead PHI
453*0b57cec5SDimitry Andric   // (the original induction variable).
454*0b57cec5SDimitry Andric   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
455*0b57cec5SDimitry Andric }
456*0b57cec5SDimitry Andric 
457*0b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
458*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
459*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
460*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
461*0b57cec5SDimitry Andric INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
462*0b57cec5SDimitry Andric 
463*0b57cec5SDimitry Andric FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
464