xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/HardwareLoops.cpp (revision 480093f4440d54b30b3025afeac24b48f2ba7a2e)
10b57cec5SDimitry Andric //===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric /// \file
90b57cec5SDimitry Andric /// Insert hardware loop intrinsics into loops which are deemed profitable by
100b57cec5SDimitry Andric /// the target, by querying TargetTransformInfo. A hardware loop comprises of
110b57cec5SDimitry Andric /// two intrinsics: one, outside the loop, to set the loop iteration count and
120b57cec5SDimitry Andric /// another, in the exit block, to decrement the counter. The decremented value
130b57cec5SDimitry Andric /// can either be carried through the loop via a phi or handled in some opaque
140b57cec5SDimitry Andric /// way by the target.
150b57cec5SDimitry Andric ///
160b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
21*480093f4SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpander.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
250b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
260b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
270b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
28*480093f4SDimitry Andric #include "llvm/IR/Constants.h"
290b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
300b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
310b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
320b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
330b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
340b57cec5SDimitry Andric #include "llvm/IR/Value.h"
35*480093f4SDimitry Andric #include "llvm/InitializePasses.h"
36*480093f4SDimitry Andric #include "llvm/Pass.h"
37*480093f4SDimitry Andric #include "llvm/PassRegistry.h"
38*480093f4SDimitry Andric #include "llvm/PassSupport.h"
39*480093f4SDimitry Andric #include "llvm/Support/CommandLine.h"
400b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
410b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
420b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
430b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
440b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric #define DEBUG_TYPE "hardware-loops"
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric #define HW_LOOPS_NAME "Hardware Loop Insertion"
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric using namespace llvm;
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric static cl::opt<bool>
540b57cec5SDimitry Andric ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
550b57cec5SDimitry Andric                    cl::desc("Force hardware loops intrinsics to be inserted"));
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric static cl::opt<bool>
580b57cec5SDimitry Andric ForceHardwareLoopPHI(
590b57cec5SDimitry Andric   "force-hardware-loop-phi", cl::Hidden, cl::init(false),
600b57cec5SDimitry Andric   cl::desc("Force hardware loop counter to be updated through a phi"));
610b57cec5SDimitry Andric 
620b57cec5SDimitry Andric static cl::opt<bool>
630b57cec5SDimitry Andric ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
640b57cec5SDimitry Andric                 cl::desc("Force allowance of nested hardware loops"));
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric static cl::opt<unsigned>
670b57cec5SDimitry Andric LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
680b57cec5SDimitry Andric             cl::desc("Set the loop decrement value"));
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric static cl::opt<unsigned>
710b57cec5SDimitry Andric CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
720b57cec5SDimitry Andric                 cl::desc("Set the loop counter bitwidth"));
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric static cl::opt<bool>
750b57cec5SDimitry Andric ForceGuardLoopEntry(
760b57cec5SDimitry Andric   "force-hardware-loop-guard", cl::Hidden, cl::init(false),
770b57cec5SDimitry Andric   cl::desc("Force generation of loop guard intrinsic"));
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
800b57cec5SDimitry Andric 
81*480093f4SDimitry Andric #ifndef NDEBUG
82*480093f4SDimitry Andric static void debugHWLoopFailure(const StringRef DebugMsg,
83*480093f4SDimitry Andric     Instruction *I) {
84*480093f4SDimitry Andric   dbgs() << "HWLoops: " << DebugMsg;
85*480093f4SDimitry Andric   if (I)
86*480093f4SDimitry Andric     dbgs() << ' ' << *I;
87*480093f4SDimitry Andric   else
88*480093f4SDimitry Andric     dbgs() << '.';
89*480093f4SDimitry Andric   dbgs() << '\n';
90*480093f4SDimitry Andric }
91*480093f4SDimitry Andric #endif
92*480093f4SDimitry Andric 
93*480093f4SDimitry Andric static OptimizationRemarkAnalysis
94*480093f4SDimitry Andric createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) {
95*480093f4SDimitry Andric   Value *CodeRegion = L->getHeader();
96*480093f4SDimitry Andric   DebugLoc DL = L->getStartLoc();
97*480093f4SDimitry Andric 
98*480093f4SDimitry Andric   if (I) {
99*480093f4SDimitry Andric     CodeRegion = I->getParent();
100*480093f4SDimitry Andric     // If there is no debug location attached to the instruction, revert back to
101*480093f4SDimitry Andric     // using the loop's.
102*480093f4SDimitry Andric     if (I->getDebugLoc())
103*480093f4SDimitry Andric       DL = I->getDebugLoc();
104*480093f4SDimitry Andric   }
105*480093f4SDimitry Andric 
106*480093f4SDimitry Andric   OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion);
107*480093f4SDimitry Andric   R << "hardware-loop not created: ";
108*480093f4SDimitry Andric   return R;
109*480093f4SDimitry Andric }
110*480093f4SDimitry Andric 
1110b57cec5SDimitry Andric namespace {
1120b57cec5SDimitry Andric 
113*480093f4SDimitry Andric   void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag,
114*480093f4SDimitry Andric       OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) {
115*480093f4SDimitry Andric     LLVM_DEBUG(debugHWLoopFailure(Msg, I));
116*480093f4SDimitry Andric     ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg);
117*480093f4SDimitry Andric   }
118*480093f4SDimitry Andric 
1190b57cec5SDimitry Andric   using TTI = TargetTransformInfo;
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   class HardwareLoops : public FunctionPass {
1220b57cec5SDimitry Andric   public:
1230b57cec5SDimitry Andric     static char ID;
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric     HardwareLoops() : FunctionPass(ID) {
1260b57cec5SDimitry Andric       initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
1270b57cec5SDimitry Andric     }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric     bool runOnFunction(Function &F) override;
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric     void getAnalysisUsage(AnalysisUsage &AU) const override {
1320b57cec5SDimitry Andric       AU.addRequired<LoopInfoWrapperPass>();
1330b57cec5SDimitry Andric       AU.addPreserved<LoopInfoWrapperPass>();
1340b57cec5SDimitry Andric       AU.addRequired<DominatorTreeWrapperPass>();
1350b57cec5SDimitry Andric       AU.addPreserved<DominatorTreeWrapperPass>();
1360b57cec5SDimitry Andric       AU.addRequired<ScalarEvolutionWrapperPass>();
1370b57cec5SDimitry Andric       AU.addRequired<AssumptionCacheTracker>();
1380b57cec5SDimitry Andric       AU.addRequired<TargetTransformInfoWrapperPass>();
139*480093f4SDimitry Andric       AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1400b57cec5SDimitry Andric     }
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric     // Try to convert the given Loop into a hardware loop.
1430b57cec5SDimitry Andric     bool TryConvertLoop(Loop *L);
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric     // Given that the target believes the loop to be profitable, try to
1460b57cec5SDimitry Andric     // convert it.
1470b57cec5SDimitry Andric     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   private:
1500b57cec5SDimitry Andric     ScalarEvolution *SE = nullptr;
1510b57cec5SDimitry Andric     LoopInfo *LI = nullptr;
1520b57cec5SDimitry Andric     const DataLayout *DL = nullptr;
153*480093f4SDimitry Andric     OptimizationRemarkEmitter *ORE = nullptr;
1540b57cec5SDimitry Andric     const TargetTransformInfo *TTI = nullptr;
1550b57cec5SDimitry Andric     DominatorTree *DT = nullptr;
1560b57cec5SDimitry Andric     bool PreserveLCSSA = false;
1570b57cec5SDimitry Andric     AssumptionCache *AC = nullptr;
1580b57cec5SDimitry Andric     TargetLibraryInfo *LibInfo = nullptr;
1590b57cec5SDimitry Andric     Module *M = nullptr;
1600b57cec5SDimitry Andric     bool MadeChange = false;
1610b57cec5SDimitry Andric   };
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric   class HardwareLoop {
1640b57cec5SDimitry Andric     // Expand the trip count scev into a value that we can use.
1650b57cec5SDimitry Andric     Value *InitLoopCount();
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric     // Insert the set_loop_iteration intrinsic.
1680b57cec5SDimitry Andric     void InsertIterationSetup(Value *LoopCountInit);
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric     // Insert the loop_decrement intrinsic.
1710b57cec5SDimitry Andric     void InsertLoopDec();
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric     // Insert the loop_decrement_reg intrinsic.
1740b57cec5SDimitry Andric     Instruction *InsertLoopRegDec(Value *EltsRem);
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric     // If the target requires the counter value to be updated in the loop,
1770b57cec5SDimitry Andric     // insert a phi to hold the value. The intended purpose is for use by
1780b57cec5SDimitry Andric     // loop_decrement_reg.
1790b57cec5SDimitry Andric     PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
1800b57cec5SDimitry Andric 
1810b57cec5SDimitry Andric     // Create a new cmp, that checks the returned value of loop_decrement*,
1820b57cec5SDimitry Andric     // and update the exit branch to use it.
1830b57cec5SDimitry Andric     void UpdateBranch(Value *EltsRem);
1840b57cec5SDimitry Andric 
1850b57cec5SDimitry Andric   public:
1860b57cec5SDimitry Andric     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
187*480093f4SDimitry Andric                  const DataLayout &DL,
188*480093f4SDimitry Andric                  OptimizationRemarkEmitter *ORE) :
189*480093f4SDimitry Andric       SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
1900b57cec5SDimitry Andric       ExitCount(Info.ExitCount),
1910b57cec5SDimitry Andric       CountType(Info.CountType),
1920b57cec5SDimitry Andric       ExitBranch(Info.ExitBranch),
1930b57cec5SDimitry Andric       LoopDecrement(Info.LoopDecrement),
1940b57cec5SDimitry Andric       UsePHICounter(Info.CounterInReg),
1950b57cec5SDimitry Andric       UseLoopGuard(Info.PerformEntryTest) { }
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric     void Create();
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric   private:
2000b57cec5SDimitry Andric     ScalarEvolution &SE;
2010b57cec5SDimitry Andric     const DataLayout &DL;
202*480093f4SDimitry Andric     OptimizationRemarkEmitter *ORE = nullptr;
2030b57cec5SDimitry Andric     Loop *L                 = nullptr;
2040b57cec5SDimitry Andric     Module *M               = nullptr;
2050b57cec5SDimitry Andric     const SCEV *ExitCount   = nullptr;
2060b57cec5SDimitry Andric     Type *CountType         = nullptr;
2070b57cec5SDimitry Andric     BranchInst *ExitBranch  = nullptr;
2080b57cec5SDimitry Andric     Value *LoopDecrement    = nullptr;
2090b57cec5SDimitry Andric     bool UsePHICounter      = false;
2100b57cec5SDimitry Andric     bool UseLoopGuard       = false;
2110b57cec5SDimitry Andric     BasicBlock *BeginBB     = nullptr;
2120b57cec5SDimitry Andric   };
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric 
2150b57cec5SDimitry Andric char HardwareLoops::ID = 0;
2160b57cec5SDimitry Andric 
2170b57cec5SDimitry Andric bool HardwareLoops::runOnFunction(Function &F) {
2180b57cec5SDimitry Andric   if (skipFunction(F))
2190b57cec5SDimitry Andric     return false;
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2240b57cec5SDimitry Andric   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2250b57cec5SDimitry Andric   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2260b57cec5SDimitry Andric   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2270b57cec5SDimitry Andric   DL = &F.getParent()->getDataLayout();
228*480093f4SDimitry Andric   ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2290b57cec5SDimitry Andric   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2308bcb0991SDimitry Andric   LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
2310b57cec5SDimitry Andric   PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
2320b57cec5SDimitry Andric   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2330b57cec5SDimitry Andric   M = F.getParent();
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
2360b57cec5SDimitry Andric     Loop *L = *I;
2370b57cec5SDimitry Andric     if (!L->getParentLoop())
2380b57cec5SDimitry Andric       TryConvertLoop(L);
2390b57cec5SDimitry Andric   }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   return MadeChange;
2420b57cec5SDimitry Andric }
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric // Return true if the search should stop, which will be when an inner loop is
2450b57cec5SDimitry Andric // converted and the parent loop doesn't support containing a hardware loop.
2460b57cec5SDimitry Andric bool HardwareLoops::TryConvertLoop(Loop *L) {
2470b57cec5SDimitry Andric   // Process nested loops first.
248*480093f4SDimitry Andric   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
249*480093f4SDimitry Andric     if (TryConvertLoop(*I)) {
250*480093f4SDimitry Andric       reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
251*480093f4SDimitry Andric                           ORE, L);
2520b57cec5SDimitry Andric       return true; // Stop search.
253*480093f4SDimitry Andric     }
254*480093f4SDimitry Andric   }
2550b57cec5SDimitry Andric 
2560b57cec5SDimitry Andric   HardwareLoopInfo HWLoopInfo(L);
257*480093f4SDimitry Andric   if (!HWLoopInfo.canAnalyze(*LI)) {
258*480093f4SDimitry Andric     reportHWLoopFailure("cannot analyze loop, irreducible control flow",
259*480093f4SDimitry Andric                         "HWLoopCannotAnalyze", ORE, L);
2600b57cec5SDimitry Andric     return false;
261*480093f4SDimitry Andric   }
2620b57cec5SDimitry Andric 
263*480093f4SDimitry Andric   if (!ForceHardwareLoops &&
264*480093f4SDimitry Andric       !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
265*480093f4SDimitry Andric     reportHWLoopFailure("it's not profitable to create a hardware-loop",
266*480093f4SDimitry Andric                         "HWLoopNotProfitable", ORE, L);
267*480093f4SDimitry Andric     return false;
268*480093f4SDimitry Andric   }
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric   // Allow overriding of the counter width and loop decrement value.
2710b57cec5SDimitry Andric   if (CounterBitWidth.getNumOccurrences())
2720b57cec5SDimitry Andric     HWLoopInfo.CountType =
2730b57cec5SDimitry Andric       IntegerType::get(M->getContext(), CounterBitWidth);
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   if (LoopDecrement.getNumOccurrences())
2760b57cec5SDimitry Andric     HWLoopInfo.LoopDecrement =
2770b57cec5SDimitry Andric       ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric   MadeChange |= TryConvertLoop(HWLoopInfo);
2800b57cec5SDimitry Andric   return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
2810b57cec5SDimitry Andric }
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric   Loop *L = HWLoopInfo.L;
2860b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric   if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
289*480093f4SDimitry Andric                                           ForceHardwareLoopPHI)) {
290*480093f4SDimitry Andric     // TODO: there can be many reasons a loop is not considered a
291*480093f4SDimitry Andric     // candidate, so we should let isHardwareLoopCandidate fill in the
292*480093f4SDimitry Andric     // reason and then report a better message here.
293*480093f4SDimitry Andric     reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L);
2940b57cec5SDimitry Andric     return false;
295*480093f4SDimitry Andric   }
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric   assert(
2980b57cec5SDimitry Andric       (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
2990b57cec5SDimitry Andric       "Hardware Loop must have set exit info.");
3000b57cec5SDimitry Andric 
3010b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric   // If we don't have a preheader, then insert one.
3040b57cec5SDimitry Andric   if (!Preheader)
3050b57cec5SDimitry Andric     Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
3060b57cec5SDimitry Andric   if (!Preheader)
3070b57cec5SDimitry Andric     return false;
3080b57cec5SDimitry Andric 
309*480093f4SDimitry Andric   HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
3100b57cec5SDimitry Andric   HWLoop.Create();
3110b57cec5SDimitry Andric   ++NumHWLoops;
3120b57cec5SDimitry Andric   return true;
3130b57cec5SDimitry Andric }
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric void HardwareLoop::Create() {
3160b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   Value *LoopCountInit = InitLoopCount();
319*480093f4SDimitry Andric   if (!LoopCountInit) {
320*480093f4SDimitry Andric     reportHWLoopFailure("could not safely create a loop count expression",
321*480093f4SDimitry Andric                         "HWLoopNotSafe", ORE, L);
3220b57cec5SDimitry Andric     return;
323*480093f4SDimitry Andric   }
3240b57cec5SDimitry Andric 
3250b57cec5SDimitry Andric   InsertIterationSetup(LoopCountInit);
3260b57cec5SDimitry Andric 
3270b57cec5SDimitry Andric   if (UsePHICounter || ForceHardwareLoopPHI) {
3280b57cec5SDimitry Andric     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
3290b57cec5SDimitry Andric     Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
3300b57cec5SDimitry Andric     LoopDec->setOperand(0, EltsRem);
3310b57cec5SDimitry Andric     UpdateBranch(LoopDec);
3320b57cec5SDimitry Andric   } else
3330b57cec5SDimitry Andric     InsertLoopDec();
3340b57cec5SDimitry Andric 
3350b57cec5SDimitry Andric   // Run through the basic blocks of the loop and see if any of them have dead
3360b57cec5SDimitry Andric   // PHIs that can be removed.
3370b57cec5SDimitry Andric   for (auto I : L->blocks())
3380b57cec5SDimitry Andric     DeleteDeadPHIs(I);
3390b57cec5SDimitry Andric }
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric static bool CanGenerateTest(Loop *L, Value *Count) {
3420b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
3430b57cec5SDimitry Andric   if (!Preheader->getSinglePredecessor())
3440b57cec5SDimitry Andric     return false;
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric   BasicBlock *Pred = Preheader->getSinglePredecessor();
3470b57cec5SDimitry Andric   if (!isa<BranchInst>(Pred->getTerminator()))
3480b57cec5SDimitry Andric     return false;
3490b57cec5SDimitry Andric 
3500b57cec5SDimitry Andric   auto *BI = cast<BranchInst>(Pred->getTerminator());
3510b57cec5SDimitry Andric   if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
3520b57cec5SDimitry Andric     return false;
3530b57cec5SDimitry Andric 
3540b57cec5SDimitry Andric   // Check that the icmp is checking for equality of Count and zero and that
3550b57cec5SDimitry Andric   // a non-zero value results in entering the loop.
3560b57cec5SDimitry Andric   auto ICmp = cast<ICmpInst>(BI->getCondition());
3570b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
3580b57cec5SDimitry Andric   if (!ICmp->isEquality())
3590b57cec5SDimitry Andric     return false;
3600b57cec5SDimitry Andric 
3610b57cec5SDimitry Andric   auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
3620b57cec5SDimitry Andric     if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
3630b57cec5SDimitry Andric       return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
3640b57cec5SDimitry Andric     return false;
3650b57cec5SDimitry Andric   };
3660b57cec5SDimitry Andric 
3670b57cec5SDimitry Andric   if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
3680b57cec5SDimitry Andric     return false;
3690b57cec5SDimitry Andric 
3700b57cec5SDimitry Andric   unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
3710b57cec5SDimitry Andric   if (BI->getSuccessor(SuccIdx) != Preheader)
3720b57cec5SDimitry Andric     return false;
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric   return true;
3750b57cec5SDimitry Andric }
3760b57cec5SDimitry Andric 
3770b57cec5SDimitry Andric Value *HardwareLoop::InitLoopCount() {
3780b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
3790b57cec5SDimitry Andric   // Can we replace a conditional branch with an intrinsic that sets the
3800b57cec5SDimitry Andric   // loop counter and tests that is not zero?
3810b57cec5SDimitry Andric 
3820b57cec5SDimitry Andric   SCEVExpander SCEVE(SE, DL, "loopcnt");
3830b57cec5SDimitry Andric   if (!ExitCount->getType()->isPointerTy() &&
3840b57cec5SDimitry Andric       ExitCount->getType() != CountType)
3850b57cec5SDimitry Andric     ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
3860b57cec5SDimitry Andric 
3870b57cec5SDimitry Andric   ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
3880b57cec5SDimitry Andric 
3890b57cec5SDimitry Andric   // If we're trying to use the 'test and set' form of the intrinsic, we need
3900b57cec5SDimitry Andric   // to replace a conditional branch that is controlling entry to the loop. It
3910b57cec5SDimitry Andric   // is likely (guaranteed?) that the preheader has an unconditional branch to
3920b57cec5SDimitry Andric   // the loop header, so also check if it has a single predecessor.
3930b57cec5SDimitry Andric   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
3940b57cec5SDimitry Andric                                   SE.getZero(ExitCount->getType()))) {
3950b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
3960b57cec5SDimitry Andric     UseLoopGuard |= ForceGuardLoopEntry;
3970b57cec5SDimitry Andric   } else
3980b57cec5SDimitry Andric     UseLoopGuard = false;
3990b57cec5SDimitry Andric 
4000b57cec5SDimitry Andric   BasicBlock *BB = L->getLoopPreheader();
4010b57cec5SDimitry Andric   if (UseLoopGuard && BB->getSinglePredecessor() &&
4020b57cec5SDimitry Andric       cast<BranchInst>(BB->getTerminator())->isUnconditional())
4030b57cec5SDimitry Andric     BB = BB->getSinglePredecessor();
4040b57cec5SDimitry Andric 
4050b57cec5SDimitry Andric   if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
4060b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
4070b57cec5SDimitry Andric                << *ExitCount << "\n");
4080b57cec5SDimitry Andric     return nullptr;
4090b57cec5SDimitry Andric   }
4100b57cec5SDimitry Andric 
4110b57cec5SDimitry Andric   Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
4120b57cec5SDimitry Andric                                      BB->getTerminator());
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric   // FIXME: We've expanded Count where we hope to insert the counter setting
4150b57cec5SDimitry Andric   // intrinsic. But, in the case of the 'test and set' form, we may fallback to
4160b57cec5SDimitry Andric   // the just 'set' form and in which case the insertion block is most likely
4170b57cec5SDimitry Andric   // different. It means there will be instruction(s) in a block that possibly
4180b57cec5SDimitry Andric   // aren't needed. The isLoopEntryGuardedByCond is trying to avoid this issue,
4190b57cec5SDimitry Andric   // but it's doesn't appear to work in all cases.
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric   UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
4220b57cec5SDimitry Andric   BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
4230b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
4240b57cec5SDimitry Andric              << " - Expanded Count in " << BB->getName() << "\n"
4250b57cec5SDimitry Andric              << " - Will insert set counter intrinsic into: "
4260b57cec5SDimitry Andric              << BeginBB->getName() << "\n");
4270b57cec5SDimitry Andric   return Count;
4280b57cec5SDimitry Andric }
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
4310b57cec5SDimitry Andric   IRBuilder<> Builder(BeginBB->getTerminator());
4320b57cec5SDimitry Andric   Type *Ty = LoopCountInit->getType();
4330b57cec5SDimitry Andric   Intrinsic::ID ID = UseLoopGuard ?
4340b57cec5SDimitry Andric     Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
4350b57cec5SDimitry Andric   Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
4360b57cec5SDimitry Andric   Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
4370b57cec5SDimitry Andric 
4380b57cec5SDimitry Andric   // Use the return value of the intrinsic to control the entry of the loop.
4390b57cec5SDimitry Andric   if (UseLoopGuard) {
4400b57cec5SDimitry Andric     assert((isa<BranchInst>(BeginBB->getTerminator()) &&
4410b57cec5SDimitry Andric             cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
4420b57cec5SDimitry Andric            "Expected conditional branch");
4430b57cec5SDimitry Andric     auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
4440b57cec5SDimitry Andric     LoopGuard->setCondition(SetCount);
4450b57cec5SDimitry Andric     if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
4460b57cec5SDimitry Andric       LoopGuard->swapSuccessors();
4470b57cec5SDimitry Andric   }
4480b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
4490b57cec5SDimitry Andric              << *SetCount << "\n");
4500b57cec5SDimitry Andric }
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric void HardwareLoop::InsertLoopDec() {
4530b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
4540b57cec5SDimitry Andric 
4550b57cec5SDimitry Andric   Function *DecFunc =
4560b57cec5SDimitry Andric     Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
4570b57cec5SDimitry Andric                               LoopDecrement->getType());
4580b57cec5SDimitry Andric   Value *Ops[] = { LoopDecrement };
4590b57cec5SDimitry Andric   Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
4600b57cec5SDimitry Andric   Value *OldCond = ExitBranch->getCondition();
4610b57cec5SDimitry Andric   ExitBranch->setCondition(NewCond);
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric   // The false branch must exit the loop.
4640b57cec5SDimitry Andric   if (!L->contains(ExitBranch->getSuccessor(0)))
4650b57cec5SDimitry Andric     ExitBranch->swapSuccessors();
4660b57cec5SDimitry Andric 
4670b57cec5SDimitry Andric   // The old condition may be dead now, and may have even created a dead PHI
4680b57cec5SDimitry Andric   // (the original induction variable).
4690b57cec5SDimitry Andric   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
4700b57cec5SDimitry Andric 
4710b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
4750b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric   Function *DecFunc =
4780b57cec5SDimitry Andric       Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
4790b57cec5SDimitry Andric                                 { EltsRem->getType(), EltsRem->getType(),
4800b57cec5SDimitry Andric                                   LoopDecrement->getType()
4810b57cec5SDimitry Andric                                 });
4820b57cec5SDimitry Andric   Value *Ops[] = { EltsRem, LoopDecrement };
4830b57cec5SDimitry Andric   Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
4840b57cec5SDimitry Andric 
4850b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
4860b57cec5SDimitry Andric   return cast<Instruction>(Call);
4870b57cec5SDimitry Andric }
4880b57cec5SDimitry Andric 
4890b57cec5SDimitry Andric PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
4900b57cec5SDimitry Andric   BasicBlock *Preheader = L->getLoopPreheader();
4910b57cec5SDimitry Andric   BasicBlock *Header = L->getHeader();
4920b57cec5SDimitry Andric   BasicBlock *Latch = ExitBranch->getParent();
4930b57cec5SDimitry Andric   IRBuilder<> Builder(Header->getFirstNonPHI());
4940b57cec5SDimitry Andric   PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
4950b57cec5SDimitry Andric   Index->addIncoming(NumElts, Preheader);
4960b57cec5SDimitry Andric   Index->addIncoming(EltsRem, Latch);
4970b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
4980b57cec5SDimitry Andric   return Index;
4990b57cec5SDimitry Andric }
5000b57cec5SDimitry Andric 
5010b57cec5SDimitry Andric void HardwareLoop::UpdateBranch(Value *EltsRem) {
5020b57cec5SDimitry Andric   IRBuilder<> CondBuilder(ExitBranch);
5030b57cec5SDimitry Andric   Value *NewCond =
5040b57cec5SDimitry Andric     CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
5050b57cec5SDimitry Andric   Value *OldCond = ExitBranch->getCondition();
5060b57cec5SDimitry Andric   ExitBranch->setCondition(NewCond);
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   // The false branch must exit the loop.
5090b57cec5SDimitry Andric   if (!L->contains(ExitBranch->getSuccessor(0)))
5100b57cec5SDimitry Andric     ExitBranch->swapSuccessors();
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric   // The old condition may be dead now, and may have even created a dead PHI
5130b57cec5SDimitry Andric   // (the original induction variable).
5140b57cec5SDimitry Andric   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
5150b57cec5SDimitry Andric }
5160b57cec5SDimitry Andric 
5170b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
5180b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
5190b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
5200b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
521*480093f4SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
5220b57cec5SDimitry Andric INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
5230b57cec5SDimitry Andric 
5240b57cec5SDimitry Andric FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
525