10b57cec5SDimitry Andric //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass removes the computation of provably redundant expressions that have 100b57cec5SDimitry Andric // been computed earlier in a previous iteration. It relies on the use of PHIs 110b57cec5SDimitry Andric // to identify loop carried dependences. This is scalar replacement for vector 120b57cec5SDimitry Andric // types. 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 150b57cec5SDimitry Andric 16e8d8bef9SDimitry Andric #include "HexagonVectorLoopCarriedReuse.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 190b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 220b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 230b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 240b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 250b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 260b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 270b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 280b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 29480093f4SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h" 300b57cec5SDimitry Andric #include "llvm/IR/Use.h" 310b57cec5SDimitry Andric #include "llvm/IR/User.h" 320b57cec5SDimitry Andric #include "llvm/IR/Value.h" 33480093f4SDimitry Andric #include "llvm/InitializePasses.h" 340b57cec5SDimitry Andric #include "llvm/Pass.h" 350b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 360b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 370b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 380b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 390b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 400b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 410b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h" 420b57cec5SDimitry Andric #include <algorithm> 430b57cec5SDimitry Andric #include <cassert> 440b57cec5SDimitry Andric #include <cstddef> 450b57cec5SDimitry Andric #include <map> 460b57cec5SDimitry Andric #include <memory> 470b57cec5SDimitry Andric #include <set> 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric using namespace llvm; 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric #define DEBUG_TYPE "hexagon-vlcr" 520b57cec5SDimitry Andric 530b57cec5SDimitry Andric STATISTIC(HexagonNumVectorLoopCarriedReuse, 540b57cec5SDimitry Andric "Number of values that were reused from a previous iteration."); 550b57cec5SDimitry Andric 5681ad6265SDimitry Andric static cl::opt<int> HexagonVLCRIterationLim( 5781ad6265SDimitry Andric "hexagon-vlcr-iteration-lim", cl::Hidden, 580b57cec5SDimitry Andric cl::desc("Maximum distance of loop carried dependences that are handled"), 5981ad6265SDimitry Andric cl::init(2)); 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric namespace llvm { 620b57cec5SDimitry Andric 63e8d8bef9SDimitry Andric void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &); 64e8d8bef9SDimitry Andric Pass *createHexagonVectorLoopCarriedReuseLegacyPass(); 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric } // end namespace llvm 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric namespace { 690b57cec5SDimitry Andric 700b57cec5SDimitry Andric // See info about DepChain in the comments at the top of this file. 710b57cec5SDimitry Andric using ChainOfDependences = SmallVector<Instruction *, 4>; 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric class DepChain { 740b57cec5SDimitry Andric ChainOfDependences Chain; 750b57cec5SDimitry Andric 760b57cec5SDimitry Andric public: 770b57cec5SDimitry Andric bool isIdentical(DepChain &Other) const { 780b57cec5SDimitry Andric if (Other.size() != size()) 790b57cec5SDimitry Andric return false; 800b57cec5SDimitry Andric ChainOfDependences &OtherChain = Other.getChain(); 810b57cec5SDimitry Andric for (int i = 0; i < size(); ++i) { 820b57cec5SDimitry Andric if (Chain[i] != OtherChain[i]) 830b57cec5SDimitry Andric return false; 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric return true; 860b57cec5SDimitry Andric } 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric ChainOfDependences &getChain() { 890b57cec5SDimitry Andric return Chain; 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric int size() const { 930b57cec5SDimitry Andric return Chain.size(); 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric void clear() { 970b57cec5SDimitry Andric Chain.clear(); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 1000b57cec5SDimitry Andric void push_back(Instruction *I) { 1010b57cec5SDimitry Andric Chain.push_back(I); 1020b57cec5SDimitry Andric } 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric int iterations() const { 1050b57cec5SDimitry Andric return size() - 1; 1060b57cec5SDimitry Andric } 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric Instruction *front() const { 1090b57cec5SDimitry Andric return Chain.front(); 1100b57cec5SDimitry Andric } 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric Instruction *back() const { 1130b57cec5SDimitry Andric return Chain.back(); 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric 1160b57cec5SDimitry Andric Instruction *&operator[](const int index) { 1170b57cec5SDimitry Andric return Chain[index]; 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); 1210b57cec5SDimitry Andric }; 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric LLVM_ATTRIBUTE_UNUSED 1240b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { 1250b57cec5SDimitry Andric const ChainOfDependences &CD = D.Chain; 1260b57cec5SDimitry Andric int ChainSize = CD.size(); 1270b57cec5SDimitry Andric OS << "**DepChain Start::**\n"; 1280b57cec5SDimitry Andric for (int i = 0; i < ChainSize -1; ++i) { 1290b57cec5SDimitry Andric OS << *(CD[i]) << " -->\n"; 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric OS << *CD[ChainSize-1] << "\n"; 1320b57cec5SDimitry Andric return OS; 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric struct ReuseValue { 1360b57cec5SDimitry Andric Instruction *Inst2Replace = nullptr; 1370b57cec5SDimitry Andric 1380b57cec5SDimitry Andric // In the new PHI node that we'll construct this is the value that'll be 139480093f4SDimitry Andric // used over the backedge. This is the value that gets reused from a 1400b57cec5SDimitry Andric // previous iteration. 1410b57cec5SDimitry Andric Instruction *BackedgeInst = nullptr; 1420b57cec5SDimitry Andric std::map<Instruction *, DepChain *> DepChains; 1430b57cec5SDimitry Andric int Iterations = -1; 1440b57cec5SDimitry Andric 1450b57cec5SDimitry Andric ReuseValue() = default; 1460b57cec5SDimitry Andric 1470b57cec5SDimitry Andric void reset() { 1480b57cec5SDimitry Andric Inst2Replace = nullptr; 1490b57cec5SDimitry Andric BackedgeInst = nullptr; 1500b57cec5SDimitry Andric DepChains.clear(); 1510b57cec5SDimitry Andric Iterations = -1; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric bool isDefined() { return Inst2Replace != nullptr; } 1540b57cec5SDimitry Andric }; 1550b57cec5SDimitry Andric 1560b57cec5SDimitry Andric LLVM_ATTRIBUTE_UNUSED 1570b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { 1580b57cec5SDimitry Andric OS << "** ReuseValue ***\n"; 1590b57cec5SDimitry Andric OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; 1600b57cec5SDimitry Andric OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; 1610b57cec5SDimitry Andric return OS; 1620b57cec5SDimitry Andric } 1630b57cec5SDimitry Andric 164e8d8bef9SDimitry Andric class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass { 1650b57cec5SDimitry Andric public: 1660b57cec5SDimitry Andric static char ID; 1670b57cec5SDimitry Andric 168e8d8bef9SDimitry Andric explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) { 1690b57cec5SDimitry Andric PassRegistry *PR = PassRegistry::getPassRegistry(); 170e8d8bef9SDimitry Andric initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR); 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric StringRef getPassName() const override { 1740b57cec5SDimitry Andric return "Hexagon-specific loop carried reuse for HVX vectors"; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1780b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID); 1790b57cec5SDimitry Andric AU.addRequiredID(LCSSAID); 1800b57cec5SDimitry Andric AU.addPreservedID(LCSSAID); 1810b57cec5SDimitry Andric AU.setPreservesCFG(); 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override; 185e8d8bef9SDimitry Andric }; 186e8d8bef9SDimitry Andric 187e8d8bef9SDimitry Andric class HexagonVectorLoopCarriedReuse { 188e8d8bef9SDimitry Andric public: 189e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){}; 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric bool run(); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric private: 1940b57cec5SDimitry Andric SetVector<DepChain *> Dependences; 1950b57cec5SDimitry Andric std::set<Instruction *> ReplacedInsts; 1960b57cec5SDimitry Andric Loop *CurLoop; 1970b57cec5SDimitry Andric ReuseValue ReuseCandidate; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric bool doVLCR(); 2000b57cec5SDimitry Andric void findLoopCarriedDeps(); 2010b57cec5SDimitry Andric void findValueToReuse(); 2020b57cec5SDimitry Andric void findDepChainFromPHI(Instruction *I, DepChain &D); 2030b57cec5SDimitry Andric void reuseValue(); 2040b57cec5SDimitry Andric Value *findValueInBlock(Value *Op, BasicBlock *BB); 2050b57cec5SDimitry Andric DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); 2060b57cec5SDimitry Andric bool isEquivalentOperation(Instruction *I1, Instruction *I2); 2070b57cec5SDimitry Andric bool canReplace(Instruction *I); 2080b57cec5SDimitry Andric bool isCallInstCommutative(CallInst *C); 2090b57cec5SDimitry Andric }; 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric } // end anonymous namespace 2120b57cec5SDimitry Andric 213e8d8bef9SDimitry Andric char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0; 2140b57cec5SDimitry Andric 215e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr", 216e8d8bef9SDimitry Andric "Hexagon-specific predictive commoning for HVX vectors", 217e8d8bef9SDimitry Andric false, false) 2180b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 2190b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 220e8d8bef9SDimitry Andric INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr", 221e8d8bef9SDimitry Andric "Hexagon-specific predictive commoning for HVX vectors", 222e8d8bef9SDimitry Andric false, false) 2230b57cec5SDimitry Andric 224e8d8bef9SDimitry Andric PreservedAnalyses 225e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM, 226e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR, 227e8d8bef9SDimitry Andric LPMUpdater &U) { 228e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse Vlcr(&L); 229e8d8bef9SDimitry Andric if (!Vlcr.run()) 230e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 231e8d8bef9SDimitry Andric PreservedAnalyses PA; 232e8d8bef9SDimitry Andric PA.preserveSet<CFGAnalyses>(); 233e8d8bef9SDimitry Andric return PA; 234e8d8bef9SDimitry Andric } 235e8d8bef9SDimitry Andric 236e8d8bef9SDimitry Andric bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L, 237e8d8bef9SDimitry Andric LPPassManager &LPM) { 2380b57cec5SDimitry Andric if (skipLoop(L)) 2390b57cec5SDimitry Andric return false; 240e8d8bef9SDimitry Andric HexagonVectorLoopCarriedReuse Vlcr(L); 241e8d8bef9SDimitry Andric return Vlcr.run(); 242e8d8bef9SDimitry Andric } 2430b57cec5SDimitry Andric 244e8d8bef9SDimitry Andric bool HexagonVectorLoopCarriedReuse::run() { 245e8d8bef9SDimitry Andric if (!CurLoop->getLoopPreheader()) 2460b57cec5SDimitry Andric return false; 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric // Work only on innermost loops. 249e8d8bef9SDimitry Andric if (!CurLoop->getSubLoops().empty()) 2500b57cec5SDimitry Andric return false; 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric // Work only on single basic blocks loops. 253e8d8bef9SDimitry Andric if (CurLoop->getNumBlocks() != 1) 2540b57cec5SDimitry Andric return false; 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric return doVLCR(); 2570b57cec5SDimitry Andric } 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) { 2600b57cec5SDimitry Andric switch (C->getCalledFunction()->getIntrinsicID()) { 2610b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddb: 2620b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddb_128B: 2630b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddh: 2640b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddh_128B: 2650b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddw: 2660b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddw_128B: 2670b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddubh: 2680b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddubh_128B: 2690b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vadduhw: 2700b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vadduhw_128B: 2710b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddhw: 2720b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vaddhw_128B: 2730b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxb: 2740b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxb_128B: 2750b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxh: 2760b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxh_128B: 2770b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxw: 2780b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxw_128B: 2790b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxub: 2800b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxub_128B: 2810b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxuh: 2820b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmaxuh_128B: 2830b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminub: 2840b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminub_128B: 2850b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminuh: 2860b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminuh_128B: 2870b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminb: 2880b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminb_128B: 2890b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminh: 2900b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminh_128B: 2910b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminw: 2920b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vminw_128B: 2930b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyub: 2940b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyub_128B: 2950b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyuh: 2960b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vmpyuh_128B: 2970b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgub: 2980b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgub_128B: 2990b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgh: 3000b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgh_128B: 3010b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguh: 3020b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguh_128B: 3030b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgw: 3040b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgw_128B: 3050b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgb: 3060b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavgb_128B: 3070b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguw: 3080b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vavguw_128B: 3090b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffh: 3100b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffh_128B: 3110b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffub: 3120b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffub_128B: 3130b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffuh: 3140b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffuh_128B: 3150b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffw: 3160b57cec5SDimitry Andric case Intrinsic::hexagon_V6_vabsdiffw_128B: 3170b57cec5SDimitry Andric return true; 3180b57cec5SDimitry Andric default: 3190b57cec5SDimitry Andric return false; 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric 3230b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, 3240b57cec5SDimitry Andric Instruction *I2) { 3250b57cec5SDimitry Andric if (!I1->isSameOperationAs(I2)) 3260b57cec5SDimitry Andric return false; 3270b57cec5SDimitry Andric // This check is in place specifically for intrinsics. isSameOperationAs will 3280b57cec5SDimitry Andric // return two for any two hexagon intrinsics because they are essentially the 3290b57cec5SDimitry Andric // same instruciton (CallInst). We need to scratch the surface to see if they 3300b57cec5SDimitry Andric // are calls to the same function. 3310b57cec5SDimitry Andric if (CallInst *C1 = dyn_cast<CallInst>(I1)) { 3320b57cec5SDimitry Andric if (CallInst *C2 = dyn_cast<CallInst>(I2)) { 3330b57cec5SDimitry Andric if (C1->getCalledFunction() != C2->getCalledFunction()) 3340b57cec5SDimitry Andric return false; 3350b57cec5SDimitry Andric } 3360b57cec5SDimitry Andric } 3370b57cec5SDimitry Andric 3380b57cec5SDimitry Andric // If both the Instructions are of Vector Type and any of the element 3390b57cec5SDimitry Andric // is integer constant, check their values too for equivalence. 3400b57cec5SDimitry Andric if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { 3410b57cec5SDimitry Andric unsigned NumOperands = I1->getNumOperands(); 3420b57cec5SDimitry Andric for (unsigned i = 0; i < NumOperands; ++i) { 3430b57cec5SDimitry Andric ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); 3440b57cec5SDimitry Andric ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); 3450b57cec5SDimitry Andric if(!C1) continue; 3460b57cec5SDimitry Andric assert(C2); 3470b57cec5SDimitry Andric if (C1->getSExtValue() != C2->getSExtValue()) 3480b57cec5SDimitry Andric return false; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric return true; 3530b57cec5SDimitry Andric } 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { 3560b57cec5SDimitry Andric const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 3570b57cec5SDimitry Andric if (!II) 3580b57cec5SDimitry Andric return true; 3590b57cec5SDimitry Andric 3600b57cec5SDimitry Andric switch (II->getIntrinsicID()) { 3610b57cec5SDimitry Andric case Intrinsic::hexagon_V6_hi: 3620b57cec5SDimitry Andric case Intrinsic::hexagon_V6_lo: 3630b57cec5SDimitry Andric case Intrinsic::hexagon_V6_hi_128B: 3640b57cec5SDimitry Andric case Intrinsic::hexagon_V6_lo_128B: 3650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); 3660b57cec5SDimitry Andric return false; 3670b57cec5SDimitry Andric default: 3680b57cec5SDimitry Andric return true; 3690b57cec5SDimitry Andric } 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findValueToReuse() { 3720b57cec5SDimitry Andric for (auto *D : Dependences) { 3730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); 3740b57cec5SDimitry Andric if (D->iterations() > HexagonVLCRIterationLim) { 3750b57cec5SDimitry Andric LLVM_DEBUG( 3760b57cec5SDimitry Andric dbgs() 3770b57cec5SDimitry Andric << ".. Skipping because number of iterations > than the limit\n"); 3780b57cec5SDimitry Andric continue; 3790b57cec5SDimitry Andric } 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric PHINode *PN = cast<PHINode>(D->front()); 3820b57cec5SDimitry Andric Instruction *BEInst = D->back(); 3830b57cec5SDimitry Andric int Iters = D->iterations(); 3840b57cec5SDimitry Andric BasicBlock *BB = PN->getParent(); 3850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN 3860b57cec5SDimitry Andric << " can be reused\n"); 3870b57cec5SDimitry Andric 3880b57cec5SDimitry Andric SmallVector<Instruction *, 4> PNUsers; 389349cc55cSDimitry Andric for (Use &U : PN->uses()) { 3900b57cec5SDimitry Andric Instruction *User = cast<Instruction>(U.getUser()); 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric if (User->getParent() != BB) 3930b57cec5SDimitry Andric continue; 3940b57cec5SDimitry Andric if (ReplacedInsts.count(User)) { 3950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << *User 3960b57cec5SDimitry Andric << " has already been replaced. Skipping...\n"); 3970b57cec5SDimitry Andric continue; 3980b57cec5SDimitry Andric } 3990b57cec5SDimitry Andric if (isa<PHINode>(User)) 4000b57cec5SDimitry Andric continue; 4010b57cec5SDimitry Andric if (User->mayHaveSideEffects()) 4020b57cec5SDimitry Andric continue; 4030b57cec5SDimitry Andric if (!canReplace(User)) 4040b57cec5SDimitry Andric continue; 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric PNUsers.push_back(User); 4070b57cec5SDimitry Andric } 4080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric // For each interesting use I of PN, find an Instruction BEUser that 4110b57cec5SDimitry Andric // performs the same operation as I on BEInst and whose other operands, 4120b57cec5SDimitry Andric // if any, can also be rematerialized in OtherBB. We stop when we find the 4130b57cec5SDimitry Andric // first such Instruction BEUser. This is because once BEUser is 4140b57cec5SDimitry Andric // rematerialized in OtherBB, we may find more such "fixup" opportunities 4150b57cec5SDimitry Andric // in this block. So, we'll start over again. 4160b57cec5SDimitry Andric for (Instruction *I : PNUsers) { 417349cc55cSDimitry Andric for (Use &U : BEInst->uses()) { 4180b57cec5SDimitry Andric Instruction *BEUser = cast<Instruction>(U.getUser()); 4190b57cec5SDimitry Andric 4200b57cec5SDimitry Andric if (BEUser->getParent() != BB) 4210b57cec5SDimitry Andric continue; 4220b57cec5SDimitry Andric if (!isEquivalentOperation(I, BEUser)) 4230b57cec5SDimitry Andric continue; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric int NumOperands = I->getNumOperands(); 4260b57cec5SDimitry Andric 4270b57cec5SDimitry Andric // Take operands of each PNUser one by one and try to find DepChain 4280b57cec5SDimitry Andric // with every operand of the BEUser. If any of the operands of BEUser 4290b57cec5SDimitry Andric // has DepChain with current operand of the PNUser, break the matcher 4300b57cec5SDimitry Andric // loop. Keep doing this for Every PNUser operand. If PNUser operand 4310b57cec5SDimitry Andric // does not have DepChain with any of the BEUser operand, break the 4320b57cec5SDimitry Andric // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate. 4330b57cec5SDimitry Andric // This ensures that DepChain exist for all the PNUser operand with 4340b57cec5SDimitry Andric // BEUser operand. This also ensures that DepChains are independent of 4350b57cec5SDimitry Andric // the positions in PNUser and BEUser. 4360b57cec5SDimitry Andric std::map<Instruction *, DepChain *> DepChains; 4370b57cec5SDimitry Andric CallInst *C1 = dyn_cast<CallInst>(I); 4380b57cec5SDimitry Andric if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) { 4390b57cec5SDimitry Andric bool Found = false; 4400b57cec5SDimitry Andric for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { 4410b57cec5SDimitry Andric Value *Op = I->getOperand(OpNo); 4420b57cec5SDimitry Andric Instruction *OpInst = dyn_cast<Instruction>(Op); 4430b57cec5SDimitry Andric Found = false; 4440b57cec5SDimitry Andric for (int T = 0; T < NumOperands; ++T) { 4450b57cec5SDimitry Andric Value *BEOp = BEUser->getOperand(T); 4460b57cec5SDimitry Andric Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); 4470b57cec5SDimitry Andric if (!OpInst && !BEOpInst) { 4480b57cec5SDimitry Andric if (Op == BEOp) { 4490b57cec5SDimitry Andric Found = true; 4500b57cec5SDimitry Andric break; 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric } 4530b57cec5SDimitry Andric 4540b57cec5SDimitry Andric if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst)) 4550b57cec5SDimitry Andric continue; 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); 4580b57cec5SDimitry Andric 4590b57cec5SDimitry Andric if (D) { 4600b57cec5SDimitry Andric Found = true; 4610b57cec5SDimitry Andric DepChains[OpInst] = D; 4620b57cec5SDimitry Andric break; 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric } 4650b57cec5SDimitry Andric if (!Found) { 4660b57cec5SDimitry Andric BEUser = nullptr; 4670b57cec5SDimitry Andric break; 4680b57cec5SDimitry Andric } 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric } else { 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { 4730b57cec5SDimitry Andric Value *Op = I->getOperand(OpNo); 4740b57cec5SDimitry Andric Value *BEOp = BEUser->getOperand(OpNo); 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric Instruction *OpInst = dyn_cast<Instruction>(Op); 4770b57cec5SDimitry Andric if (!OpInst) { 4780b57cec5SDimitry Andric if (Op == BEOp) 4790b57cec5SDimitry Andric continue; 4800b57cec5SDimitry Andric // Do not allow reuse to occur when the operands may be different 4810b57cec5SDimitry Andric // values. 4820b57cec5SDimitry Andric BEUser = nullptr; 4830b57cec5SDimitry Andric break; 4840b57cec5SDimitry Andric } 4850b57cec5SDimitry Andric 4860b57cec5SDimitry Andric Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); 4870b57cec5SDimitry Andric DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric if (D) { 4900b57cec5SDimitry Andric DepChains[OpInst] = D; 4910b57cec5SDimitry Andric } else { 4920b57cec5SDimitry Andric BEUser = nullptr; 4930b57cec5SDimitry Andric break; 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric } 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric if (BEUser) { 4980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found Value for reuse.\n"); 4990b57cec5SDimitry Andric ReuseCandidate.Inst2Replace = I; 5000b57cec5SDimitry Andric ReuseCandidate.BackedgeInst = BEUser; 5010b57cec5SDimitry Andric ReuseCandidate.DepChains = DepChains; 5020b57cec5SDimitry Andric ReuseCandidate.Iterations = Iters; 5030b57cec5SDimitry Andric return; 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric ReuseCandidate.reset(); 5060b57cec5SDimitry Andric } 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric } 5090b57cec5SDimitry Andric ReuseCandidate.reset(); 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, 5130b57cec5SDimitry Andric BasicBlock *BB) { 5140b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(Op); 5150b57cec5SDimitry Andric assert(PN); 5160b57cec5SDimitry Andric Value *ValueInBlock = PN->getIncomingValueForBlock(BB); 5170b57cec5SDimitry Andric return ValueInBlock; 5180b57cec5SDimitry Andric } 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::reuseValue() { 5210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << ReuseCandidate); 5220b57cec5SDimitry Andric Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; 5230b57cec5SDimitry Andric Instruction *BEInst = ReuseCandidate.BackedgeInst; 5240b57cec5SDimitry Andric int NumOperands = Inst2Replace->getNumOperands(); 5250b57cec5SDimitry Andric std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains; 5260b57cec5SDimitry Andric int Iterations = ReuseCandidate.Iterations; 5270b57cec5SDimitry Andric BasicBlock *LoopPH = CurLoop->getLoopPreheader(); 5280b57cec5SDimitry Andric assert(!DepChains.empty() && "No DepChains"); 5290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n"); 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric SmallVector<Instruction *, 4> InstsInPreheader; 5320b57cec5SDimitry Andric for (int i = 0; i < Iterations; ++i) { 5330b57cec5SDimitry Andric Instruction *InstInPreheader = Inst2Replace->clone(); 5340b57cec5SDimitry Andric SmallVector<Value *, 4> Ops; 5350b57cec5SDimitry Andric for (int j = 0; j < NumOperands; ++j) { 5360b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); 5370b57cec5SDimitry Andric if (!I) 5380b57cec5SDimitry Andric continue; 5390b57cec5SDimitry Andric // Get the DepChain corresponding to this operand. 5400b57cec5SDimitry Andric DepChain &D = *DepChains[I]; 5410b57cec5SDimitry Andric // Get the PHI for the iteration number and find 5420b57cec5SDimitry Andric // the incoming value from the Loop Preheader for 5430b57cec5SDimitry Andric // that PHI. 5440b57cec5SDimitry Andric Value *ValInPreheader = findValueInBlock(D[i], LoopPH); 5450b57cec5SDimitry Andric InstInPreheader->setOperand(j, ValInPreheader); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric InstsInPreheader.push_back(InstInPreheader); 5480b57cec5SDimitry Andric InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); 5490b57cec5SDimitry Andric InstInPreheader->insertBefore(LoopPH->getTerminator()); 5500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to " 5510b57cec5SDimitry Andric << LoopPH->getName() << "\n"); 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric BasicBlock *BB = BEInst->getParent(); 5540b57cec5SDimitry Andric IRBuilder<> IRB(BB); 555*5f757f3fSDimitry Andric IRB.SetInsertPoint(BB, BB->getFirstNonPHIIt()); 5560b57cec5SDimitry Andric Value *BEVal = BEInst; 5570b57cec5SDimitry Andric PHINode *NewPhi; 5580b57cec5SDimitry Andric for (int i = Iterations-1; i >=0 ; --i) { 5590b57cec5SDimitry Andric Instruction *InstInPreheader = InstsInPreheader[i]; 5600b57cec5SDimitry Andric NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); 5610b57cec5SDimitry Andric NewPhi->addIncoming(InstInPreheader, LoopPH); 5620b57cec5SDimitry Andric NewPhi->addIncoming(BEVal, BB); 5630b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() 5640b57cec5SDimitry Andric << "\n"); 5650b57cec5SDimitry Andric BEVal = NewPhi; 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric // We are in LCSSA form. So, a value defined inside the Loop is used only 5680b57cec5SDimitry Andric // inside the loop. So, the following is safe. 5690b57cec5SDimitry Andric Inst2Replace->replaceAllUsesWith(NewPhi); 5700b57cec5SDimitry Andric ReplacedInsts.insert(Inst2Replace); 5710b57cec5SDimitry Andric ++HexagonNumVectorLoopCarriedReuse; 5720b57cec5SDimitry Andric } 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric bool HexagonVectorLoopCarriedReuse::doVLCR() { 5750b57cec5SDimitry Andric assert(CurLoop->getSubLoops().empty() && 5760b57cec5SDimitry Andric "Can do VLCR on the innermost loop only"); 5770b57cec5SDimitry Andric assert((CurLoop->getNumBlocks() == 1) && 5780b57cec5SDimitry Andric "Can do VLCR only on single block loops"); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric bool Changed = false; 5810b57cec5SDimitry Andric bool Continue; 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); 5840b57cec5SDimitry Andric do { 5850b57cec5SDimitry Andric // Reset datastructures. 5860b57cec5SDimitry Andric Dependences.clear(); 5870b57cec5SDimitry Andric Continue = false; 5880b57cec5SDimitry Andric 5890b57cec5SDimitry Andric findLoopCarriedDeps(); 5900b57cec5SDimitry Andric findValueToReuse(); 5910b57cec5SDimitry Andric if (ReuseCandidate.isDefined()) { 5920b57cec5SDimitry Andric reuseValue(); 5930b57cec5SDimitry Andric Changed = true; 5940b57cec5SDimitry Andric Continue = true; 5950b57cec5SDimitry Andric } 5960b57cec5SDimitry Andric llvm::for_each(Dependences, std::default_delete<DepChain>()); 5970b57cec5SDimitry Andric } while (Continue); 5980b57cec5SDimitry Andric return Changed; 5990b57cec5SDimitry Andric } 6000b57cec5SDimitry Andric 6010b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, 6020b57cec5SDimitry Andric DepChain &D) { 6030b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(I); 6040b57cec5SDimitry Andric if (!PN) { 6050b57cec5SDimitry Andric D.push_back(I); 6060b57cec5SDimitry Andric return; 6070b57cec5SDimitry Andric } else { 6080b57cec5SDimitry Andric auto NumIncomingValues = PN->getNumIncomingValues(); 6090b57cec5SDimitry Andric if (NumIncomingValues != 2) { 6100b57cec5SDimitry Andric D.clear(); 6110b57cec5SDimitry Andric return; 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric BasicBlock *BB = PN->getParent(); 6150b57cec5SDimitry Andric if (BB != CurLoop->getHeader()) { 6160b57cec5SDimitry Andric D.clear(); 6170b57cec5SDimitry Andric return; 6180b57cec5SDimitry Andric } 6190b57cec5SDimitry Andric 6200b57cec5SDimitry Andric Value *BEVal = PN->getIncomingValueForBlock(BB); 6210b57cec5SDimitry Andric Instruction *BEInst = dyn_cast<Instruction>(BEVal); 6220b57cec5SDimitry Andric // This is a single block loop with a preheader, so at least 6230b57cec5SDimitry Andric // one value should come over the backedge. 6240b57cec5SDimitry Andric assert(BEInst && "There should be a value over the backedge"); 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric Value *PreHdrVal = 6270b57cec5SDimitry Andric PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); 6280b57cec5SDimitry Andric if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { 6290b57cec5SDimitry Andric D.clear(); 6300b57cec5SDimitry Andric return; 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric D.push_back(PN); 6330b57cec5SDimitry Andric findDepChainFromPHI(BEInst, D); 6340b57cec5SDimitry Andric } 6350b57cec5SDimitry Andric } 6360b57cec5SDimitry Andric 6370b57cec5SDimitry Andric DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, 6380b57cec5SDimitry Andric Instruction *I2, 6390b57cec5SDimitry Andric int Iters) { 6400b57cec5SDimitry Andric for (auto *D : Dependences) { 6410b57cec5SDimitry Andric if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) 6420b57cec5SDimitry Andric return D; 6430b57cec5SDimitry Andric } 6440b57cec5SDimitry Andric return nullptr; 6450b57cec5SDimitry Andric } 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { 6480b57cec5SDimitry Andric BasicBlock *BB = CurLoop->getHeader(); 6490b57cec5SDimitry Andric for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { 6500b57cec5SDimitry Andric auto *PN = cast<PHINode>(I); 6510b57cec5SDimitry Andric if (!isa<VectorType>(PN->getType())) 6520b57cec5SDimitry Andric continue; 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric DepChain *D = new DepChain(); 6550b57cec5SDimitry Andric findDepChainFromPHI(PN, *D); 6560b57cec5SDimitry Andric if (D->size() != 0) 6570b57cec5SDimitry Andric Dependences.insert(D); 6580b57cec5SDimitry Andric else 6590b57cec5SDimitry Andric delete D; 6600b57cec5SDimitry Andric } 6610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); 66204eeddc0SDimitry Andric LLVM_DEBUG(for (const DepChain *D : Dependences) dbgs() << *D << "\n";); 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric 665e8d8bef9SDimitry Andric Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() { 666e8d8bef9SDimitry Andric return new HexagonVectorLoopCarriedReuseLegacyPass(); 6670b57cec5SDimitry Andric } 668