10b57cec5SDimitry Andric //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 9*e8d8bef9SDimitry Andric #include "HexagonLoopIdiomRecognition.h" 100b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 110b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 120b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 130b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 140b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 150b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 160b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 170b57cec5SDimitry Andric #include "llvm/ADT/Triple.h" 180b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 190b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 20*e8d8bef9SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 210b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 220b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 230b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 280b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 290b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 300b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 310b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 320b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 330b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 340b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 350b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 360b57cec5SDimitry Andric #include "llvm/IR/Function.h" 370b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 380b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 390b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 400b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 410b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 420b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 43480093f4SDimitry Andric #include "llvm/IR/IntrinsicsHexagon.h" 440b57cec5SDimitry Andric #include "llvm/IR/Module.h" 45*e8d8bef9SDimitry Andric #include "llvm/IR/PassManager.h" 460b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 470b57cec5SDimitry Andric #include "llvm/IR/Type.h" 480b57cec5SDimitry Andric #include "llvm/IR/User.h" 490b57cec5SDimitry Andric #include "llvm/IR/Value.h" 50480093f4SDimitry Andric #include "llvm/InitializePasses.h" 510b57cec5SDimitry Andric #include "llvm/Pass.h" 520b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 530b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 540b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 550b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 560b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 570b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h" 580b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 590b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 600b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h" 61480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 625ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 630b57cec5SDimitry Andric #include <algorithm> 640b57cec5SDimitry Andric #include <array> 650b57cec5SDimitry Andric #include <cassert> 660b57cec5SDimitry Andric #include <cstdint> 670b57cec5SDimitry Andric #include <cstdlib> 680b57cec5SDimitry Andric #include <deque> 690b57cec5SDimitry Andric #include <functional> 700b57cec5SDimitry Andric #include <iterator> 710b57cec5SDimitry Andric #include <map> 720b57cec5SDimitry Andric #include <set> 730b57cec5SDimitry Andric #include <utility> 740b57cec5SDimitry Andric #include <vector> 750b57cec5SDimitry Andric 76480093f4SDimitry Andric #define DEBUG_TYPE "hexagon-lir" 77480093f4SDimitry Andric 780b57cec5SDimitry Andric using namespace llvm; 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom", 810b57cec5SDimitry Andric cl::Hidden, cl::init(false), 820b57cec5SDimitry Andric cl::desc("Disable generation of memcpy in loop idiom recognition")); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom", 850b57cec5SDimitry Andric cl::Hidden, cl::init(false), 860b57cec5SDimitry Andric cl::desc("Disable generation of memmove in loop idiom recognition")); 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", 890b57cec5SDimitry Andric cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " 900b57cec5SDimitry Andric "check guarding the memmove.")); 910b57cec5SDimitry Andric 920b57cec5SDimitry Andric static cl::opt<unsigned> CompileTimeMemSizeThreshold( 930b57cec5SDimitry Andric "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), 940b57cec5SDimitry Andric cl::desc("Threshold (in bytes) to perform the transformation, if the " 950b57cec5SDimitry Andric "runtime loop count (mem transfer size) is known at compile-time.")); 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom", 980b57cec5SDimitry Andric cl::Hidden, cl::init(true), 990b57cec5SDimitry Andric cl::desc("Only enable generating memmove in non-nested loops")); 1000b57cec5SDimitry Andric 1018bcb0991SDimitry Andric static cl::opt<bool> HexagonVolatileMemcpy( 1028bcb0991SDimitry Andric "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), 1030b57cec5SDimitry Andric cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); 1040b57cec5SDimitry Andric 1050b57cec5SDimitry Andric static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), 1060b57cec5SDimitry Andric cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); 1070b57cec5SDimitry Andric 1080b57cec5SDimitry Andric static const char *HexagonVolatileMemcpyName 1090b57cec5SDimitry Andric = "hexagon_memcpy_forward_vp4cp4n2"; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric 1120b57cec5SDimitry Andric namespace llvm { 1130b57cec5SDimitry Andric 114*e8d8bef9SDimitry Andric void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); 1150b57cec5SDimitry Andric Pass *createHexagonLoopIdiomPass(); 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric } // end namespace llvm 1180b57cec5SDimitry Andric 1190b57cec5SDimitry Andric namespace { 1200b57cec5SDimitry Andric 121*e8d8bef9SDimitry Andric class HexagonLoopIdiomRecognize { 1220b57cec5SDimitry Andric public: 123*e8d8bef9SDimitry Andric explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, 124*e8d8bef9SDimitry Andric LoopInfo *LF, const TargetLibraryInfo *TLI, 125*e8d8bef9SDimitry Andric ScalarEvolution *SE) 126*e8d8bef9SDimitry Andric : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {} 1270b57cec5SDimitry Andric 128*e8d8bef9SDimitry Andric bool run(Loop *L); 1290b57cec5SDimitry Andric 1300b57cec5SDimitry Andric private: 1310b57cec5SDimitry Andric int getSCEVStride(const SCEVAddRecExpr *StoreEv); 1320b57cec5SDimitry Andric bool isLegalStore(Loop *CurLoop, StoreInst *SI); 1330b57cec5SDimitry Andric void collectStores(Loop *CurLoop, BasicBlock *BB, 1340b57cec5SDimitry Andric SmallVectorImpl<StoreInst *> &Stores); 1350b57cec5SDimitry Andric bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount); 1360b57cec5SDimitry Andric bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const; 1370b57cec5SDimitry Andric bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount, 1380b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks); 1390b57cec5SDimitry Andric bool runOnCountableLoop(Loop *L); 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric AliasAnalysis *AA; 1420b57cec5SDimitry Andric const DataLayout *DL; 1430b57cec5SDimitry Andric DominatorTree *DT; 1440b57cec5SDimitry Andric LoopInfo *LF; 1450b57cec5SDimitry Andric const TargetLibraryInfo *TLI; 1460b57cec5SDimitry Andric ScalarEvolution *SE; 1470b57cec5SDimitry Andric bool HasMemcpy, HasMemmove; 1480b57cec5SDimitry Andric }; 1490b57cec5SDimitry Andric 150*e8d8bef9SDimitry Andric class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass { 151*e8d8bef9SDimitry Andric public: 152*e8d8bef9SDimitry Andric static char ID; 153*e8d8bef9SDimitry Andric 154*e8d8bef9SDimitry Andric explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) { 155*e8d8bef9SDimitry Andric initializeHexagonLoopIdiomRecognizeLegacyPassPass( 156*e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 157*e8d8bef9SDimitry Andric } 158*e8d8bef9SDimitry Andric 159*e8d8bef9SDimitry Andric StringRef getPassName() const override { 160*e8d8bef9SDimitry Andric return "Recognize Hexagon-specific loop idioms"; 161*e8d8bef9SDimitry Andric } 162*e8d8bef9SDimitry Andric 163*e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 164*e8d8bef9SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 165*e8d8bef9SDimitry Andric AU.addRequiredID(LoopSimplifyID); 166*e8d8bef9SDimitry Andric AU.addRequiredID(LCSSAID); 167*e8d8bef9SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 168*e8d8bef9SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 169*e8d8bef9SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 170*e8d8bef9SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 171*e8d8bef9SDimitry Andric AU.addPreserved<TargetLibraryInfoWrapperPass>(); 172*e8d8bef9SDimitry Andric } 173*e8d8bef9SDimitry Andric 174*e8d8bef9SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override; 175*e8d8bef9SDimitry Andric }; 176*e8d8bef9SDimitry Andric 1770b57cec5SDimitry Andric struct Simplifier { 1780b57cec5SDimitry Andric struct Rule { 1790b57cec5SDimitry Andric using FuncType = std::function<Value *(Instruction *, LLVMContext &)>; 1800b57cec5SDimitry Andric Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} 1810b57cec5SDimitry Andric StringRef Name; // For debugging. 1820b57cec5SDimitry Andric FuncType Fn; 1830b57cec5SDimitry Andric }; 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric void addRule(StringRef N, const Rule::FuncType &F) { 1860b57cec5SDimitry Andric Rules.push_back(Rule(N, F)); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric private: 1900b57cec5SDimitry Andric struct WorkListType { 1910b57cec5SDimitry Andric WorkListType() = default; 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric void push_back(Value *V) { 1940b57cec5SDimitry Andric // Do not push back duplicates. 195*e8d8bef9SDimitry Andric if (!S.count(V)) { 196*e8d8bef9SDimitry Andric Q.push_back(V); 197*e8d8bef9SDimitry Andric S.insert(V); 198*e8d8bef9SDimitry Andric } 1990b57cec5SDimitry Andric } 2000b57cec5SDimitry Andric 2010b57cec5SDimitry Andric Value *pop_front_val() { 202*e8d8bef9SDimitry Andric Value *V = Q.front(); 203*e8d8bef9SDimitry Andric Q.pop_front(); 204*e8d8bef9SDimitry Andric S.erase(V); 2050b57cec5SDimitry Andric return V; 2060b57cec5SDimitry Andric } 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric bool empty() const { return Q.empty(); } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric private: 2110b57cec5SDimitry Andric std::deque<Value *> Q; 2120b57cec5SDimitry Andric std::set<Value *> S; 2130b57cec5SDimitry Andric }; 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric using ValueSetType = std::set<Value *>; 2160b57cec5SDimitry Andric 2170b57cec5SDimitry Andric std::vector<Rule> Rules; 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric public: 2200b57cec5SDimitry Andric struct Context { 2210b57cec5SDimitry Andric using ValueMapType = DenseMap<Value *, Value *>; 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric Value *Root; 2240b57cec5SDimitry Andric ValueSetType Used; // The set of all cloned values used by Root. 2250b57cec5SDimitry Andric ValueSetType Clones; // The set of all cloned values. 2260b57cec5SDimitry Andric LLVMContext &Ctx; 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric Context(Instruction *Exp) 2290b57cec5SDimitry Andric : Ctx(Exp->getParent()->getParent()->getContext()) { 2300b57cec5SDimitry Andric initialize(Exp); 2310b57cec5SDimitry Andric } 2320b57cec5SDimitry Andric 2330b57cec5SDimitry Andric ~Context() { cleanup(); } 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric void print(raw_ostream &OS, const Value *V) const; 2360b57cec5SDimitry Andric Value *materialize(BasicBlock *B, BasicBlock::iterator At); 2370b57cec5SDimitry Andric 2380b57cec5SDimitry Andric private: 2390b57cec5SDimitry Andric friend struct Simplifier; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric void initialize(Instruction *Exp); 2420b57cec5SDimitry Andric void cleanup(); 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric template <typename FuncT> void traverse(Value *V, FuncT F); 2450b57cec5SDimitry Andric void record(Value *V); 2460b57cec5SDimitry Andric void use(Value *V); 2470b57cec5SDimitry Andric void unuse(Value *V); 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric bool equal(const Instruction *I, const Instruction *J) const; 2500b57cec5SDimitry Andric Value *find(Value *Tree, Value *Sub) const; 2510b57cec5SDimitry Andric Value *subst(Value *Tree, Value *OldV, Value *NewV); 2520b57cec5SDimitry Andric void replace(Value *OldV, Value *NewV); 2530b57cec5SDimitry Andric void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); 2540b57cec5SDimitry Andric }; 2550b57cec5SDimitry Andric 2560b57cec5SDimitry Andric Value *simplify(Context &C); 2570b57cec5SDimitry Andric }; 2580b57cec5SDimitry Andric 2590b57cec5SDimitry Andric struct PE { 2600b57cec5SDimitry Andric PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric const Simplifier::Context &C; 2630b57cec5SDimitry Andric const Value *V; 2640b57cec5SDimitry Andric }; 2650b57cec5SDimitry Andric 2660b57cec5SDimitry Andric LLVM_ATTRIBUTE_USED 2670b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const PE &P) { 2680b57cec5SDimitry Andric P.C.print(OS, P.V ? P.V : P.C.Root); 2690b57cec5SDimitry Andric return OS; 2700b57cec5SDimitry Andric } 2710b57cec5SDimitry Andric 2720b57cec5SDimitry Andric } // end anonymous namespace 2730b57cec5SDimitry Andric 274*e8d8bef9SDimitry Andric char HexagonLoopIdiomRecognizeLegacyPass::ID = 0; 2750b57cec5SDimitry Andric 276*e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom", 2770b57cec5SDimitry Andric "Recognize Hexagon-specific loop idioms", false, false) 2780b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 2790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 2800b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 2810b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 2820b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2830b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 2840b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 285*e8d8bef9SDimitry Andric INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom", 2860b57cec5SDimitry Andric "Recognize Hexagon-specific loop idioms", false, false) 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric template <typename FuncT> 2890b57cec5SDimitry Andric void Simplifier::Context::traverse(Value *V, FuncT F) { 2900b57cec5SDimitry Andric WorkListType Q; 2910b57cec5SDimitry Andric Q.push_back(V); 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric while (!Q.empty()) { 2940b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 2950b57cec5SDimitry Andric if (!U || U->getParent()) 2960b57cec5SDimitry Andric continue; 2970b57cec5SDimitry Andric if (!F(U)) 2980b57cec5SDimitry Andric continue; 2990b57cec5SDimitry Andric for (Value *Op : U->operands()) 3000b57cec5SDimitry Andric Q.push_back(Op); 3010b57cec5SDimitry Andric } 3020b57cec5SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { 3050b57cec5SDimitry Andric const auto *U = dyn_cast<const Instruction>(V); 3060b57cec5SDimitry Andric if (!U) { 3070b57cec5SDimitry Andric OS << V << '(' << *V << ')'; 3080b57cec5SDimitry Andric return; 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric 3110b57cec5SDimitry Andric if (U->getParent()) { 3120b57cec5SDimitry Andric OS << U << '('; 3130b57cec5SDimitry Andric U->printAsOperand(OS, true); 3140b57cec5SDimitry Andric OS << ')'; 3150b57cec5SDimitry Andric return; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric unsigned N = U->getNumOperands(); 3190b57cec5SDimitry Andric if (N != 0) 3200b57cec5SDimitry Andric OS << U << '('; 3210b57cec5SDimitry Andric OS << U->getOpcodeName(); 3220b57cec5SDimitry Andric for (const Value *Op : U->operands()) { 3230b57cec5SDimitry Andric OS << ' '; 3240b57cec5SDimitry Andric print(OS, Op); 3250b57cec5SDimitry Andric } 3260b57cec5SDimitry Andric if (N != 0) 3270b57cec5SDimitry Andric OS << ')'; 3280b57cec5SDimitry Andric } 3290b57cec5SDimitry Andric 3300b57cec5SDimitry Andric void Simplifier::Context::initialize(Instruction *Exp) { 3310b57cec5SDimitry Andric // Perform a deep clone of the expression, set Root to the root 3320b57cec5SDimitry Andric // of the clone, and build a map from the cloned values to the 3330b57cec5SDimitry Andric // original ones. 3340b57cec5SDimitry Andric ValueMapType M; 3350b57cec5SDimitry Andric BasicBlock *Block = Exp->getParent(); 3360b57cec5SDimitry Andric WorkListType Q; 3370b57cec5SDimitry Andric Q.push_back(Exp); 3380b57cec5SDimitry Andric 3390b57cec5SDimitry Andric while (!Q.empty()) { 3400b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 3410b57cec5SDimitry Andric if (M.find(V) != M.end()) 3420b57cec5SDimitry Andric continue; 3430b57cec5SDimitry Andric if (Instruction *U = dyn_cast<Instruction>(V)) { 3440b57cec5SDimitry Andric if (isa<PHINode>(U) || U->getParent() != Block) 3450b57cec5SDimitry Andric continue; 3460b57cec5SDimitry Andric for (Value *Op : U->operands()) 3470b57cec5SDimitry Andric Q.push_back(Op); 3480b57cec5SDimitry Andric M.insert({U, U->clone()}); 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric } 3510b57cec5SDimitry Andric 3520b57cec5SDimitry Andric for (std::pair<Value*,Value*> P : M) { 3530b57cec5SDimitry Andric Instruction *U = cast<Instruction>(P.second); 3540b57cec5SDimitry Andric for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { 3550b57cec5SDimitry Andric auto F = M.find(U->getOperand(i)); 3560b57cec5SDimitry Andric if (F != M.end()) 3570b57cec5SDimitry Andric U->setOperand(i, F->second); 3580b57cec5SDimitry Andric } 3590b57cec5SDimitry Andric } 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric auto R = M.find(Exp); 3620b57cec5SDimitry Andric assert(R != M.end()); 3630b57cec5SDimitry Andric Root = R->second; 3640b57cec5SDimitry Andric 3650b57cec5SDimitry Andric record(Root); 3660b57cec5SDimitry Andric use(Root); 3670b57cec5SDimitry Andric } 3680b57cec5SDimitry Andric 3690b57cec5SDimitry Andric void Simplifier::Context::record(Value *V) { 3700b57cec5SDimitry Andric auto Record = [this](Instruction *U) -> bool { 3710b57cec5SDimitry Andric Clones.insert(U); 3720b57cec5SDimitry Andric return true; 3730b57cec5SDimitry Andric }; 3740b57cec5SDimitry Andric traverse(V, Record); 3750b57cec5SDimitry Andric } 3760b57cec5SDimitry Andric 3770b57cec5SDimitry Andric void Simplifier::Context::use(Value *V) { 3780b57cec5SDimitry Andric auto Use = [this](Instruction *U) -> bool { 3790b57cec5SDimitry Andric Used.insert(U); 3800b57cec5SDimitry Andric return true; 3810b57cec5SDimitry Andric }; 3820b57cec5SDimitry Andric traverse(V, Use); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric 3850b57cec5SDimitry Andric void Simplifier::Context::unuse(Value *V) { 3860b57cec5SDimitry Andric if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr) 3870b57cec5SDimitry Andric return; 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric auto Unuse = [this](Instruction *U) -> bool { 3900b57cec5SDimitry Andric if (!U->use_empty()) 3910b57cec5SDimitry Andric return false; 3920b57cec5SDimitry Andric Used.erase(U); 3930b57cec5SDimitry Andric return true; 3940b57cec5SDimitry Andric }; 3950b57cec5SDimitry Andric traverse(V, Unuse); 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { 3990b57cec5SDimitry Andric if (Tree == OldV) 4000b57cec5SDimitry Andric return NewV; 4010b57cec5SDimitry Andric if (OldV == NewV) 4020b57cec5SDimitry Andric return Tree; 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric WorkListType Q; 4050b57cec5SDimitry Andric Q.push_back(Tree); 4060b57cec5SDimitry Andric while (!Q.empty()) { 4070b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 4080b57cec5SDimitry Andric // If U is not an instruction, or it's not a clone, skip it. 4090b57cec5SDimitry Andric if (!U || U->getParent()) 4100b57cec5SDimitry Andric continue; 4110b57cec5SDimitry Andric for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { 4120b57cec5SDimitry Andric Value *Op = U->getOperand(i); 4130b57cec5SDimitry Andric if (Op == OldV) { 4140b57cec5SDimitry Andric U->setOperand(i, NewV); 4150b57cec5SDimitry Andric unuse(OldV); 4160b57cec5SDimitry Andric } else { 4170b57cec5SDimitry Andric Q.push_back(Op); 4180b57cec5SDimitry Andric } 4190b57cec5SDimitry Andric } 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric return Tree; 4220b57cec5SDimitry Andric } 4230b57cec5SDimitry Andric 4240b57cec5SDimitry Andric void Simplifier::Context::replace(Value *OldV, Value *NewV) { 4250b57cec5SDimitry Andric if (Root == OldV) { 4260b57cec5SDimitry Andric Root = NewV; 4270b57cec5SDimitry Andric use(Root); 4280b57cec5SDimitry Andric return; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric // NewV may be a complex tree that has just been created by one of the 4320b57cec5SDimitry Andric // transformation rules. We need to make sure that it is commoned with 4330b57cec5SDimitry Andric // the existing Root to the maximum extent possible. 4340b57cec5SDimitry Andric // Identify all subtrees of NewV (including NewV itself) that have 4350b57cec5SDimitry Andric // equivalent counterparts in Root, and replace those subtrees with 4360b57cec5SDimitry Andric // these counterparts. 4370b57cec5SDimitry Andric WorkListType Q; 4380b57cec5SDimitry Andric Q.push_back(NewV); 4390b57cec5SDimitry Andric while (!Q.empty()) { 4400b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 4410b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(V); 4420b57cec5SDimitry Andric if (!U || U->getParent()) 4430b57cec5SDimitry Andric continue; 4440b57cec5SDimitry Andric if (Value *DupV = find(Root, V)) { 4450b57cec5SDimitry Andric if (DupV != V) 4460b57cec5SDimitry Andric NewV = subst(NewV, V, DupV); 4470b57cec5SDimitry Andric } else { 4480b57cec5SDimitry Andric for (Value *Op : U->operands()) 4490b57cec5SDimitry Andric Q.push_back(Op); 4500b57cec5SDimitry Andric } 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric // Now, simply replace OldV with NewV in Root. 4540b57cec5SDimitry Andric Root = subst(Root, OldV, NewV); 4550b57cec5SDimitry Andric use(Root); 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric void Simplifier::Context::cleanup() { 4590b57cec5SDimitry Andric for (Value *V : Clones) { 4600b57cec5SDimitry Andric Instruction *U = cast<Instruction>(V); 4610b57cec5SDimitry Andric if (!U->getParent()) 4620b57cec5SDimitry Andric U->dropAllReferences(); 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric for (Value *V : Clones) { 4660b57cec5SDimitry Andric Instruction *U = cast<Instruction>(V); 4670b57cec5SDimitry Andric if (!U->getParent()) 4680b57cec5SDimitry Andric U->deleteValue(); 4690b57cec5SDimitry Andric } 4700b57cec5SDimitry Andric } 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric bool Simplifier::Context::equal(const Instruction *I, 4730b57cec5SDimitry Andric const Instruction *J) const { 4740b57cec5SDimitry Andric if (I == J) 4750b57cec5SDimitry Andric return true; 4760b57cec5SDimitry Andric if (!I->isSameOperationAs(J)) 4770b57cec5SDimitry Andric return false; 4780b57cec5SDimitry Andric if (isa<PHINode>(I)) 4790b57cec5SDimitry Andric return I->isIdenticalTo(J); 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { 4820b57cec5SDimitry Andric Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); 4830b57cec5SDimitry Andric if (OpI == OpJ) 4840b57cec5SDimitry Andric continue; 4850b57cec5SDimitry Andric auto *InI = dyn_cast<const Instruction>(OpI); 4860b57cec5SDimitry Andric auto *InJ = dyn_cast<const Instruction>(OpJ); 4870b57cec5SDimitry Andric if (InI && InJ) { 4880b57cec5SDimitry Andric if (!equal(InI, InJ)) 4890b57cec5SDimitry Andric return false; 4900b57cec5SDimitry Andric } else if (InI != InJ || !InI) 4910b57cec5SDimitry Andric return false; 4920b57cec5SDimitry Andric } 4930b57cec5SDimitry Andric return true; 4940b57cec5SDimitry Andric } 4950b57cec5SDimitry Andric 4960b57cec5SDimitry Andric Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { 4970b57cec5SDimitry Andric Instruction *SubI = dyn_cast<Instruction>(Sub); 4980b57cec5SDimitry Andric WorkListType Q; 4990b57cec5SDimitry Andric Q.push_back(Tree); 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric while (!Q.empty()) { 5020b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 5030b57cec5SDimitry Andric if (V == Sub) 5040b57cec5SDimitry Andric return V; 5050b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(V); 5060b57cec5SDimitry Andric if (!U || U->getParent()) 5070b57cec5SDimitry Andric continue; 5080b57cec5SDimitry Andric if (SubI && equal(SubI, U)) 5090b57cec5SDimitry Andric return U; 5100b57cec5SDimitry Andric assert(!isa<PHINode>(U)); 5110b57cec5SDimitry Andric for (Value *Op : U->operands()) 5120b57cec5SDimitry Andric Q.push_back(Op); 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric return nullptr; 5150b57cec5SDimitry Andric } 5160b57cec5SDimitry Andric 5170b57cec5SDimitry Andric void Simplifier::Context::link(Instruction *I, BasicBlock *B, 5180b57cec5SDimitry Andric BasicBlock::iterator At) { 5190b57cec5SDimitry Andric if (I->getParent()) 5200b57cec5SDimitry Andric return; 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric for (Value *Op : I->operands()) { 5230b57cec5SDimitry Andric if (Instruction *OpI = dyn_cast<Instruction>(Op)) 5240b57cec5SDimitry Andric link(OpI, B, At); 5250b57cec5SDimitry Andric } 5260b57cec5SDimitry Andric 5270b57cec5SDimitry Andric B->getInstList().insert(At, I); 5280b57cec5SDimitry Andric } 5290b57cec5SDimitry Andric 5300b57cec5SDimitry Andric Value *Simplifier::Context::materialize(BasicBlock *B, 5310b57cec5SDimitry Andric BasicBlock::iterator At) { 5320b57cec5SDimitry Andric if (Instruction *RootI = dyn_cast<Instruction>(Root)) 5330b57cec5SDimitry Andric link(RootI, B, At); 5340b57cec5SDimitry Andric return Root; 5350b57cec5SDimitry Andric } 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric Value *Simplifier::simplify(Context &C) { 5380b57cec5SDimitry Andric WorkListType Q; 5390b57cec5SDimitry Andric Q.push_back(C.Root); 5400b57cec5SDimitry Andric unsigned Count = 0; 5410b57cec5SDimitry Andric const unsigned Limit = SimplifyLimit; 5420b57cec5SDimitry Andric 5430b57cec5SDimitry Andric while (!Q.empty()) { 5440b57cec5SDimitry Andric if (Count++ >= Limit) 5450b57cec5SDimitry Andric break; 5460b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 5470b57cec5SDimitry Andric if (!U || U->getParent() || !C.Used.count(U)) 5480b57cec5SDimitry Andric continue; 5490b57cec5SDimitry Andric bool Changed = false; 5500b57cec5SDimitry Andric for (Rule &R : Rules) { 5510b57cec5SDimitry Andric Value *W = R.Fn(U, C.Ctx); 5520b57cec5SDimitry Andric if (!W) 5530b57cec5SDimitry Andric continue; 5540b57cec5SDimitry Andric Changed = true; 5550b57cec5SDimitry Andric C.record(W); 5560b57cec5SDimitry Andric C.replace(U, W); 5570b57cec5SDimitry Andric Q.push_back(C.Root); 5580b57cec5SDimitry Andric break; 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric if (!Changed) { 5610b57cec5SDimitry Andric for (Value *Op : U->operands()) 5620b57cec5SDimitry Andric Q.push_back(Op); 5630b57cec5SDimitry Andric } 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric return Count < Limit ? C.Root : nullptr; 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5690b57cec5SDimitry Andric // 5700b57cec5SDimitry Andric // Implementation of PolynomialMultiplyRecognize 5710b57cec5SDimitry Andric // 5720b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric namespace { 5750b57cec5SDimitry Andric 5760b57cec5SDimitry Andric class PolynomialMultiplyRecognize { 5770b57cec5SDimitry Andric public: 5780b57cec5SDimitry Andric explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl, 5790b57cec5SDimitry Andric const DominatorTree &dt, const TargetLibraryInfo &tli, 5800b57cec5SDimitry Andric ScalarEvolution &se) 5810b57cec5SDimitry Andric : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {} 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric bool recognize(); 5840b57cec5SDimitry Andric 5850b57cec5SDimitry Andric private: 5860b57cec5SDimitry Andric using ValueSeq = SetVector<Value *>; 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric IntegerType *getPmpyType() const { 5890b57cec5SDimitry Andric LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); 5900b57cec5SDimitry Andric return IntegerType::get(Ctx, 32); 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric bool isPromotableTo(Value *V, IntegerType *Ty); 5940b57cec5SDimitry Andric void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); 5950b57cec5SDimitry Andric bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); 5960b57cec5SDimitry Andric 5970b57cec5SDimitry Andric Value *getCountIV(BasicBlock *BB); 5980b57cec5SDimitry Andric bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); 5990b57cec5SDimitry Andric void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, 6000b57cec5SDimitry Andric ValueSeq &Late); 6010b57cec5SDimitry Andric bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late); 6020b57cec5SDimitry Andric bool commutesWithShift(Instruction *I); 6030b57cec5SDimitry Andric bool highBitsAreZero(Value *V, unsigned IterCount); 6040b57cec5SDimitry Andric bool keepsHighBitsZero(Value *V, unsigned IterCount); 6050b57cec5SDimitry Andric bool isOperandShifted(Instruction *I, Value *Op); 6060b57cec5SDimitry Andric bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB, 6070b57cec5SDimitry Andric unsigned IterCount); 6080b57cec5SDimitry Andric void cleanupLoopBody(BasicBlock *LoopB); 6090b57cec5SDimitry Andric 6100b57cec5SDimitry Andric struct ParsedValues { 6110b57cec5SDimitry Andric ParsedValues() = default; 6120b57cec5SDimitry Andric 6130b57cec5SDimitry Andric Value *M = nullptr; 6140b57cec5SDimitry Andric Value *P = nullptr; 6150b57cec5SDimitry Andric Value *Q = nullptr; 6160b57cec5SDimitry Andric Value *R = nullptr; 6170b57cec5SDimitry Andric Value *X = nullptr; 6180b57cec5SDimitry Andric Instruction *Res = nullptr; 6190b57cec5SDimitry Andric unsigned IterCount = 0; 6200b57cec5SDimitry Andric bool Left = false; 6210b57cec5SDimitry Andric bool Inv = false; 6220b57cec5SDimitry Andric }; 6230b57cec5SDimitry Andric 6240b57cec5SDimitry Andric bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV); 6250b57cec5SDimitry Andric bool matchRightShift(SelectInst *SelI, ParsedValues &PV); 6260b57cec5SDimitry Andric bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB, 6270b57cec5SDimitry Andric Value *CIV, ParsedValues &PV, bool PreScan); 6280b57cec5SDimitry Andric unsigned getInverseMxN(unsigned QP); 6290b57cec5SDimitry Andric Value *generate(BasicBlock::iterator At, ParsedValues &PV); 6300b57cec5SDimitry Andric 6310b57cec5SDimitry Andric void setupPreSimplifier(Simplifier &S); 6320b57cec5SDimitry Andric void setupPostSimplifier(Simplifier &S); 6330b57cec5SDimitry Andric 6340b57cec5SDimitry Andric Loop *CurLoop; 6350b57cec5SDimitry Andric const DataLayout &DL; 6360b57cec5SDimitry Andric const DominatorTree &DT; 6370b57cec5SDimitry Andric const TargetLibraryInfo &TLI; 6380b57cec5SDimitry Andric ScalarEvolution &SE; 6390b57cec5SDimitry Andric }; 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric } // end anonymous namespace 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { 6440b57cec5SDimitry Andric pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 6450b57cec5SDimitry Andric if (std::distance(PI, PE) != 2) 6460b57cec5SDimitry Andric return nullptr; 6470b57cec5SDimitry Andric BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI; 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { 6500b57cec5SDimitry Andric auto *PN = cast<PHINode>(I); 6510b57cec5SDimitry Andric Value *InitV = PN->getIncomingValueForBlock(PB); 6520b57cec5SDimitry Andric if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero()) 6530b57cec5SDimitry Andric continue; 6540b57cec5SDimitry Andric Value *IterV = PN->getIncomingValueForBlock(BB); 6550b57cec5SDimitry Andric auto *BO = dyn_cast<BinaryOperator>(IterV); 6568bcb0991SDimitry Andric if (!BO) 6578bcb0991SDimitry Andric continue; 6580b57cec5SDimitry Andric if (BO->getOpcode() != Instruction::Add) 6590b57cec5SDimitry Andric continue; 6600b57cec5SDimitry Andric Value *IncV = nullptr; 6610b57cec5SDimitry Andric if (BO->getOperand(0) == PN) 6620b57cec5SDimitry Andric IncV = BO->getOperand(1); 6630b57cec5SDimitry Andric else if (BO->getOperand(1) == PN) 6640b57cec5SDimitry Andric IncV = BO->getOperand(0); 6650b57cec5SDimitry Andric if (IncV == nullptr) 6660b57cec5SDimitry Andric continue; 6670b57cec5SDimitry Andric 6680b57cec5SDimitry Andric if (auto *T = dyn_cast<ConstantInt>(IncV)) 6690b57cec5SDimitry Andric if (T->getZExtValue() == 1) 6700b57cec5SDimitry Andric return PN; 6710b57cec5SDimitry Andric } 6720b57cec5SDimitry Andric return nullptr; 6730b57cec5SDimitry Andric } 6740b57cec5SDimitry Andric 6750b57cec5SDimitry Andric static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) { 6760b57cec5SDimitry Andric for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { 6770b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 6780b57cec5SDimitry Andric ++UI; 6790b57cec5SDimitry Andric if (auto *II = dyn_cast<Instruction>(TheUse.getUser())) 6800b57cec5SDimitry Andric if (BB == II->getParent()) 6810b57cec5SDimitry Andric II->replaceUsesOfWith(I, J); 6820b57cec5SDimitry Andric } 6830b57cec5SDimitry Andric } 6840b57cec5SDimitry Andric 6850b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI, 6860b57cec5SDimitry Andric Value *CIV, ParsedValues &PV) { 6870b57cec5SDimitry Andric // Match the following: 6880b57cec5SDimitry Andric // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R 6890b57cec5SDimitry Andric // select (X & (1 << i)) == 0 ? R : R ^ (Q << i) 6900b57cec5SDimitry Andric // The condition may also check for equality with the masked value, i.e 6910b57cec5SDimitry Andric // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R 6920b57cec5SDimitry Andric // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i); 6930b57cec5SDimitry Andric 6940b57cec5SDimitry Andric Value *CondV = SelI->getCondition(); 6950b57cec5SDimitry Andric Value *TrueV = SelI->getTrueValue(); 6960b57cec5SDimitry Andric Value *FalseV = SelI->getFalseValue(); 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric using namespace PatternMatch; 6990b57cec5SDimitry Andric 7000b57cec5SDimitry Andric CmpInst::Predicate P; 7010b57cec5SDimitry Andric Value *A = nullptr, *B = nullptr, *C = nullptr; 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) && 7040b57cec5SDimitry Andric !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B))))) 7050b57cec5SDimitry Andric return false; 7060b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 7070b57cec5SDimitry Andric return false; 7080b57cec5SDimitry Andric // Matched: select (A & B) == C ? ... : ... 7090b57cec5SDimitry Andric // select (A & B) != C ? ... : ... 7100b57cec5SDimitry Andric 7110b57cec5SDimitry Andric Value *X = nullptr, *Sh1 = nullptr; 7120b57cec5SDimitry Andric // Check (A & B) for (X & (1 << i)): 7130b57cec5SDimitry Andric if (match(A, m_Shl(m_One(), m_Specific(CIV)))) { 7140b57cec5SDimitry Andric Sh1 = A; 7150b57cec5SDimitry Andric X = B; 7160b57cec5SDimitry Andric } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) { 7170b57cec5SDimitry Andric Sh1 = B; 7180b57cec5SDimitry Andric X = A; 7190b57cec5SDimitry Andric } else { 7200b57cec5SDimitry Andric // TODO: Could also check for an induction variable containing single 7210b57cec5SDimitry Andric // bit shifted left by 1 in each iteration. 7220b57cec5SDimitry Andric return false; 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric 7250b57cec5SDimitry Andric bool TrueIfZero; 7260b57cec5SDimitry Andric 7270b57cec5SDimitry Andric // Check C against the possible values for comparison: 0 and (1 << i): 7280b57cec5SDimitry Andric if (match(C, m_Zero())) 7290b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_EQ); 7300b57cec5SDimitry Andric else if (C == Sh1) 7310b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_NE); 7320b57cec5SDimitry Andric else 7330b57cec5SDimitry Andric return false; 7340b57cec5SDimitry Andric 7350b57cec5SDimitry Andric // So far, matched: 7360b57cec5SDimitry Andric // select (X & (1 << i)) ? ... : ... 7370b57cec5SDimitry Andric // including variations of the check against zero/non-zero value. 7380b57cec5SDimitry Andric 7390b57cec5SDimitry Andric Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr; 7400b57cec5SDimitry Andric if (TrueIfZero) { 7410b57cec5SDimitry Andric ShouldSameV = TrueV; 7420b57cec5SDimitry Andric ShouldXoredV = FalseV; 7430b57cec5SDimitry Andric } else { 7440b57cec5SDimitry Andric ShouldSameV = FalseV; 7450b57cec5SDimitry Andric ShouldXoredV = TrueV; 7460b57cec5SDimitry Andric } 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr; 7490b57cec5SDimitry Andric Value *T = nullptr; 7500b57cec5SDimitry Andric if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) { 7510b57cec5SDimitry Andric // Matched: select +++ ? ... : Y ^ Z 7520b57cec5SDimitry Andric // select +++ ? Y ^ Z : ... 7530b57cec5SDimitry Andric // where +++ denotes previously checked matches. 7540b57cec5SDimitry Andric if (ShouldSameV == Y) 7550b57cec5SDimitry Andric T = Z; 7560b57cec5SDimitry Andric else if (ShouldSameV == Z) 7570b57cec5SDimitry Andric T = Y; 7580b57cec5SDimitry Andric else 7590b57cec5SDimitry Andric return false; 7600b57cec5SDimitry Andric R = ShouldSameV; 7610b57cec5SDimitry Andric // Matched: select +++ ? R : R ^ T 7620b57cec5SDimitry Andric // select +++ ? R ^ T : R 7630b57cec5SDimitry Andric // depending on TrueIfZero. 7640b57cec5SDimitry Andric 7650b57cec5SDimitry Andric } else if (match(ShouldSameV, m_Zero())) { 7660b57cec5SDimitry Andric // Matched: select +++ ? 0 : ... 7670b57cec5SDimitry Andric // select +++ ? ... : 0 7680b57cec5SDimitry Andric if (!SelI->hasOneUse()) 7690b57cec5SDimitry Andric return false; 7700b57cec5SDimitry Andric T = ShouldXoredV; 7710b57cec5SDimitry Andric // Matched: select +++ ? 0 : T 7720b57cec5SDimitry Andric // select +++ ? T : 0 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric Value *U = *SelI->user_begin(); 7750b57cec5SDimitry Andric if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) && 7760b57cec5SDimitry Andric !match(U, m_Xor(m_Value(R), m_Specific(SelI)))) 7770b57cec5SDimitry Andric return false; 7780b57cec5SDimitry Andric // Matched: xor (select +++ ? 0 : T), R 7790b57cec5SDimitry Andric // xor (select +++ ? T : 0), R 7800b57cec5SDimitry Andric } else 7810b57cec5SDimitry Andric return false; 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric // The xor input value T is isolated into its own match so that it could 7840b57cec5SDimitry Andric // be checked against an induction variable containing a shifted bit 7850b57cec5SDimitry Andric // (todo). 7860b57cec5SDimitry Andric // For now, check against (Q << i). 7870b57cec5SDimitry Andric if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) && 7880b57cec5SDimitry Andric !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV))))) 7890b57cec5SDimitry Andric return false; 7900b57cec5SDimitry Andric // Matched: select +++ ? R : R ^ (Q << i) 7910b57cec5SDimitry Andric // select +++ ? R ^ (Q << i) : R 7920b57cec5SDimitry Andric 7930b57cec5SDimitry Andric PV.X = X; 7940b57cec5SDimitry Andric PV.Q = Q; 7950b57cec5SDimitry Andric PV.R = R; 7960b57cec5SDimitry Andric PV.Left = true; 7970b57cec5SDimitry Andric return true; 7980b57cec5SDimitry Andric } 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI, 8010b57cec5SDimitry Andric ParsedValues &PV) { 8020b57cec5SDimitry Andric // Match the following: 8030b57cec5SDimitry Andric // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1) 8040b57cec5SDimitry Andric // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q 8050b57cec5SDimitry Andric // The condition may also check for equality with the masked value, i.e 8060b57cec5SDimitry Andric // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1) 8070b57cec5SDimitry Andric // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q 8080b57cec5SDimitry Andric 8090b57cec5SDimitry Andric Value *CondV = SelI->getCondition(); 8100b57cec5SDimitry Andric Value *TrueV = SelI->getTrueValue(); 8110b57cec5SDimitry Andric Value *FalseV = SelI->getFalseValue(); 8120b57cec5SDimitry Andric 8130b57cec5SDimitry Andric using namespace PatternMatch; 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric Value *C = nullptr; 8160b57cec5SDimitry Andric CmpInst::Predicate P; 8170b57cec5SDimitry Andric bool TrueIfZero; 8180b57cec5SDimitry Andric 8190b57cec5SDimitry Andric if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) || 8200b57cec5SDimitry Andric match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) { 8210b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 8220b57cec5SDimitry Andric return false; 8230b57cec5SDimitry Andric // Matched: select C == 0 ? ... : ... 8240b57cec5SDimitry Andric // select C != 0 ? ... : ... 8250b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_EQ); 8260b57cec5SDimitry Andric } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) || 8270b57cec5SDimitry Andric match(CondV, m_ICmp(P, m_One(), m_Value(C)))) { 8280b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 8290b57cec5SDimitry Andric return false; 8300b57cec5SDimitry Andric // Matched: select C == 1 ? ... : ... 8310b57cec5SDimitry Andric // select C != 1 ? ... : ... 8320b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_NE); 8330b57cec5SDimitry Andric } else 8340b57cec5SDimitry Andric return false; 8350b57cec5SDimitry Andric 8360b57cec5SDimitry Andric Value *X = nullptr; 8370b57cec5SDimitry Andric if (!match(C, m_And(m_Value(X), m_One())) && 8380b57cec5SDimitry Andric !match(C, m_And(m_One(), m_Value(X)))) 8390b57cec5SDimitry Andric return false; 8400b57cec5SDimitry Andric // Matched: select (X & 1) == +++ ? ... : ... 8410b57cec5SDimitry Andric // select (X & 1) != +++ ? ... : ... 8420b57cec5SDimitry Andric 8430b57cec5SDimitry Andric Value *R = nullptr, *Q = nullptr; 8440b57cec5SDimitry Andric if (TrueIfZero) { 8450b57cec5SDimitry Andric // The select's condition is true if the tested bit is 0. 8460b57cec5SDimitry Andric // TrueV must be the shift, FalseV must be the xor. 8470b57cec5SDimitry Andric if (!match(TrueV, m_LShr(m_Value(R), m_One()))) 8480b57cec5SDimitry Andric return false; 8490b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) : ... 8500b57cec5SDimitry Andric if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) && 8510b57cec5SDimitry Andric !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV)))) 8520b57cec5SDimitry Andric return false; 8530b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q 8540b57cec5SDimitry Andric // with commuting ^. 8550b57cec5SDimitry Andric } else { 8560b57cec5SDimitry Andric // The select's condition is true if the tested bit is 1. 8570b57cec5SDimitry Andric // TrueV must be the xor, FalseV must be the shift. 8580b57cec5SDimitry Andric if (!match(FalseV, m_LShr(m_Value(R), m_One()))) 8590b57cec5SDimitry Andric return false; 8600b57cec5SDimitry Andric // Matched: select +++ ? ... : (R >> 1) 8610b57cec5SDimitry Andric if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) && 8620b57cec5SDimitry Andric !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV)))) 8630b57cec5SDimitry Andric return false; 8640b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1) 8650b57cec5SDimitry Andric // with commuting ^. 8660b57cec5SDimitry Andric } 8670b57cec5SDimitry Andric 8680b57cec5SDimitry Andric PV.X = X; 8690b57cec5SDimitry Andric PV.Q = Q; 8700b57cec5SDimitry Andric PV.R = R; 8710b57cec5SDimitry Andric PV.Left = false; 8720b57cec5SDimitry Andric return true; 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, 8760b57cec5SDimitry Andric BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, 8770b57cec5SDimitry Andric bool PreScan) { 8780b57cec5SDimitry Andric using namespace PatternMatch; 8790b57cec5SDimitry Andric 8800b57cec5SDimitry Andric // The basic pattern for R = P.Q is: 8810b57cec5SDimitry Andric // for i = 0..31 8820b57cec5SDimitry Andric // R = phi (0, R') 8830b57cec5SDimitry Andric // if (P & (1 << i)) ; test-bit(P, i) 8840b57cec5SDimitry Andric // R' = R ^ (Q << i) 8850b57cec5SDimitry Andric // 8860b57cec5SDimitry Andric // Similarly, the basic pattern for R = (P/Q).Q - P 8870b57cec5SDimitry Andric // for i = 0..31 8880b57cec5SDimitry Andric // R = phi(P, R') 8890b57cec5SDimitry Andric // if (R & (1 << i)) 8900b57cec5SDimitry Andric // R' = R ^ (Q << i) 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric // There exist idioms, where instead of Q being shifted left, P is shifted 8930b57cec5SDimitry Andric // right. This produces a result that is shifted right by 32 bits (the 8940b57cec5SDimitry Andric // non-shifted result is 64-bit). 8950b57cec5SDimitry Andric // 8960b57cec5SDimitry Andric // For R = P.Q, this would be: 8970b57cec5SDimitry Andric // for i = 0..31 8980b57cec5SDimitry Andric // R = phi (0, R') 8990b57cec5SDimitry Andric // if ((P >> i) & 1) 9000b57cec5SDimitry Andric // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must 9010b57cec5SDimitry Andric // else ; be shifted by 1, not i. 9020b57cec5SDimitry Andric // R' = R >> 1 9030b57cec5SDimitry Andric // 9040b57cec5SDimitry Andric // And for the inverse: 9050b57cec5SDimitry Andric // for i = 0..31 9060b57cec5SDimitry Andric // R = phi (P, R') 9070b57cec5SDimitry Andric // if (R & 1) 9080b57cec5SDimitry Andric // R' = (R >> 1) ^ Q 9090b57cec5SDimitry Andric // else 9100b57cec5SDimitry Andric // R' = R >> 1 9110b57cec5SDimitry Andric 9120b57cec5SDimitry Andric // The left-shifting idioms share the same pattern: 9130b57cec5SDimitry Andric // select (X & (1 << i)) ? R ^ (Q << i) : R 9140b57cec5SDimitry Andric // Similarly for right-shifting idioms: 9150b57cec5SDimitry Andric // select (X & 1) ? (R >> 1) ^ Q 9160b57cec5SDimitry Andric 9170b57cec5SDimitry Andric if (matchLeftShift(SelI, CIV, PV)) { 9180b57cec5SDimitry Andric // If this is a pre-scan, getting this far is sufficient. 9190b57cec5SDimitry Andric if (PreScan) 9200b57cec5SDimitry Andric return true; 9210b57cec5SDimitry Andric 9220b57cec5SDimitry Andric // Need to make sure that the SelI goes back into R. 9230b57cec5SDimitry Andric auto *RPhi = dyn_cast<PHINode>(PV.R); 9240b57cec5SDimitry Andric if (!RPhi) 9250b57cec5SDimitry Andric return false; 9260b57cec5SDimitry Andric if (SelI != RPhi->getIncomingValueForBlock(LoopB)) 9270b57cec5SDimitry Andric return false; 9280b57cec5SDimitry Andric PV.Res = SelI; 9290b57cec5SDimitry Andric 9300b57cec5SDimitry Andric // If X is loop invariant, it must be the input polynomial, and the 9310b57cec5SDimitry Andric // idiom is the basic polynomial multiply. 9320b57cec5SDimitry Andric if (CurLoop->isLoopInvariant(PV.X)) { 9330b57cec5SDimitry Andric PV.P = PV.X; 9340b57cec5SDimitry Andric PV.Inv = false; 9350b57cec5SDimitry Andric } else { 9360b57cec5SDimitry Andric // X is not loop invariant. If X == R, this is the inverse pmpy. 9370b57cec5SDimitry Andric // Otherwise, check for an xor with an invariant value. If the 9380b57cec5SDimitry Andric // variable argument to the xor is R, then this is still a valid 9390b57cec5SDimitry Andric // inverse pmpy. 9400b57cec5SDimitry Andric PV.Inv = true; 9410b57cec5SDimitry Andric if (PV.X != PV.R) { 9420b57cec5SDimitry Andric Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr; 9430b57cec5SDimitry Andric if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2)))) 9440b57cec5SDimitry Andric return false; 9450b57cec5SDimitry Andric auto *I1 = dyn_cast<Instruction>(X1); 9460b57cec5SDimitry Andric auto *I2 = dyn_cast<Instruction>(X2); 9470b57cec5SDimitry Andric if (!I1 || I1->getParent() != LoopB) { 9480b57cec5SDimitry Andric Var = X2; 9490b57cec5SDimitry Andric Inv = X1; 9500b57cec5SDimitry Andric } else if (!I2 || I2->getParent() != LoopB) { 9510b57cec5SDimitry Andric Var = X1; 9520b57cec5SDimitry Andric Inv = X2; 9530b57cec5SDimitry Andric } else 9540b57cec5SDimitry Andric return false; 9550b57cec5SDimitry Andric if (Var != PV.R) 9560b57cec5SDimitry Andric return false; 9570b57cec5SDimitry Andric PV.M = Inv; 9580b57cec5SDimitry Andric } 9590b57cec5SDimitry Andric // The input polynomial P still needs to be determined. It will be 9600b57cec5SDimitry Andric // the entry value of R. 9610b57cec5SDimitry Andric Value *EntryP = RPhi->getIncomingValueForBlock(PrehB); 9620b57cec5SDimitry Andric PV.P = EntryP; 9630b57cec5SDimitry Andric } 9640b57cec5SDimitry Andric 9650b57cec5SDimitry Andric return true; 9660b57cec5SDimitry Andric } 9670b57cec5SDimitry Andric 9680b57cec5SDimitry Andric if (matchRightShift(SelI, PV)) { 9690b57cec5SDimitry Andric // If this is an inverse pattern, the Q polynomial must be known at 9700b57cec5SDimitry Andric // compile time. 9710b57cec5SDimitry Andric if (PV.Inv && !isa<ConstantInt>(PV.Q)) 9720b57cec5SDimitry Andric return false; 9730b57cec5SDimitry Andric if (PreScan) 9740b57cec5SDimitry Andric return true; 9750b57cec5SDimitry Andric // There is no exact matching of right-shift pmpy. 9760b57cec5SDimitry Andric return false; 9770b57cec5SDimitry Andric } 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric return false; 9800b57cec5SDimitry Andric } 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, 9830b57cec5SDimitry Andric IntegerType *DestTy) { 9840b57cec5SDimitry Andric IntegerType *T = dyn_cast<IntegerType>(Val->getType()); 9850b57cec5SDimitry Andric if (!T || T->getBitWidth() > DestTy->getBitWidth()) 9860b57cec5SDimitry Andric return false; 9870b57cec5SDimitry Andric if (T->getBitWidth() == DestTy->getBitWidth()) 9880b57cec5SDimitry Andric return true; 9890b57cec5SDimitry Andric // Non-instructions are promotable. The reason why an instruction may not 9900b57cec5SDimitry Andric // be promotable is that it may produce a different result if its operands 9910b57cec5SDimitry Andric // and the result are promoted, for example, it may produce more non-zero 9920b57cec5SDimitry Andric // bits. While it would still be possible to represent the proper result 9930b57cec5SDimitry Andric // in a wider type, it may require adding additional instructions (which 9940b57cec5SDimitry Andric // we don't want to do). 9950b57cec5SDimitry Andric Instruction *In = dyn_cast<Instruction>(Val); 9960b57cec5SDimitry Andric if (!In) 9970b57cec5SDimitry Andric return true; 9980b57cec5SDimitry Andric // The bitwidth of the source type is smaller than the destination. 9990b57cec5SDimitry Andric // Check if the individual operation can be promoted. 10000b57cec5SDimitry Andric switch (In->getOpcode()) { 10010b57cec5SDimitry Andric case Instruction::PHI: 10020b57cec5SDimitry Andric case Instruction::ZExt: 10030b57cec5SDimitry Andric case Instruction::And: 10040b57cec5SDimitry Andric case Instruction::Or: 10050b57cec5SDimitry Andric case Instruction::Xor: 10060b57cec5SDimitry Andric case Instruction::LShr: // Shift right is ok. 10070b57cec5SDimitry Andric case Instruction::Select: 10080b57cec5SDimitry Andric case Instruction::Trunc: 10090b57cec5SDimitry Andric return true; 10100b57cec5SDimitry Andric case Instruction::ICmp: 10110b57cec5SDimitry Andric if (CmpInst *CI = cast<CmpInst>(In)) 10120b57cec5SDimitry Andric return CI->isEquality() || CI->isUnsigned(); 10130b57cec5SDimitry Andric llvm_unreachable("Cast failed unexpectedly"); 10140b57cec5SDimitry Andric case Instruction::Add: 10150b57cec5SDimitry Andric return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); 10160b57cec5SDimitry Andric } 10170b57cec5SDimitry Andric return false; 10180b57cec5SDimitry Andric } 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric void PolynomialMultiplyRecognize::promoteTo(Instruction *In, 10210b57cec5SDimitry Andric IntegerType *DestTy, BasicBlock *LoopB) { 10220b57cec5SDimitry Andric Type *OrigTy = In->getType(); 10230b57cec5SDimitry Andric assert(!OrigTy->isVoidTy() && "Invalid instruction to promote"); 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric // Leave boolean values alone. 10260b57cec5SDimitry Andric if (!In->getType()->isIntegerTy(1)) 10270b57cec5SDimitry Andric In->mutateType(DestTy); 10280b57cec5SDimitry Andric unsigned DestBW = DestTy->getBitWidth(); 10290b57cec5SDimitry Andric 10300b57cec5SDimitry Andric // Handle PHIs. 10310b57cec5SDimitry Andric if (PHINode *P = dyn_cast<PHINode>(In)) { 10320b57cec5SDimitry Andric unsigned N = P->getNumIncomingValues(); 10330b57cec5SDimitry Andric for (unsigned i = 0; i != N; ++i) { 10340b57cec5SDimitry Andric BasicBlock *InB = P->getIncomingBlock(i); 10350b57cec5SDimitry Andric if (InB == LoopB) 10360b57cec5SDimitry Andric continue; 10370b57cec5SDimitry Andric Value *InV = P->getIncomingValue(i); 10380b57cec5SDimitry Andric IntegerType *Ty = cast<IntegerType>(InV->getType()); 10390b57cec5SDimitry Andric // Do not promote values in PHI nodes of type i1. 10400b57cec5SDimitry Andric if (Ty != P->getType()) { 10410b57cec5SDimitry Andric // If the value type does not match the PHI type, the PHI type 10420b57cec5SDimitry Andric // must have been promoted. 10430b57cec5SDimitry Andric assert(Ty->getBitWidth() < DestBW); 10440b57cec5SDimitry Andric InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); 10450b57cec5SDimitry Andric P->setIncomingValue(i, InV); 10460b57cec5SDimitry Andric } 10470b57cec5SDimitry Andric } 10480b57cec5SDimitry Andric } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) { 10490b57cec5SDimitry Andric Value *Op = Z->getOperand(0); 10500b57cec5SDimitry Andric if (Op->getType() == Z->getType()) 10510b57cec5SDimitry Andric Z->replaceAllUsesWith(Op); 10520b57cec5SDimitry Andric Z->eraseFromParent(); 10530b57cec5SDimitry Andric return; 10540b57cec5SDimitry Andric } 10550b57cec5SDimitry Andric if (TruncInst *T = dyn_cast<TruncInst>(In)) { 10560b57cec5SDimitry Andric IntegerType *TruncTy = cast<IntegerType>(OrigTy); 10570b57cec5SDimitry Andric Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); 10580b57cec5SDimitry Andric Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); 10590b57cec5SDimitry Andric T->replaceAllUsesWith(And); 10600b57cec5SDimitry Andric T->eraseFromParent(); 10610b57cec5SDimitry Andric return; 10620b57cec5SDimitry Andric } 10630b57cec5SDimitry Andric 10640b57cec5SDimitry Andric // Promote immediates. 10650b57cec5SDimitry Andric for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { 10660b57cec5SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) 10670b57cec5SDimitry Andric if (CI->getType()->getBitWidth() < DestBW) 10680b57cec5SDimitry Andric In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); 10690b57cec5SDimitry Andric } 10700b57cec5SDimitry Andric } 10710b57cec5SDimitry Andric 10720b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, 10730b57cec5SDimitry Andric BasicBlock *ExitB) { 10740b57cec5SDimitry Andric assert(LoopB); 10750b57cec5SDimitry Andric // Skip loops where the exit block has more than one predecessor. The values 10760b57cec5SDimitry Andric // coming from the loop block will be promoted to another type, and so the 10770b57cec5SDimitry Andric // values coming into the exit block from other predecessors would also have 10780b57cec5SDimitry Andric // to be promoted. 10790b57cec5SDimitry Andric if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) 10800b57cec5SDimitry Andric return false; 10810b57cec5SDimitry Andric IntegerType *DestTy = getPmpyType(); 10820b57cec5SDimitry Andric // Check if the exit values have types that are no wider than the type 10830b57cec5SDimitry Andric // that we want to promote to. 10840b57cec5SDimitry Andric unsigned DestBW = DestTy->getBitWidth(); 10850b57cec5SDimitry Andric for (PHINode &P : ExitB->phis()) { 10860b57cec5SDimitry Andric if (P.getNumIncomingValues() != 1) 10870b57cec5SDimitry Andric return false; 10880b57cec5SDimitry Andric assert(P.getIncomingBlock(0) == LoopB); 10890b57cec5SDimitry Andric IntegerType *T = dyn_cast<IntegerType>(P.getType()); 10900b57cec5SDimitry Andric if (!T || T->getBitWidth() > DestBW) 10910b57cec5SDimitry Andric return false; 10920b57cec5SDimitry Andric } 10930b57cec5SDimitry Andric 10940b57cec5SDimitry Andric // Check all instructions in the loop. 10950b57cec5SDimitry Andric for (Instruction &In : *LoopB) 10960b57cec5SDimitry Andric if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) 10970b57cec5SDimitry Andric return false; 10980b57cec5SDimitry Andric 10990b57cec5SDimitry Andric // Perform the promotion. 11000b57cec5SDimitry Andric std::vector<Instruction*> LoopIns; 11010b57cec5SDimitry Andric std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), 11020b57cec5SDimitry Andric [](Instruction &In) { return &In; }); 11030b57cec5SDimitry Andric for (Instruction *In : LoopIns) 11040b57cec5SDimitry Andric if (!In->isTerminator()) 11050b57cec5SDimitry Andric promoteTo(In, DestTy, LoopB); 11060b57cec5SDimitry Andric 11070b57cec5SDimitry Andric // Fix up the PHI nodes in the exit block. 11080b57cec5SDimitry Andric Instruction *EndI = ExitB->getFirstNonPHI(); 11090b57cec5SDimitry Andric BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); 11100b57cec5SDimitry Andric for (auto I = ExitB->begin(); I != End; ++I) { 11110b57cec5SDimitry Andric PHINode *P = dyn_cast<PHINode>(I); 11120b57cec5SDimitry Andric if (!P) 11130b57cec5SDimitry Andric break; 11140b57cec5SDimitry Andric Type *Ty0 = P->getIncomingValue(0)->getType(); 11150b57cec5SDimitry Andric Type *PTy = P->getType(); 11160b57cec5SDimitry Andric if (PTy != Ty0) { 11170b57cec5SDimitry Andric assert(Ty0 == DestTy); 11180b57cec5SDimitry Andric // In order to create the trunc, P must have the promoted type. 11190b57cec5SDimitry Andric P->mutateType(Ty0); 11200b57cec5SDimitry Andric Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); 11210b57cec5SDimitry Andric // In order for the RAUW to work, the types of P and T must match. 11220b57cec5SDimitry Andric P->mutateType(PTy); 11230b57cec5SDimitry Andric P->replaceAllUsesWith(T); 11240b57cec5SDimitry Andric // Final update of the P's type. 11250b57cec5SDimitry Andric P->mutateType(Ty0); 11260b57cec5SDimitry Andric cast<Instruction>(T)->setOperand(0, P); 11270b57cec5SDimitry Andric } 11280b57cec5SDimitry Andric } 11290b57cec5SDimitry Andric 11300b57cec5SDimitry Andric return true; 11310b57cec5SDimitry Andric } 11320b57cec5SDimitry Andric 11330b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, 11340b57cec5SDimitry Andric ValueSeq &Cycle) { 11350b57cec5SDimitry Andric // Out = ..., In, ... 11360b57cec5SDimitry Andric if (Out == In) 11370b57cec5SDimitry Andric return true; 11380b57cec5SDimitry Andric 11390b57cec5SDimitry Andric auto *BB = cast<Instruction>(Out)->getParent(); 11400b57cec5SDimitry Andric bool HadPhi = false; 11410b57cec5SDimitry Andric 11420b57cec5SDimitry Andric for (auto U : Out->users()) { 11430b57cec5SDimitry Andric auto *I = dyn_cast<Instruction>(&*U); 11440b57cec5SDimitry Andric if (I == nullptr || I->getParent() != BB) 11450b57cec5SDimitry Andric continue; 11460b57cec5SDimitry Andric // Make sure that there are no multi-iteration cycles, e.g. 11470b57cec5SDimitry Andric // p1 = phi(p2) 11480b57cec5SDimitry Andric // p2 = phi(p1) 11490b57cec5SDimitry Andric // The cycle p1->p2->p1 would span two loop iterations. 11500b57cec5SDimitry Andric // Check that there is only one phi in the cycle. 11510b57cec5SDimitry Andric bool IsPhi = isa<PHINode>(I); 11520b57cec5SDimitry Andric if (IsPhi && HadPhi) 11530b57cec5SDimitry Andric return false; 11540b57cec5SDimitry Andric HadPhi |= IsPhi; 11550b57cec5SDimitry Andric if (Cycle.count(I)) 11560b57cec5SDimitry Andric return false; 11570b57cec5SDimitry Andric Cycle.insert(I); 11580b57cec5SDimitry Andric if (findCycle(I, In, Cycle)) 11590b57cec5SDimitry Andric break; 11600b57cec5SDimitry Andric Cycle.remove(I); 11610b57cec5SDimitry Andric } 11620b57cec5SDimitry Andric return !Cycle.empty(); 11630b57cec5SDimitry Andric } 11640b57cec5SDimitry Andric 11650b57cec5SDimitry Andric void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI, 11660b57cec5SDimitry Andric ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) { 11670b57cec5SDimitry Andric // All the values in the cycle that are between the phi node and the 11680b57cec5SDimitry Andric // divider instruction will be classified as "early", all other values 11690b57cec5SDimitry Andric // will be "late". 11700b57cec5SDimitry Andric 11710b57cec5SDimitry Andric bool IsE = true; 11720b57cec5SDimitry Andric unsigned I, N = Cycle.size(); 11730b57cec5SDimitry Andric for (I = 0; I < N; ++I) { 11740b57cec5SDimitry Andric Value *V = Cycle[I]; 11750b57cec5SDimitry Andric if (DivI == V) 11760b57cec5SDimitry Andric IsE = false; 11770b57cec5SDimitry Andric else if (!isa<PHINode>(V)) 11780b57cec5SDimitry Andric continue; 11790b57cec5SDimitry Andric // Stop if found either. 11800b57cec5SDimitry Andric break; 11810b57cec5SDimitry Andric } 11820b57cec5SDimitry Andric // "I" is the index of either DivI or the phi node, whichever was first. 11830b57cec5SDimitry Andric // "E" is "false" or "true" respectively. 11840b57cec5SDimitry Andric ValueSeq &First = !IsE ? Early : Late; 11850b57cec5SDimitry Andric for (unsigned J = 0; J < I; ++J) 11860b57cec5SDimitry Andric First.insert(Cycle[J]); 11870b57cec5SDimitry Andric 11880b57cec5SDimitry Andric ValueSeq &Second = IsE ? Early : Late; 11890b57cec5SDimitry Andric Second.insert(Cycle[I]); 11900b57cec5SDimitry Andric for (++I; I < N; ++I) { 11910b57cec5SDimitry Andric Value *V = Cycle[I]; 11920b57cec5SDimitry Andric if (DivI == V || isa<PHINode>(V)) 11930b57cec5SDimitry Andric break; 11940b57cec5SDimitry Andric Second.insert(V); 11950b57cec5SDimitry Andric } 11960b57cec5SDimitry Andric 11970b57cec5SDimitry Andric for (; I < N; ++I) 11980b57cec5SDimitry Andric First.insert(Cycle[I]); 11990b57cec5SDimitry Andric } 12000b57cec5SDimitry Andric 12010b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI, 12020b57cec5SDimitry Andric ValueSeq &Early, ValueSeq &Late) { 12030b57cec5SDimitry Andric // Select is an exception, since the condition value does not have to be 12040b57cec5SDimitry Andric // classified in the same way as the true/false values. The true/false 12050b57cec5SDimitry Andric // values do have to be both early or both late. 12060b57cec5SDimitry Andric if (UseI->getOpcode() == Instruction::Select) { 12070b57cec5SDimitry Andric Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2); 12080b57cec5SDimitry Andric if (Early.count(TV) || Early.count(FV)) { 12090b57cec5SDimitry Andric if (Late.count(TV) || Late.count(FV)) 12100b57cec5SDimitry Andric return false; 12110b57cec5SDimitry Andric Early.insert(UseI); 12120b57cec5SDimitry Andric } else if (Late.count(TV) || Late.count(FV)) { 12130b57cec5SDimitry Andric if (Early.count(TV) || Early.count(FV)) 12140b57cec5SDimitry Andric return false; 12150b57cec5SDimitry Andric Late.insert(UseI); 12160b57cec5SDimitry Andric } 12170b57cec5SDimitry Andric return true; 12180b57cec5SDimitry Andric } 12190b57cec5SDimitry Andric 12200b57cec5SDimitry Andric // Not sure what would be the example of this, but the code below relies 12210b57cec5SDimitry Andric // on having at least one operand. 12220b57cec5SDimitry Andric if (UseI->getNumOperands() == 0) 12230b57cec5SDimitry Andric return true; 12240b57cec5SDimitry Andric 12250b57cec5SDimitry Andric bool AE = true, AL = true; 12260b57cec5SDimitry Andric for (auto &I : UseI->operands()) { 12270b57cec5SDimitry Andric if (Early.count(&*I)) 12280b57cec5SDimitry Andric AL = false; 12290b57cec5SDimitry Andric else if (Late.count(&*I)) 12300b57cec5SDimitry Andric AE = false; 12310b57cec5SDimitry Andric } 12320b57cec5SDimitry Andric // If the operands appear "all early" and "all late" at the same time, 12330b57cec5SDimitry Andric // then it means that none of them are actually classified as either. 12340b57cec5SDimitry Andric // This is harmless. 12350b57cec5SDimitry Andric if (AE && AL) 12360b57cec5SDimitry Andric return true; 12370b57cec5SDimitry Andric // Conversely, if they are neither "all early" nor "all late", then 12380b57cec5SDimitry Andric // we have a mixture of early and late operands that is not a known 12390b57cec5SDimitry Andric // exception. 12400b57cec5SDimitry Andric if (!AE && !AL) 12410b57cec5SDimitry Andric return false; 12420b57cec5SDimitry Andric 12430b57cec5SDimitry Andric // Check that we have covered the two special cases. 12440b57cec5SDimitry Andric assert(AE != AL); 12450b57cec5SDimitry Andric 12460b57cec5SDimitry Andric if (AE) 12470b57cec5SDimitry Andric Early.insert(UseI); 12480b57cec5SDimitry Andric else 12490b57cec5SDimitry Andric Late.insert(UseI); 12500b57cec5SDimitry Andric return true; 12510b57cec5SDimitry Andric } 12520b57cec5SDimitry Andric 12530b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) { 12540b57cec5SDimitry Andric switch (I->getOpcode()) { 12550b57cec5SDimitry Andric case Instruction::And: 12560b57cec5SDimitry Andric case Instruction::Or: 12570b57cec5SDimitry Andric case Instruction::Xor: 12580b57cec5SDimitry Andric case Instruction::LShr: 12590b57cec5SDimitry Andric case Instruction::Shl: 12600b57cec5SDimitry Andric case Instruction::Select: 12610b57cec5SDimitry Andric case Instruction::ICmp: 12620b57cec5SDimitry Andric case Instruction::PHI: 12630b57cec5SDimitry Andric break; 12640b57cec5SDimitry Andric default: 12650b57cec5SDimitry Andric return false; 12660b57cec5SDimitry Andric } 12670b57cec5SDimitry Andric return true; 12680b57cec5SDimitry Andric } 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, 12710b57cec5SDimitry Andric unsigned IterCount) { 12720b57cec5SDimitry Andric auto *T = dyn_cast<IntegerType>(V->getType()); 12730b57cec5SDimitry Andric if (!T) 12740b57cec5SDimitry Andric return false; 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric KnownBits Known(T->getBitWidth()); 12770b57cec5SDimitry Andric computeKnownBits(V, Known, DL); 12780b57cec5SDimitry Andric return Known.countMinLeadingZeros() >= IterCount; 12790b57cec5SDimitry Andric } 12800b57cec5SDimitry Andric 12810b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, 12820b57cec5SDimitry Andric unsigned IterCount) { 12830b57cec5SDimitry Andric // Assume that all inputs to the value have the high bits zero. 12840b57cec5SDimitry Andric // Check if the value itself preserves the zeros in the high bits. 12850b57cec5SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(V)) 12860b57cec5SDimitry Andric return C->getValue().countLeadingZeros() >= IterCount; 12870b57cec5SDimitry Andric 12880b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(V)) { 12890b57cec5SDimitry Andric switch (I->getOpcode()) { 12900b57cec5SDimitry Andric case Instruction::And: 12910b57cec5SDimitry Andric case Instruction::Or: 12920b57cec5SDimitry Andric case Instruction::Xor: 12930b57cec5SDimitry Andric case Instruction::LShr: 12940b57cec5SDimitry Andric case Instruction::Select: 12950b57cec5SDimitry Andric case Instruction::ICmp: 12960b57cec5SDimitry Andric case Instruction::PHI: 12970b57cec5SDimitry Andric case Instruction::ZExt: 12980b57cec5SDimitry Andric return true; 12990b57cec5SDimitry Andric } 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric return false; 13030b57cec5SDimitry Andric } 13040b57cec5SDimitry Andric 13050b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) { 13060b57cec5SDimitry Andric unsigned Opc = I->getOpcode(); 13070b57cec5SDimitry Andric if (Opc == Instruction::Shl || Opc == Instruction::LShr) 13080b57cec5SDimitry Andric return Op != I->getOperand(1); 13090b57cec5SDimitry Andric return true; 13100b57cec5SDimitry Andric } 13110b57cec5SDimitry Andric 13120b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, 13130b57cec5SDimitry Andric BasicBlock *ExitB, unsigned IterCount) { 13140b57cec5SDimitry Andric Value *CIV = getCountIV(LoopB); 13150b57cec5SDimitry Andric if (CIV == nullptr) 13160b57cec5SDimitry Andric return false; 13170b57cec5SDimitry Andric auto *CIVTy = dyn_cast<IntegerType>(CIV->getType()); 13180b57cec5SDimitry Andric if (CIVTy == nullptr) 13190b57cec5SDimitry Andric return false; 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric ValueSeq RShifts; 13220b57cec5SDimitry Andric ValueSeq Early, Late, Cycled; 13230b57cec5SDimitry Andric 13240b57cec5SDimitry Andric // Find all value cycles that contain logical right shifts by 1. 13250b57cec5SDimitry Andric for (Instruction &I : *LoopB) { 13260b57cec5SDimitry Andric using namespace PatternMatch; 13270b57cec5SDimitry Andric 13280b57cec5SDimitry Andric Value *V = nullptr; 13290b57cec5SDimitry Andric if (!match(&I, m_LShr(m_Value(V), m_One()))) 13300b57cec5SDimitry Andric continue; 13310b57cec5SDimitry Andric ValueSeq C; 13320b57cec5SDimitry Andric if (!findCycle(&I, V, C)) 13330b57cec5SDimitry Andric continue; 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric // Found a cycle. 13360b57cec5SDimitry Andric C.insert(&I); 13370b57cec5SDimitry Andric classifyCycle(&I, C, Early, Late); 13380b57cec5SDimitry Andric Cycled.insert(C.begin(), C.end()); 13390b57cec5SDimitry Andric RShifts.insert(&I); 13400b57cec5SDimitry Andric } 13410b57cec5SDimitry Andric 13420b57cec5SDimitry Andric // Find the set of all values affected by the shift cycles, i.e. all 13430b57cec5SDimitry Andric // cycled values, and (recursively) all their users. 13440b57cec5SDimitry Andric ValueSeq Users(Cycled.begin(), Cycled.end()); 13450b57cec5SDimitry Andric for (unsigned i = 0; i < Users.size(); ++i) { 13460b57cec5SDimitry Andric Value *V = Users[i]; 13470b57cec5SDimitry Andric if (!isa<IntegerType>(V->getType())) 13480b57cec5SDimitry Andric return false; 13490b57cec5SDimitry Andric auto *R = cast<Instruction>(V); 13500b57cec5SDimitry Andric // If the instruction does not commute with shifts, the loop cannot 13510b57cec5SDimitry Andric // be unshifted. 13520b57cec5SDimitry Andric if (!commutesWithShift(R)) 13530b57cec5SDimitry Andric return false; 13540b57cec5SDimitry Andric for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) { 13550b57cec5SDimitry Andric auto *T = cast<Instruction>(*I); 13560b57cec5SDimitry Andric // Skip users from outside of the loop. They will be handled later. 13570b57cec5SDimitry Andric // Also, skip the right-shifts and phi nodes, since they mix early 13580b57cec5SDimitry Andric // and late values. 13590b57cec5SDimitry Andric if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T)) 13600b57cec5SDimitry Andric continue; 13610b57cec5SDimitry Andric 13620b57cec5SDimitry Andric Users.insert(T); 13630b57cec5SDimitry Andric if (!classifyInst(T, Early, Late)) 13640b57cec5SDimitry Andric return false; 13650b57cec5SDimitry Andric } 13660b57cec5SDimitry Andric } 13670b57cec5SDimitry Andric 13680b57cec5SDimitry Andric if (Users.empty()) 13690b57cec5SDimitry Andric return false; 13700b57cec5SDimitry Andric 13710b57cec5SDimitry Andric // Verify that high bits remain zero. 13720b57cec5SDimitry Andric ValueSeq Internal(Users.begin(), Users.end()); 13730b57cec5SDimitry Andric ValueSeq Inputs; 13740b57cec5SDimitry Andric for (unsigned i = 0; i < Internal.size(); ++i) { 13750b57cec5SDimitry Andric auto *R = dyn_cast<Instruction>(Internal[i]); 13760b57cec5SDimitry Andric if (!R) 13770b57cec5SDimitry Andric continue; 13780b57cec5SDimitry Andric for (Value *Op : R->operands()) { 13790b57cec5SDimitry Andric auto *T = dyn_cast<Instruction>(Op); 13800b57cec5SDimitry Andric if (T && T->getParent() != LoopB) 13810b57cec5SDimitry Andric Inputs.insert(Op); 13820b57cec5SDimitry Andric else 13830b57cec5SDimitry Andric Internal.insert(Op); 13840b57cec5SDimitry Andric } 13850b57cec5SDimitry Andric } 13860b57cec5SDimitry Andric for (Value *V : Inputs) 13870b57cec5SDimitry Andric if (!highBitsAreZero(V, IterCount)) 13880b57cec5SDimitry Andric return false; 13890b57cec5SDimitry Andric for (Value *V : Internal) 13900b57cec5SDimitry Andric if (!keepsHighBitsZero(V, IterCount)) 13910b57cec5SDimitry Andric return false; 13920b57cec5SDimitry Andric 13930b57cec5SDimitry Andric // Finally, the work can be done. Unshift each user. 13940b57cec5SDimitry Andric IRBuilder<> IRB(LoopB); 13950b57cec5SDimitry Andric std::map<Value*,Value*> ShiftMap; 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric using CastMapType = std::map<std::pair<Value *, Type *>, Value *>; 13980b57cec5SDimitry Andric 13990b57cec5SDimitry Andric CastMapType CastMap; 14000b57cec5SDimitry Andric 14010b57cec5SDimitry Andric auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V, 14020b57cec5SDimitry Andric IntegerType *Ty) -> Value* { 14030b57cec5SDimitry Andric auto H = CM.find(std::make_pair(V, Ty)); 14040b57cec5SDimitry Andric if (H != CM.end()) 14050b57cec5SDimitry Andric return H->second; 14060b57cec5SDimitry Andric Value *CV = IRB.CreateIntCast(V, Ty, false); 14070b57cec5SDimitry Andric CM.insert(std::make_pair(std::make_pair(V, Ty), CV)); 14080b57cec5SDimitry Andric return CV; 14090b57cec5SDimitry Andric }; 14100b57cec5SDimitry Andric 14110b57cec5SDimitry Andric for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) { 14120b57cec5SDimitry Andric using namespace PatternMatch; 14130b57cec5SDimitry Andric 14140b57cec5SDimitry Andric if (isa<PHINode>(I) || !Users.count(&*I)) 14150b57cec5SDimitry Andric continue; 14160b57cec5SDimitry Andric 14170b57cec5SDimitry Andric // Match lshr x, 1. 14180b57cec5SDimitry Andric Value *V = nullptr; 14190b57cec5SDimitry Andric if (match(&*I, m_LShr(m_Value(V), m_One()))) { 14200b57cec5SDimitry Andric replaceAllUsesOfWithIn(&*I, V, LoopB); 14210b57cec5SDimitry Andric continue; 14220b57cec5SDimitry Andric } 14230b57cec5SDimitry Andric // For each non-cycled operand, replace it with the corresponding 14240b57cec5SDimitry Andric // value shifted left. 14250b57cec5SDimitry Andric for (auto &J : I->operands()) { 14260b57cec5SDimitry Andric Value *Op = J.get(); 14270b57cec5SDimitry Andric if (!isOperandShifted(&*I, Op)) 14280b57cec5SDimitry Andric continue; 14290b57cec5SDimitry Andric if (Users.count(Op)) 14300b57cec5SDimitry Andric continue; 14310b57cec5SDimitry Andric // Skip shifting zeros. 14320b57cec5SDimitry Andric if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) 14330b57cec5SDimitry Andric continue; 14340b57cec5SDimitry Andric // Check if we have already generated a shift for this value. 14350b57cec5SDimitry Andric auto F = ShiftMap.find(Op); 14360b57cec5SDimitry Andric Value *W = (F != ShiftMap.end()) ? F->second : nullptr; 14370b57cec5SDimitry Andric if (W == nullptr) { 14380b57cec5SDimitry Andric IRB.SetInsertPoint(&*I); 14390b57cec5SDimitry Andric // First, the shift amount will be CIV or CIV+1, depending on 14400b57cec5SDimitry Andric // whether the value is early or late. Instead of creating CIV+1, 14410b57cec5SDimitry Andric // do a single shift of the value. 14420b57cec5SDimitry Andric Value *ShAmt = CIV, *ShVal = Op; 14430b57cec5SDimitry Andric auto *VTy = cast<IntegerType>(ShVal->getType()); 14440b57cec5SDimitry Andric auto *ATy = cast<IntegerType>(ShAmt->getType()); 14450b57cec5SDimitry Andric if (Late.count(&*I)) 14460b57cec5SDimitry Andric ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1)); 14470b57cec5SDimitry Andric // Second, the types of the shifted value and the shift amount 14480b57cec5SDimitry Andric // must match. 14490b57cec5SDimitry Andric if (VTy != ATy) { 14500b57cec5SDimitry Andric if (VTy->getBitWidth() < ATy->getBitWidth()) 14510b57cec5SDimitry Andric ShVal = upcast(CastMap, IRB, ShVal, ATy); 14520b57cec5SDimitry Andric else 14530b57cec5SDimitry Andric ShAmt = upcast(CastMap, IRB, ShAmt, VTy); 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric // Ready to generate the shift and memoize it. 14560b57cec5SDimitry Andric W = IRB.CreateShl(ShVal, ShAmt); 14570b57cec5SDimitry Andric ShiftMap.insert(std::make_pair(Op, W)); 14580b57cec5SDimitry Andric } 14590b57cec5SDimitry Andric I->replaceUsesOfWith(Op, W); 14600b57cec5SDimitry Andric } 14610b57cec5SDimitry Andric } 14620b57cec5SDimitry Andric 14630b57cec5SDimitry Andric // Update the users outside of the loop to account for having left 14640b57cec5SDimitry Andric // shifts. They would normally be shifted right in the loop, so shift 14650b57cec5SDimitry Andric // them right after the loop exit. 14660b57cec5SDimitry Andric // Take advantage of the loop-closed SSA form, which has all the post- 14670b57cec5SDimitry Andric // loop values in phi nodes. 14680b57cec5SDimitry Andric IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt()); 14690b57cec5SDimitry Andric for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) { 14700b57cec5SDimitry Andric if (!isa<PHINode>(P)) 14710b57cec5SDimitry Andric break; 14720b57cec5SDimitry Andric auto *PN = cast<PHINode>(P); 14730b57cec5SDimitry Andric Value *U = PN->getIncomingValueForBlock(LoopB); 14740b57cec5SDimitry Andric if (!Users.count(U)) 14750b57cec5SDimitry Andric continue; 14760b57cec5SDimitry Andric Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount)); 14770b57cec5SDimitry Andric PN->replaceAllUsesWith(S); 14780b57cec5SDimitry Andric // The above RAUW will create 14790b57cec5SDimitry Andric // S = lshr S, IterCount 14800b57cec5SDimitry Andric // so we need to fix it back into 14810b57cec5SDimitry Andric // S = lshr PN, IterCount 14820b57cec5SDimitry Andric cast<User>(S)->replaceUsesOfWith(S, PN); 14830b57cec5SDimitry Andric } 14840b57cec5SDimitry Andric 14850b57cec5SDimitry Andric return true; 14860b57cec5SDimitry Andric } 14870b57cec5SDimitry Andric 14880b57cec5SDimitry Andric void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { 14890b57cec5SDimitry Andric for (auto &I : *LoopB) 14900b57cec5SDimitry Andric if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) 14910b57cec5SDimitry Andric I.replaceAllUsesWith(SV); 14920b57cec5SDimitry Andric 14930b57cec5SDimitry Andric for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { 14940b57cec5SDimitry Andric N = std::next(I); 14950b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI); 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric } 14980b57cec5SDimitry Andric 14990b57cec5SDimitry Andric unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { 15000b57cec5SDimitry Andric // Arrays of coefficients of Q and the inverse, C. 15010b57cec5SDimitry Andric // Q[i] = coefficient at x^i. 15020b57cec5SDimitry Andric std::array<char,32> Q, C; 15030b57cec5SDimitry Andric 15040b57cec5SDimitry Andric for (unsigned i = 0; i < 32; ++i) { 15050b57cec5SDimitry Andric Q[i] = QP & 1; 15060b57cec5SDimitry Andric QP >>= 1; 15070b57cec5SDimitry Andric } 15080b57cec5SDimitry Andric assert(Q[0] == 1); 15090b57cec5SDimitry Andric 15100b57cec5SDimitry Andric // Find C, such that 15110b57cec5SDimitry Andric // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1 15120b57cec5SDimitry Andric // 15130b57cec5SDimitry Andric // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the 15140b57cec5SDimitry Andric // operations * and + are & and ^ respectively. 15150b57cec5SDimitry Andric // 15160b57cec5SDimitry Andric // Find C[i] recursively, by comparing i-th coefficient in the product 15170b57cec5SDimitry Andric // with 0 (or 1 for i=0). 15180b57cec5SDimitry Andric // 15190b57cec5SDimitry Andric // C[0] = 1, since C[0] = Q[0], and Q[0] = 1. 15200b57cec5SDimitry Andric C[0] = 1; 15210b57cec5SDimitry Andric for (unsigned i = 1; i < 32; ++i) { 15220b57cec5SDimitry Andric // Solve for C[i] in: 15230b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0 15240b57cec5SDimitry Andric // This is equivalent to 15250b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0 15260b57cec5SDimitry Andric // which is 15270b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i] 15280b57cec5SDimitry Andric unsigned T = 0; 15290b57cec5SDimitry Andric for (unsigned j = 0; j < i; ++j) 15300b57cec5SDimitry Andric T = T ^ (C[j] & Q[i-j]); 15310b57cec5SDimitry Andric C[i] = T; 15320b57cec5SDimitry Andric } 15330b57cec5SDimitry Andric 15340b57cec5SDimitry Andric unsigned QV = 0; 15350b57cec5SDimitry Andric for (unsigned i = 0; i < 32; ++i) 15360b57cec5SDimitry Andric if (C[i]) 15370b57cec5SDimitry Andric QV |= (1 << i); 15380b57cec5SDimitry Andric 15390b57cec5SDimitry Andric return QV; 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, 15430b57cec5SDimitry Andric ParsedValues &PV) { 15440b57cec5SDimitry Andric IRBuilder<> B(&*At); 15450b57cec5SDimitry Andric Module *M = At->getParent()->getParent()->getParent(); 15460b57cec5SDimitry Andric Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw); 15470b57cec5SDimitry Andric 15480b57cec5SDimitry Andric Value *P = PV.P, *Q = PV.Q, *P0 = P; 15490b57cec5SDimitry Andric unsigned IC = PV.IterCount; 15500b57cec5SDimitry Andric 15510b57cec5SDimitry Andric if (PV.M != nullptr) 15520b57cec5SDimitry Andric P0 = P = B.CreateXor(P, PV.M); 15530b57cec5SDimitry Andric 15540b57cec5SDimitry Andric // Create a bit mask to clear the high bits beyond IterCount. 15550b57cec5SDimitry Andric auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC)); 15560b57cec5SDimitry Andric 15570b57cec5SDimitry Andric if (PV.IterCount != 32) 15580b57cec5SDimitry Andric P = B.CreateAnd(P, BMI); 15590b57cec5SDimitry Andric 15600b57cec5SDimitry Andric if (PV.Inv) { 15610b57cec5SDimitry Andric auto *QI = dyn_cast<ConstantInt>(PV.Q); 15620b57cec5SDimitry Andric assert(QI && QI->getBitWidth() <= 32); 15630b57cec5SDimitry Andric 15640b57cec5SDimitry Andric // Again, clearing bits beyond IterCount. 15650b57cec5SDimitry Andric unsigned M = (1 << PV.IterCount) - 1; 15660b57cec5SDimitry Andric unsigned Tmp = (QI->getZExtValue() | 1) & M; 15670b57cec5SDimitry Andric unsigned QV = getInverseMxN(Tmp) & M; 15680b57cec5SDimitry Andric auto *QVI = ConstantInt::get(QI->getType(), QV); 15690b57cec5SDimitry Andric P = B.CreateCall(PMF, {P, QVI}); 15700b57cec5SDimitry Andric P = B.CreateTrunc(P, QI->getType()); 15710b57cec5SDimitry Andric if (IC != 32) 15720b57cec5SDimitry Andric P = B.CreateAnd(P, BMI); 15730b57cec5SDimitry Andric } 15740b57cec5SDimitry Andric 15750b57cec5SDimitry Andric Value *R = B.CreateCall(PMF, {P, Q}); 15760b57cec5SDimitry Andric 15770b57cec5SDimitry Andric if (PV.M != nullptr) 15780b57cec5SDimitry Andric R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false)); 15790b57cec5SDimitry Andric 15800b57cec5SDimitry Andric return R; 15810b57cec5SDimitry Andric } 15820b57cec5SDimitry Andric 15830b57cec5SDimitry Andric static bool hasZeroSignBit(const Value *V) { 15840b57cec5SDimitry Andric if (const auto *CI = dyn_cast<const ConstantInt>(V)) 15850b57cec5SDimitry Andric return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; 15860b57cec5SDimitry Andric const Instruction *I = dyn_cast<const Instruction>(V); 15870b57cec5SDimitry Andric if (!I) 15880b57cec5SDimitry Andric return false; 15890b57cec5SDimitry Andric switch (I->getOpcode()) { 15900b57cec5SDimitry Andric case Instruction::LShr: 15910b57cec5SDimitry Andric if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) 15920b57cec5SDimitry Andric return SI->getZExtValue() > 0; 15930b57cec5SDimitry Andric return false; 15940b57cec5SDimitry Andric case Instruction::Or: 15950b57cec5SDimitry Andric case Instruction::Xor: 15960b57cec5SDimitry Andric return hasZeroSignBit(I->getOperand(0)) && 15970b57cec5SDimitry Andric hasZeroSignBit(I->getOperand(1)); 15980b57cec5SDimitry Andric case Instruction::And: 15990b57cec5SDimitry Andric return hasZeroSignBit(I->getOperand(0)) || 16000b57cec5SDimitry Andric hasZeroSignBit(I->getOperand(1)); 16010b57cec5SDimitry Andric } 16020b57cec5SDimitry Andric return false; 16030b57cec5SDimitry Andric } 16040b57cec5SDimitry Andric 16050b57cec5SDimitry Andric void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { 16060b57cec5SDimitry Andric S.addRule("sink-zext", 16070b57cec5SDimitry Andric // Sink zext past bitwise operations. 16080b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 16090b57cec5SDimitry Andric if (I->getOpcode() != Instruction::ZExt) 16100b57cec5SDimitry Andric return nullptr; 16110b57cec5SDimitry Andric Instruction *T = dyn_cast<Instruction>(I->getOperand(0)); 16120b57cec5SDimitry Andric if (!T) 16130b57cec5SDimitry Andric return nullptr; 16140b57cec5SDimitry Andric switch (T->getOpcode()) { 16150b57cec5SDimitry Andric case Instruction::And: 16160b57cec5SDimitry Andric case Instruction::Or: 16170b57cec5SDimitry Andric case Instruction::Xor: 16180b57cec5SDimitry Andric break; 16190b57cec5SDimitry Andric default: 16200b57cec5SDimitry Andric return nullptr; 16210b57cec5SDimitry Andric } 16220b57cec5SDimitry Andric IRBuilder<> B(Ctx); 16230b57cec5SDimitry Andric return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(), 16240b57cec5SDimitry Andric B.CreateZExt(T->getOperand(0), I->getType()), 16250b57cec5SDimitry Andric B.CreateZExt(T->getOperand(1), I->getType())); 16260b57cec5SDimitry Andric }); 16270b57cec5SDimitry Andric S.addRule("xor/and -> and/xor", 16280b57cec5SDimitry Andric // (xor (and x a) (and y a)) -> (and (xor x y) a) 16290b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 16300b57cec5SDimitry Andric if (I->getOpcode() != Instruction::Xor) 16310b57cec5SDimitry Andric return nullptr; 16320b57cec5SDimitry Andric Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0)); 16330b57cec5SDimitry Andric Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1)); 16340b57cec5SDimitry Andric if (!And0 || !And1) 16350b57cec5SDimitry Andric return nullptr; 16360b57cec5SDimitry Andric if (And0->getOpcode() != Instruction::And || 16370b57cec5SDimitry Andric And1->getOpcode() != Instruction::And) 16380b57cec5SDimitry Andric return nullptr; 16390b57cec5SDimitry Andric if (And0->getOperand(1) != And1->getOperand(1)) 16400b57cec5SDimitry Andric return nullptr; 16410b57cec5SDimitry Andric IRBuilder<> B(Ctx); 16420b57cec5SDimitry Andric return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), 16430b57cec5SDimitry Andric And0->getOperand(1)); 16440b57cec5SDimitry Andric }); 16450b57cec5SDimitry Andric S.addRule("sink binop into select", 16460b57cec5SDimitry Andric // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) 16470b57cec5SDimitry Andric // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) 16480b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 16490b57cec5SDimitry Andric BinaryOperator *BO = dyn_cast<BinaryOperator>(I); 16500b57cec5SDimitry Andric if (!BO) 16510b57cec5SDimitry Andric return nullptr; 16520b57cec5SDimitry Andric Instruction::BinaryOps Op = BO->getOpcode(); 16530b57cec5SDimitry Andric if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) { 16540b57cec5SDimitry Andric IRBuilder<> B(Ctx); 16550b57cec5SDimitry Andric Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); 16560b57cec5SDimitry Andric Value *Z = BO->getOperand(1); 16570b57cec5SDimitry Andric return B.CreateSelect(Sel->getCondition(), 16580b57cec5SDimitry Andric B.CreateBinOp(Op, X, Z), 16590b57cec5SDimitry Andric B.CreateBinOp(Op, Y, Z)); 16600b57cec5SDimitry Andric } 16610b57cec5SDimitry Andric if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) { 16620b57cec5SDimitry Andric IRBuilder<> B(Ctx); 16630b57cec5SDimitry Andric Value *X = BO->getOperand(0); 16640b57cec5SDimitry Andric Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); 16650b57cec5SDimitry Andric return B.CreateSelect(Sel->getCondition(), 16660b57cec5SDimitry Andric B.CreateBinOp(Op, X, Y), 16670b57cec5SDimitry Andric B.CreateBinOp(Op, X, Z)); 16680b57cec5SDimitry Andric } 16690b57cec5SDimitry Andric return nullptr; 16700b57cec5SDimitry Andric }); 16710b57cec5SDimitry Andric S.addRule("fold select-select", 16720b57cec5SDimitry Andric // (select c (select c x y) z) -> (select c x z) 16730b57cec5SDimitry Andric // (select c x (select c y z)) -> (select c x z) 16740b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 16750b57cec5SDimitry Andric SelectInst *Sel = dyn_cast<SelectInst>(I); 16760b57cec5SDimitry Andric if (!Sel) 16770b57cec5SDimitry Andric return nullptr; 16780b57cec5SDimitry Andric IRBuilder<> B(Ctx); 16790b57cec5SDimitry Andric Value *C = Sel->getCondition(); 16800b57cec5SDimitry Andric if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) { 16810b57cec5SDimitry Andric if (Sel0->getCondition() == C) 16820b57cec5SDimitry Andric return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); 16830b57cec5SDimitry Andric } 16840b57cec5SDimitry Andric if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) { 16850b57cec5SDimitry Andric if (Sel1->getCondition() == C) 16860b57cec5SDimitry Andric return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); 16870b57cec5SDimitry Andric } 16880b57cec5SDimitry Andric return nullptr; 16890b57cec5SDimitry Andric }); 16900b57cec5SDimitry Andric S.addRule("or-signbit -> xor-signbit", 16910b57cec5SDimitry Andric // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) 16920b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 16930b57cec5SDimitry Andric if (I->getOpcode() != Instruction::Or) 16940b57cec5SDimitry Andric return nullptr; 16950b57cec5SDimitry Andric ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); 16960b57cec5SDimitry Andric if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) 16970b57cec5SDimitry Andric return nullptr; 16980b57cec5SDimitry Andric if (!hasZeroSignBit(I->getOperand(0))) 16990b57cec5SDimitry Andric return nullptr; 17000b57cec5SDimitry Andric return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); 17010b57cec5SDimitry Andric }); 17020b57cec5SDimitry Andric S.addRule("sink lshr into binop", 17030b57cec5SDimitry Andric // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) 17040b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 17050b57cec5SDimitry Andric if (I->getOpcode() != Instruction::LShr) 17060b57cec5SDimitry Andric return nullptr; 17070b57cec5SDimitry Andric BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0)); 17080b57cec5SDimitry Andric if (!BitOp) 17090b57cec5SDimitry Andric return nullptr; 17100b57cec5SDimitry Andric switch (BitOp->getOpcode()) { 17110b57cec5SDimitry Andric case Instruction::And: 17120b57cec5SDimitry Andric case Instruction::Or: 17130b57cec5SDimitry Andric case Instruction::Xor: 17140b57cec5SDimitry Andric break; 17150b57cec5SDimitry Andric default: 17160b57cec5SDimitry Andric return nullptr; 17170b57cec5SDimitry Andric } 17180b57cec5SDimitry Andric IRBuilder<> B(Ctx); 17190b57cec5SDimitry Andric Value *S = I->getOperand(1); 17200b57cec5SDimitry Andric return B.CreateBinOp(BitOp->getOpcode(), 17210b57cec5SDimitry Andric B.CreateLShr(BitOp->getOperand(0), S), 17220b57cec5SDimitry Andric B.CreateLShr(BitOp->getOperand(1), S)); 17230b57cec5SDimitry Andric }); 17240b57cec5SDimitry Andric S.addRule("expose bitop-const", 17250b57cec5SDimitry Andric // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) 17260b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 17270b57cec5SDimitry Andric auto IsBitOp = [](unsigned Op) -> bool { 17280b57cec5SDimitry Andric switch (Op) { 17290b57cec5SDimitry Andric case Instruction::And: 17300b57cec5SDimitry Andric case Instruction::Or: 17310b57cec5SDimitry Andric case Instruction::Xor: 17320b57cec5SDimitry Andric return true; 17330b57cec5SDimitry Andric } 17340b57cec5SDimitry Andric return false; 17350b57cec5SDimitry Andric }; 17360b57cec5SDimitry Andric BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I); 17370b57cec5SDimitry Andric if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) 17380b57cec5SDimitry Andric return nullptr; 17390b57cec5SDimitry Andric BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0)); 17400b57cec5SDimitry Andric if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) 17410b57cec5SDimitry Andric return nullptr; 17420b57cec5SDimitry Andric ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1)); 17430b57cec5SDimitry Andric ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1)); 17440b57cec5SDimitry Andric if (!CA || !CB) 17450b57cec5SDimitry Andric return nullptr; 17460b57cec5SDimitry Andric IRBuilder<> B(Ctx); 17470b57cec5SDimitry Andric Value *X = BitOp2->getOperand(0); 17480b57cec5SDimitry Andric return B.CreateBinOp(BitOp2->getOpcode(), X, 17490b57cec5SDimitry Andric B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); 17500b57cec5SDimitry Andric }); 17510b57cec5SDimitry Andric } 17520b57cec5SDimitry Andric 17530b57cec5SDimitry Andric void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { 17540b57cec5SDimitry Andric S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", 17550b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 17560b57cec5SDimitry Andric if (I->getOpcode() != Instruction::And) 17570b57cec5SDimitry Andric return nullptr; 17580b57cec5SDimitry Andric Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); 17590b57cec5SDimitry Andric ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); 17600b57cec5SDimitry Andric if (!Xor || !C0) 17610b57cec5SDimitry Andric return nullptr; 17620b57cec5SDimitry Andric if (Xor->getOpcode() != Instruction::Xor) 17630b57cec5SDimitry Andric return nullptr; 17640b57cec5SDimitry Andric Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); 17650b57cec5SDimitry Andric Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); 17660b57cec5SDimitry Andric // Pick the first non-null and. 17670b57cec5SDimitry Andric if (!And0 || And0->getOpcode() != Instruction::And) 17680b57cec5SDimitry Andric std::swap(And0, And1); 17690b57cec5SDimitry Andric ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); 17700b57cec5SDimitry Andric if (!C1) 17710b57cec5SDimitry Andric return nullptr; 17720b57cec5SDimitry Andric uint32_t V0 = C0->getZExtValue(); 17730b57cec5SDimitry Andric uint32_t V1 = C1->getZExtValue(); 17740b57cec5SDimitry Andric if (V0 != (V0 & V1)) 17750b57cec5SDimitry Andric return nullptr; 17760b57cec5SDimitry Andric IRBuilder<> B(Ctx); 17770b57cec5SDimitry Andric return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); 17780b57cec5SDimitry Andric }); 17790b57cec5SDimitry Andric } 17800b57cec5SDimitry Andric 17810b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::recognize() { 17820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" 17830b57cec5SDimitry Andric << *CurLoop << '\n'); 17840b57cec5SDimitry Andric // Restrictions: 17850b57cec5SDimitry Andric // - The loop must consist of a single block. 17860b57cec5SDimitry Andric // - The iteration count must be known at compile-time. 17870b57cec5SDimitry Andric // - The loop must have an induction variable starting from 0, and 17880b57cec5SDimitry Andric // incremented in each iteration of the loop. 17890b57cec5SDimitry Andric BasicBlock *LoopB = CurLoop->getHeader(); 17900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB); 17910b57cec5SDimitry Andric 17920b57cec5SDimitry Andric if (LoopB != CurLoop->getLoopLatch()) 17930b57cec5SDimitry Andric return false; 17940b57cec5SDimitry Andric BasicBlock *ExitB = CurLoop->getExitBlock(); 17950b57cec5SDimitry Andric if (ExitB == nullptr) 17960b57cec5SDimitry Andric return false; 17970b57cec5SDimitry Andric BasicBlock *EntryB = CurLoop->getLoopPreheader(); 17980b57cec5SDimitry Andric if (EntryB == nullptr) 17990b57cec5SDimitry Andric return false; 18000b57cec5SDimitry Andric 18010b57cec5SDimitry Andric unsigned IterCount = 0; 18020b57cec5SDimitry Andric const SCEV *CT = SE.getBackedgeTakenCount(CurLoop); 18030b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(CT)) 18040b57cec5SDimitry Andric return false; 18050b57cec5SDimitry Andric if (auto *CV = dyn_cast<SCEVConstant>(CT)) 18060b57cec5SDimitry Andric IterCount = CV->getValue()->getZExtValue() + 1; 18070b57cec5SDimitry Andric 18080b57cec5SDimitry Andric Value *CIV = getCountIV(LoopB); 18090b57cec5SDimitry Andric ParsedValues PV; 18100b57cec5SDimitry Andric Simplifier PreSimp; 18110b57cec5SDimitry Andric PV.IterCount = IterCount; 18120b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount 18130b57cec5SDimitry Andric << '\n'); 18140b57cec5SDimitry Andric 18150b57cec5SDimitry Andric setupPreSimplifier(PreSimp); 18160b57cec5SDimitry Andric 18170b57cec5SDimitry Andric // Perform a preliminary scan of select instructions to see if any of them 18180b57cec5SDimitry Andric // looks like a generator of the polynomial multiply steps. Assume that a 18190b57cec5SDimitry Andric // loop can only contain a single transformable operation, so stop the 18200b57cec5SDimitry Andric // traversal after the first reasonable candidate was found. 18210b57cec5SDimitry Andric // XXX: Currently this approach can modify the loop before being 100% sure 18220b57cec5SDimitry Andric // that the transformation can be carried out. 18230b57cec5SDimitry Andric bool FoundPreScan = false; 18240b57cec5SDimitry Andric auto FeedsPHI = [LoopB](const Value *V) -> bool { 18250b57cec5SDimitry Andric for (const Value *U : V->users()) { 18260b57cec5SDimitry Andric if (const auto *P = dyn_cast<const PHINode>(U)) 18270b57cec5SDimitry Andric if (P->getParent() == LoopB) 18280b57cec5SDimitry Andric return true; 18290b57cec5SDimitry Andric } 18300b57cec5SDimitry Andric return false; 18310b57cec5SDimitry Andric }; 18320b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 18330b57cec5SDimitry Andric SelectInst *SI = dyn_cast<SelectInst>(&In); 18340b57cec5SDimitry Andric if (!SI || !FeedsPHI(SI)) 18350b57cec5SDimitry Andric continue; 18360b57cec5SDimitry Andric 18370b57cec5SDimitry Andric Simplifier::Context C(SI); 18380b57cec5SDimitry Andric Value *T = PreSimp.simplify(C); 18390b57cec5SDimitry Andric SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; 18400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'); 18410b57cec5SDimitry Andric if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { 18420b57cec5SDimitry Andric FoundPreScan = true; 18430b57cec5SDimitry Andric if (SelI != SI) { 18440b57cec5SDimitry Andric Value *NewSel = C.materialize(LoopB, SI->getIterator()); 18450b57cec5SDimitry Andric SI->replaceAllUsesWith(NewSel); 18460b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); 18470b57cec5SDimitry Andric } 18480b57cec5SDimitry Andric break; 18490b57cec5SDimitry Andric } 18500b57cec5SDimitry Andric } 18510b57cec5SDimitry Andric 18520b57cec5SDimitry Andric if (!FoundPreScan) { 18530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n"); 18540b57cec5SDimitry Andric return false; 18550b57cec5SDimitry Andric } 18560b57cec5SDimitry Andric 18570b57cec5SDimitry Andric if (!PV.Left) { 18580b57cec5SDimitry Andric // The right shift version actually only returns the higher bits of 18590b57cec5SDimitry Andric // the result (each iteration discards the LSB). If we want to convert it 18600b57cec5SDimitry Andric // to a left-shifting loop, the working data type must be at least as 18610b57cec5SDimitry Andric // wide as the target's pmpy instruction. 18620b57cec5SDimitry Andric if (!promoteTypes(LoopB, ExitB)) 18630b57cec5SDimitry Andric return false; 18640b57cec5SDimitry Andric // Run post-promotion simplifications. 18650b57cec5SDimitry Andric Simplifier PostSimp; 18660b57cec5SDimitry Andric setupPostSimplifier(PostSimp); 18670b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 18680b57cec5SDimitry Andric SelectInst *SI = dyn_cast<SelectInst>(&In); 18690b57cec5SDimitry Andric if (!SI || !FeedsPHI(SI)) 18700b57cec5SDimitry Andric continue; 18710b57cec5SDimitry Andric Simplifier::Context C(SI); 18720b57cec5SDimitry Andric Value *T = PostSimp.simplify(C); 18730b57cec5SDimitry Andric SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); 18740b57cec5SDimitry Andric if (SelI != SI) { 18750b57cec5SDimitry Andric Value *NewSel = C.materialize(LoopB, SI->getIterator()); 18760b57cec5SDimitry Andric SI->replaceAllUsesWith(NewSel); 18770b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); 18780b57cec5SDimitry Andric } 18790b57cec5SDimitry Andric break; 18800b57cec5SDimitry Andric } 18810b57cec5SDimitry Andric 18820b57cec5SDimitry Andric if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) 18830b57cec5SDimitry Andric return false; 18840b57cec5SDimitry Andric cleanupLoopBody(LoopB); 18850b57cec5SDimitry Andric } 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric // Scan the loop again, find the generating select instruction. 18880b57cec5SDimitry Andric bool FoundScan = false; 18890b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 18900b57cec5SDimitry Andric SelectInst *SelI = dyn_cast<SelectInst>(&In); 18910b57cec5SDimitry Andric if (!SelI) 18920b57cec5SDimitry Andric continue; 18930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n'); 18940b57cec5SDimitry Andric FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); 18950b57cec5SDimitry Andric if (FoundScan) 18960b57cec5SDimitry Andric break; 18970b57cec5SDimitry Andric } 18980b57cec5SDimitry Andric assert(FoundScan); 18990b57cec5SDimitry Andric 19000b57cec5SDimitry Andric LLVM_DEBUG({ 19010b57cec5SDimitry Andric StringRef PP = (PV.M ? "(P+M)" : "P"); 19020b57cec5SDimitry Andric if (!PV.Inv) 19030b57cec5SDimitry Andric dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; 19040b57cec5SDimitry Andric else 19050b57cec5SDimitry Andric dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " 19060b57cec5SDimitry Andric << PP << "\n"; 19070b57cec5SDimitry Andric dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; 19080b57cec5SDimitry Andric if (PV.M) 19090b57cec5SDimitry Andric dbgs() << " M:" << *PV.M << "\n"; 19100b57cec5SDimitry Andric dbgs() << " Q:" << *PV.Q << "\n"; 19110b57cec5SDimitry Andric dbgs() << " Iteration count:" << PV.IterCount << "\n"; 19120b57cec5SDimitry Andric }); 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric BasicBlock::iterator At(EntryB->getTerminator()); 19150b57cec5SDimitry Andric Value *PM = generate(At, PV); 19160b57cec5SDimitry Andric if (PM == nullptr) 19170b57cec5SDimitry Andric return false; 19180b57cec5SDimitry Andric 19190b57cec5SDimitry Andric if (PM->getType() != PV.Res->getType()) 19200b57cec5SDimitry Andric PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false); 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric PV.Res->replaceAllUsesWith(PM); 19230b57cec5SDimitry Andric PV.Res->eraseFromParent(); 19240b57cec5SDimitry Andric return true; 19250b57cec5SDimitry Andric } 19260b57cec5SDimitry Andric 19270b57cec5SDimitry Andric int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { 19280b57cec5SDimitry Andric if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) 19290b57cec5SDimitry Andric return SC->getAPInt().getSExtValue(); 19300b57cec5SDimitry Andric return 0; 19310b57cec5SDimitry Andric } 19320b57cec5SDimitry Andric 19330b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { 19340b57cec5SDimitry Andric // Allow volatile stores if HexagonVolatileMemcpy is enabled. 19350b57cec5SDimitry Andric if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple()) 19360b57cec5SDimitry Andric return false; 19370b57cec5SDimitry Andric 19380b57cec5SDimitry Andric Value *StoredVal = SI->getValueOperand(); 19390b57cec5SDimitry Andric Value *StorePtr = SI->getPointerOperand(); 19400b57cec5SDimitry Andric 19410b57cec5SDimitry Andric // Reject stores that are so large that they overflow an unsigned. 19420b57cec5SDimitry Andric uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); 19430b57cec5SDimitry Andric if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) 19440b57cec5SDimitry Andric return false; 19450b57cec5SDimitry Andric 19460b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 19470b57cec5SDimitry Andric // loop, which indicates a strided store. If we have something else, it's a 19480b57cec5SDimitry Andric // random store we can't handle. 19490b57cec5SDimitry Andric auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 19500b57cec5SDimitry Andric if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) 19510b57cec5SDimitry Andric return false; 19520b57cec5SDimitry Andric 19530b57cec5SDimitry Andric // Check to see if the stride matches the size of the store. If so, then we 19540b57cec5SDimitry Andric // know that every byte is touched in the loop. 19550b57cec5SDimitry Andric int Stride = getSCEVStride(StoreEv); 19560b57cec5SDimitry Andric if (Stride == 0) 19570b57cec5SDimitry Andric return false; 19580b57cec5SDimitry Andric unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); 19590b57cec5SDimitry Andric if (StoreSize != unsigned(std::abs(Stride))) 19600b57cec5SDimitry Andric return false; 19610b57cec5SDimitry Andric 19620b57cec5SDimitry Andric // The store must be feeding a non-volatile load. 19630b57cec5SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); 19640b57cec5SDimitry Andric if (!LI || !LI->isSimple()) 19650b57cec5SDimitry Andric return false; 19660b57cec5SDimitry Andric 19670b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 19680b57cec5SDimitry Andric // loop, which indicates a strided load. If we have something else, it's a 19690b57cec5SDimitry Andric // random load we can't handle. 19700b57cec5SDimitry Andric Value *LoadPtr = LI->getPointerOperand(); 19710b57cec5SDimitry Andric auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); 19720b57cec5SDimitry Andric if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) 19730b57cec5SDimitry Andric return false; 19740b57cec5SDimitry Andric 19750b57cec5SDimitry Andric // The store and load must share the same stride. 19760b57cec5SDimitry Andric if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) 19770b57cec5SDimitry Andric return false; 19780b57cec5SDimitry Andric 19790b57cec5SDimitry Andric // Success. This store can be converted into a memcpy. 19800b57cec5SDimitry Andric return true; 19810b57cec5SDimitry Andric } 19820b57cec5SDimitry Andric 19830b57cec5SDimitry Andric /// mayLoopAccessLocation - Return true if the specified loop might access the 19840b57cec5SDimitry Andric /// specified pointer location, which is a loop-strided access. The 'Access' 19850b57cec5SDimitry Andric /// argument specifies what the verboten forms of access are (read or write). 19860b57cec5SDimitry Andric static bool 19870b57cec5SDimitry Andric mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, 19880b57cec5SDimitry Andric const SCEV *BECount, unsigned StoreSize, 19890b57cec5SDimitry Andric AliasAnalysis &AA, 19900b57cec5SDimitry Andric SmallPtrSetImpl<Instruction *> &Ignored) { 19910b57cec5SDimitry Andric // Get the location that may be stored across the loop. Since the access 19920b57cec5SDimitry Andric // is strided positively through memory, we say that the modified location 19930b57cec5SDimitry Andric // starts at the pointer and has infinite size. 1994*e8d8bef9SDimitry Andric LocationSize AccessSize = LocationSize::afterPointer(); 19950b57cec5SDimitry Andric 19960b57cec5SDimitry Andric // If the loop iterates a fixed number of times, we can refine the access 19970b57cec5SDimitry Andric // size to be exactly the size of the memset, which is (BECount+1)*StoreSize 19980b57cec5SDimitry Andric if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 19990b57cec5SDimitry Andric AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * 20000b57cec5SDimitry Andric StoreSize); 20010b57cec5SDimitry Andric 20020b57cec5SDimitry Andric // TODO: For this to be really effective, we have to dive into the pointer 20030b57cec5SDimitry Andric // operand in the store. Store to &A[i] of 100 will always return may alias 20040b57cec5SDimitry Andric // with store of &A[100], we need to StoreLoc to be "A" with size of 100, 20050b57cec5SDimitry Andric // which will then no-alias a store to &A[100]. 20060b57cec5SDimitry Andric MemoryLocation StoreLoc(Ptr, AccessSize); 20070b57cec5SDimitry Andric 20080b57cec5SDimitry Andric for (auto *B : L->blocks()) 20090b57cec5SDimitry Andric for (auto &I : *B) 20100b57cec5SDimitry Andric if (Ignored.count(&I) == 0 && 20110b57cec5SDimitry Andric isModOrRefSet( 20120b57cec5SDimitry Andric intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) 20130b57cec5SDimitry Andric return true; 20140b57cec5SDimitry Andric 20150b57cec5SDimitry Andric return false; 20160b57cec5SDimitry Andric } 20170b57cec5SDimitry Andric 20180b57cec5SDimitry Andric void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB, 20190b57cec5SDimitry Andric SmallVectorImpl<StoreInst*> &Stores) { 20200b57cec5SDimitry Andric Stores.clear(); 20210b57cec5SDimitry Andric for (Instruction &I : *BB) 20220b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 20230b57cec5SDimitry Andric if (isLegalStore(CurLoop, SI)) 20240b57cec5SDimitry Andric Stores.push_back(SI); 20250b57cec5SDimitry Andric } 20260b57cec5SDimitry Andric 20270b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, 20280b57cec5SDimitry Andric StoreInst *SI, const SCEV *BECount) { 20290b57cec5SDimitry Andric assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && 20300b57cec5SDimitry Andric "Expected only non-volatile stores, or Hexagon-specific memcpy" 20310b57cec5SDimitry Andric "to volatile destination."); 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric Value *StorePtr = SI->getPointerOperand(); 20340b57cec5SDimitry Andric auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 20350b57cec5SDimitry Andric unsigned Stride = getSCEVStride(StoreEv); 20360b57cec5SDimitry Andric unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); 20370b57cec5SDimitry Andric if (Stride != StoreSize) 20380b57cec5SDimitry Andric return false; 20390b57cec5SDimitry Andric 20400b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 20410b57cec5SDimitry Andric // loop, which indicates a strided load. If we have something else, it's a 20420b57cec5SDimitry Andric // random load we can't handle. 20438bcb0991SDimitry Andric auto *LI = cast<LoadInst>(SI->getValueOperand()); 20440b57cec5SDimitry Andric auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric // The trip count of the loop and the base pointer of the addrec SCEV is 20470b57cec5SDimitry Andric // guaranteed to be loop invariant, which means that it should dominate the 20480b57cec5SDimitry Andric // header. This allows us to insert code for it in the preheader. 20490b57cec5SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader(); 20500b57cec5SDimitry Andric Instruction *ExpPt = Preheader->getTerminator(); 20510b57cec5SDimitry Andric IRBuilder<> Builder(ExpPt); 20520b57cec5SDimitry Andric SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom"); 20530b57cec5SDimitry Andric 20540b57cec5SDimitry Andric Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); 20550b57cec5SDimitry Andric 20560b57cec5SDimitry Andric // Okay, we have a strided store "p[i]" of a loaded value. We can turn 20570b57cec5SDimitry Andric // this into a memcpy/memmove in the loop preheader now if we want. However, 20580b57cec5SDimitry Andric // this would be unsafe to do if there is anything else in the loop that may 20590b57cec5SDimitry Andric // read or write the memory region we're storing to. For memcpy, this 20600b57cec5SDimitry Andric // includes the load that feeds the stores. Check for an alias by generating 20610b57cec5SDimitry Andric // the base address and checking everything. 20620b57cec5SDimitry Andric Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), 20630b57cec5SDimitry Andric Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt); 20640b57cec5SDimitry Andric Value *LoadBasePtr = nullptr; 20650b57cec5SDimitry Andric 20660b57cec5SDimitry Andric bool Overlap = false; 20670b57cec5SDimitry Andric bool DestVolatile = SI->isVolatile(); 20680b57cec5SDimitry Andric Type *BECountTy = BECount->getType(); 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric if (DestVolatile) { 20710b57cec5SDimitry Andric // The trip count must fit in i32, since it is the type of the "num_words" 20720b57cec5SDimitry Andric // argument to hexagon_memcpy_forward_vp4cp4n2. 20730b57cec5SDimitry Andric if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) { 20740b57cec5SDimitry Andric CleanupAndExit: 20750b57cec5SDimitry Andric // If we generated new code for the base pointer, clean up. 20760b57cec5SDimitry Andric Expander.clear(); 20770b57cec5SDimitry Andric if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { 20780b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); 20790b57cec5SDimitry Andric StoreBasePtr = nullptr; 20800b57cec5SDimitry Andric } 20810b57cec5SDimitry Andric if (LoadBasePtr) { 20820b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); 20830b57cec5SDimitry Andric LoadBasePtr = nullptr; 20840b57cec5SDimitry Andric } 20850b57cec5SDimitry Andric return false; 20860b57cec5SDimitry Andric } 20870b57cec5SDimitry Andric } 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric SmallPtrSet<Instruction*, 2> Ignore1; 20900b57cec5SDimitry Andric Ignore1.insert(SI); 20910b57cec5SDimitry Andric if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, 20920b57cec5SDimitry Andric StoreSize, *AA, Ignore1)) { 20930b57cec5SDimitry Andric // Check if the load is the offending instruction. 20940b57cec5SDimitry Andric Ignore1.insert(LI); 20950b57cec5SDimitry Andric if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, 20960b57cec5SDimitry Andric BECount, StoreSize, *AA, Ignore1)) { 20970b57cec5SDimitry Andric // Still bad. Nothing we can do. 20980b57cec5SDimitry Andric goto CleanupAndExit; 20990b57cec5SDimitry Andric } 21000b57cec5SDimitry Andric // It worked with the load ignored. 21010b57cec5SDimitry Andric Overlap = true; 21020b57cec5SDimitry Andric } 21030b57cec5SDimitry Andric 21040b57cec5SDimitry Andric if (!Overlap) { 21050b57cec5SDimitry Andric if (DisableMemcpyIdiom || !HasMemcpy) 21060b57cec5SDimitry Andric goto CleanupAndExit; 21070b57cec5SDimitry Andric } else { 21080b57cec5SDimitry Andric // Don't generate memmove if this function will be inlined. This is 21090b57cec5SDimitry Andric // because the caller will undergo this transformation after inlining. 21100b57cec5SDimitry Andric Function *Func = CurLoop->getHeader()->getParent(); 21110b57cec5SDimitry Andric if (Func->hasFnAttribute(Attribute::AlwaysInline)) 21120b57cec5SDimitry Andric goto CleanupAndExit; 21130b57cec5SDimitry Andric 21140b57cec5SDimitry Andric // In case of a memmove, the call to memmove will be executed instead 21150b57cec5SDimitry Andric // of the loop, so we need to make sure that there is nothing else in 21160b57cec5SDimitry Andric // the loop than the load, store and instructions that these two depend 21170b57cec5SDimitry Andric // on. 21180b57cec5SDimitry Andric SmallVector<Instruction*,2> Insts; 21190b57cec5SDimitry Andric Insts.push_back(SI); 21200b57cec5SDimitry Andric Insts.push_back(LI); 21210b57cec5SDimitry Andric if (!coverLoop(CurLoop, Insts)) 21220b57cec5SDimitry Andric goto CleanupAndExit; 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric if (DisableMemmoveIdiom || !HasMemmove) 21250b57cec5SDimitry Andric goto CleanupAndExit; 21260b57cec5SDimitry Andric bool IsNested = CurLoop->getParentLoop() != nullptr; 21270b57cec5SDimitry Andric if (IsNested && OnlyNonNestedMemmove) 21280b57cec5SDimitry Andric goto CleanupAndExit; 21290b57cec5SDimitry Andric } 21300b57cec5SDimitry Andric 21310b57cec5SDimitry Andric // For a memcpy, we have to make sure that the input array is not being 21320b57cec5SDimitry Andric // mutated by the loop. 21330b57cec5SDimitry Andric LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), 21340b57cec5SDimitry Andric Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt); 21350b57cec5SDimitry Andric 21360b57cec5SDimitry Andric SmallPtrSet<Instruction*, 2> Ignore2; 21370b57cec5SDimitry Andric Ignore2.insert(SI); 21380b57cec5SDimitry Andric if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, 21390b57cec5SDimitry Andric StoreSize, *AA, Ignore2)) 21400b57cec5SDimitry Andric goto CleanupAndExit; 21410b57cec5SDimitry Andric 21420b57cec5SDimitry Andric // Check the stride. 21430b57cec5SDimitry Andric bool StridePos = getSCEVStride(LoadEv) >= 0; 21440b57cec5SDimitry Andric 21450b57cec5SDimitry Andric // Currently, the volatile memcpy only emulates traversing memory forward. 21460b57cec5SDimitry Andric if (!StridePos && DestVolatile) 21470b57cec5SDimitry Andric goto CleanupAndExit; 21480b57cec5SDimitry Andric 21490b57cec5SDimitry Andric bool RuntimeCheck = (Overlap || DestVolatile); 21500b57cec5SDimitry Andric 21510b57cec5SDimitry Andric BasicBlock *ExitB; 21520b57cec5SDimitry Andric if (RuntimeCheck) { 21530b57cec5SDimitry Andric // The runtime check needs a single exit block. 21540b57cec5SDimitry Andric SmallVector<BasicBlock*, 8> ExitBlocks; 21550b57cec5SDimitry Andric CurLoop->getUniqueExitBlocks(ExitBlocks); 21560b57cec5SDimitry Andric if (ExitBlocks.size() != 1) 21570b57cec5SDimitry Andric goto CleanupAndExit; 21580b57cec5SDimitry Andric ExitB = ExitBlocks[0]; 21590b57cec5SDimitry Andric } 21600b57cec5SDimitry Andric 21610b57cec5SDimitry Andric // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 21620b57cec5SDimitry Andric // pointer size if it isn't already. 21630b57cec5SDimitry Andric LLVMContext &Ctx = SI->getContext(); 21640b57cec5SDimitry Andric BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); 21650b57cec5SDimitry Andric DebugLoc DLoc = SI->getDebugLoc(); 21660b57cec5SDimitry Andric 21670b57cec5SDimitry Andric const SCEV *NumBytesS = 21680b57cec5SDimitry Andric SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); 21690b57cec5SDimitry Andric if (StoreSize != 1) 21700b57cec5SDimitry Andric NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), 21710b57cec5SDimitry Andric SCEV::FlagNUW); 21720b57cec5SDimitry Andric Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); 21730b57cec5SDimitry Andric if (Instruction *In = dyn_cast<Instruction>(NumBytes)) 21740b57cec5SDimitry Andric if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) 21750b57cec5SDimitry Andric NumBytes = Simp; 21760b57cec5SDimitry Andric 21770b57cec5SDimitry Andric CallInst *NewCall; 21780b57cec5SDimitry Andric 21790b57cec5SDimitry Andric if (RuntimeCheck) { 21800b57cec5SDimitry Andric unsigned Threshold = RuntimeMemSizeThreshold; 21810b57cec5SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) { 21820b57cec5SDimitry Andric uint64_t C = CI->getZExtValue(); 21830b57cec5SDimitry Andric if (Threshold != 0 && C < Threshold) 21840b57cec5SDimitry Andric goto CleanupAndExit; 21850b57cec5SDimitry Andric if (C < CompileTimeMemSizeThreshold) 21860b57cec5SDimitry Andric goto CleanupAndExit; 21870b57cec5SDimitry Andric } 21880b57cec5SDimitry Andric 21890b57cec5SDimitry Andric BasicBlock *Header = CurLoop->getHeader(); 21900b57cec5SDimitry Andric Function *Func = Header->getParent(); 21910b57cec5SDimitry Andric Loop *ParentL = LF->getLoopFor(Preheader); 21920b57cec5SDimitry Andric StringRef HeaderName = Header->getName(); 21930b57cec5SDimitry Andric 21940b57cec5SDimitry Andric // Create a new (empty) preheader, and update the PHI nodes in the 21950b57cec5SDimitry Andric // header to use the new preheader. 21960b57cec5SDimitry Andric BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", 21970b57cec5SDimitry Andric Func, Header); 21980b57cec5SDimitry Andric if (ParentL) 21990b57cec5SDimitry Andric ParentL->addBasicBlockToLoop(NewPreheader, *LF); 22000b57cec5SDimitry Andric IRBuilder<>(NewPreheader).CreateBr(Header); 22010b57cec5SDimitry Andric for (auto &In : *Header) { 22020b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(&In); 22030b57cec5SDimitry Andric if (!PN) 22040b57cec5SDimitry Andric break; 22050b57cec5SDimitry Andric int bx = PN->getBasicBlockIndex(Preheader); 22060b57cec5SDimitry Andric if (bx >= 0) 22070b57cec5SDimitry Andric PN->setIncomingBlock(bx, NewPreheader); 22080b57cec5SDimitry Andric } 22090b57cec5SDimitry Andric DT->addNewBlock(NewPreheader, Preheader); 22100b57cec5SDimitry Andric DT->changeImmediateDominator(Header, NewPreheader); 22110b57cec5SDimitry Andric 22120b57cec5SDimitry Andric // Check for safe conditions to execute memmove. 22130b57cec5SDimitry Andric // If stride is positive, copying things from higher to lower addresses 22140b57cec5SDimitry Andric // is equivalent to memmove. For negative stride, it's the other way 22150b57cec5SDimitry Andric // around. Copying forward in memory with positive stride may not be 22160b57cec5SDimitry Andric // same as memmove since we may be copying values that we just stored 22170b57cec5SDimitry Andric // in some previous iteration. 22180b57cec5SDimitry Andric Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); 22190b57cec5SDimitry Andric Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); 22200b57cec5SDimitry Andric Value *LowA = StridePos ? SA : LA; 22210b57cec5SDimitry Andric Value *HighA = StridePos ? LA : SA; 22220b57cec5SDimitry Andric Value *CmpA = Builder.CreateICmpULT(LowA, HighA); 22230b57cec5SDimitry Andric Value *Cond = CmpA; 22240b57cec5SDimitry Andric 22250b57cec5SDimitry Andric // Check for distance between pointers. Since the case LowA < HighA 22260b57cec5SDimitry Andric // is checked for above, assume LowA >= HighA. 22270b57cec5SDimitry Andric Value *Dist = Builder.CreateSub(LowA, HighA); 22280b57cec5SDimitry Andric Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist); 22290b57cec5SDimitry Andric Value *CmpEither = Builder.CreateOr(Cond, CmpD); 22300b57cec5SDimitry Andric Cond = CmpEither; 22310b57cec5SDimitry Andric 22320b57cec5SDimitry Andric if (Threshold != 0) { 22330b57cec5SDimitry Andric Type *Ty = NumBytes->getType(); 22340b57cec5SDimitry Andric Value *Thr = ConstantInt::get(Ty, Threshold); 22350b57cec5SDimitry Andric Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); 22360b57cec5SDimitry Andric Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); 22370b57cec5SDimitry Andric Cond = CmpBoth; 22380b57cec5SDimitry Andric } 22390b57cec5SDimitry Andric BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", 22400b57cec5SDimitry Andric Func, NewPreheader); 22410b57cec5SDimitry Andric if (ParentL) 22420b57cec5SDimitry Andric ParentL->addBasicBlockToLoop(MemmoveB, *LF); 22430b57cec5SDimitry Andric Instruction *OldT = Preheader->getTerminator(); 22440b57cec5SDimitry Andric Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); 22450b57cec5SDimitry Andric OldT->eraseFromParent(); 22460b57cec5SDimitry Andric Preheader->setName(Preheader->getName()+".old"); 22470b57cec5SDimitry Andric DT->addNewBlock(MemmoveB, Preheader); 22480b57cec5SDimitry Andric // Find the new immediate dominator of the exit block. 22490b57cec5SDimitry Andric BasicBlock *ExitD = Preheader; 22500b57cec5SDimitry Andric for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) { 22510b57cec5SDimitry Andric BasicBlock *PB = *PI; 22520b57cec5SDimitry Andric ExitD = DT->findNearestCommonDominator(ExitD, PB); 22530b57cec5SDimitry Andric if (!ExitD) 22540b57cec5SDimitry Andric break; 22550b57cec5SDimitry Andric } 22560b57cec5SDimitry Andric // If the prior immediate dominator of ExitB was dominated by the 22570b57cec5SDimitry Andric // old preheader, then the old preheader becomes the new immediate 22580b57cec5SDimitry Andric // dominator. Otherwise don't change anything (because the newly 22590b57cec5SDimitry Andric // added blocks are dominated by the old preheader). 22600b57cec5SDimitry Andric if (ExitD && DT->dominates(Preheader, ExitD)) { 22610b57cec5SDimitry Andric DomTreeNode *BN = DT->getNode(ExitB); 22620b57cec5SDimitry Andric DomTreeNode *DN = DT->getNode(ExitD); 22630b57cec5SDimitry Andric BN->setIDom(DN); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric // Add a call to memmove to the conditional block. 22670b57cec5SDimitry Andric IRBuilder<> CondBuilder(MemmoveB); 22680b57cec5SDimitry Andric CondBuilder.CreateBr(ExitB); 22690b57cec5SDimitry Andric CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); 22700b57cec5SDimitry Andric 22710b57cec5SDimitry Andric if (DestVolatile) { 22720b57cec5SDimitry Andric Type *Int32Ty = Type::getInt32Ty(Ctx); 22730b57cec5SDimitry Andric Type *Int32PtrTy = Type::getInt32PtrTy(Ctx); 22740b57cec5SDimitry Andric Type *VoidTy = Type::getVoidTy(Ctx); 22750b57cec5SDimitry Andric Module *M = Func->getParent(); 22760b57cec5SDimitry Andric FunctionCallee Fn = M->getOrInsertFunction( 22770b57cec5SDimitry Andric HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty); 22780b57cec5SDimitry Andric 22790b57cec5SDimitry Andric const SCEV *OneS = SE->getConstant(Int32Ty, 1); 22800b57cec5SDimitry Andric const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty); 22810b57cec5SDimitry Andric const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW); 22820b57cec5SDimitry Andric Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, 22830b57cec5SDimitry Andric MemmoveB->getTerminator()); 22840b57cec5SDimitry Andric if (Instruction *In = dyn_cast<Instruction>(NumWords)) 22850b57cec5SDimitry Andric if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) 22860b57cec5SDimitry Andric NumWords = Simp; 22870b57cec5SDimitry Andric 22880b57cec5SDimitry Andric Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) 22890b57cec5SDimitry Andric ? StoreBasePtr 22900b57cec5SDimitry Andric : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy); 22910b57cec5SDimitry Andric Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy) 22920b57cec5SDimitry Andric ? LoadBasePtr 22930b57cec5SDimitry Andric : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); 22940b57cec5SDimitry Andric NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); 22950b57cec5SDimitry Andric } else { 2296480093f4SDimitry Andric NewCall = CondBuilder.CreateMemMove( 2297480093f4SDimitry Andric StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes); 22980b57cec5SDimitry Andric } 22990b57cec5SDimitry Andric } else { 2300480093f4SDimitry Andric NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr, 2301480093f4SDimitry Andric LI->getAlign(), NumBytes); 23020b57cec5SDimitry Andric // Okay, the memcpy has been formed. Zap the original store and 23030b57cec5SDimitry Andric // anything that feeds into it. 23040b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); 23050b57cec5SDimitry Andric } 23060b57cec5SDimitry Andric 23070b57cec5SDimitry Andric NewCall->setDebugLoc(DLoc); 23080b57cec5SDimitry Andric 23090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") 23100b57cec5SDimitry Andric << *NewCall << "\n" 23110b57cec5SDimitry Andric << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" 23120b57cec5SDimitry Andric << " from store ptr=" << *StoreEv << " at: " << *SI 23130b57cec5SDimitry Andric << "\n"); 23140b57cec5SDimitry Andric 23150b57cec5SDimitry Andric return true; 23160b57cec5SDimitry Andric } 23170b57cec5SDimitry Andric 23180b57cec5SDimitry Andric // Check if the instructions in Insts, together with their dependencies 23190b57cec5SDimitry Andric // cover the loop in the sense that the loop could be safely eliminated once 23200b57cec5SDimitry Andric // the instructions in Insts are removed. 23210b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::coverLoop(Loop *L, 23220b57cec5SDimitry Andric SmallVectorImpl<Instruction*> &Insts) const { 23230b57cec5SDimitry Andric SmallSet<BasicBlock*,8> LoopBlocks; 23240b57cec5SDimitry Andric for (auto *B : L->blocks()) 23250b57cec5SDimitry Andric LoopBlocks.insert(B); 23260b57cec5SDimitry Andric 23270b57cec5SDimitry Andric SetVector<Instruction*> Worklist(Insts.begin(), Insts.end()); 23280b57cec5SDimitry Andric 23290b57cec5SDimitry Andric // Collect all instructions from the loop that the instructions in Insts 23300b57cec5SDimitry Andric // depend on (plus their dependencies, etc.). These instructions will 23310b57cec5SDimitry Andric // constitute the expression trees that feed those in Insts, but the trees 23320b57cec5SDimitry Andric // will be limited only to instructions contained in the loop. 23330b57cec5SDimitry Andric for (unsigned i = 0; i < Worklist.size(); ++i) { 23340b57cec5SDimitry Andric Instruction *In = Worklist[i]; 23350b57cec5SDimitry Andric for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { 23360b57cec5SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(I); 23370b57cec5SDimitry Andric if (!OpI) 23380b57cec5SDimitry Andric continue; 23390b57cec5SDimitry Andric BasicBlock *PB = OpI->getParent(); 23400b57cec5SDimitry Andric if (!LoopBlocks.count(PB)) 23410b57cec5SDimitry Andric continue; 23420b57cec5SDimitry Andric Worklist.insert(OpI); 23430b57cec5SDimitry Andric } 23440b57cec5SDimitry Andric } 23450b57cec5SDimitry Andric 23460b57cec5SDimitry Andric // Scan all instructions in the loop, if any of them have a user outside 23470b57cec5SDimitry Andric // of the loop, or outside of the expressions collected above, then either 23480b57cec5SDimitry Andric // the loop has a side-effect visible outside of it, or there are 23490b57cec5SDimitry Andric // instructions in it that are not involved in the original set Insts. 23500b57cec5SDimitry Andric for (auto *B : L->blocks()) { 23510b57cec5SDimitry Andric for (auto &In : *B) { 23520b57cec5SDimitry Andric if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In)) 23530b57cec5SDimitry Andric continue; 23540b57cec5SDimitry Andric if (!Worklist.count(&In) && In.mayHaveSideEffects()) 23550b57cec5SDimitry Andric return false; 2356480093f4SDimitry Andric for (auto K : In.users()) { 23570b57cec5SDimitry Andric Instruction *UseI = dyn_cast<Instruction>(K); 23580b57cec5SDimitry Andric if (!UseI) 23590b57cec5SDimitry Andric continue; 23600b57cec5SDimitry Andric BasicBlock *UseB = UseI->getParent(); 23610b57cec5SDimitry Andric if (LF->getLoopFor(UseB) != L) 23620b57cec5SDimitry Andric return false; 23630b57cec5SDimitry Andric } 23640b57cec5SDimitry Andric } 23650b57cec5SDimitry Andric } 23660b57cec5SDimitry Andric 23670b57cec5SDimitry Andric return true; 23680b57cec5SDimitry Andric } 23690b57cec5SDimitry Andric 23700b57cec5SDimitry Andric /// runOnLoopBlock - Process the specified block, which lives in a counted loop 23710b57cec5SDimitry Andric /// with the specified backedge count. This block is known to be in the current 23720b57cec5SDimitry Andric /// loop and not in any subloops. 23730b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, 23740b57cec5SDimitry Andric const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) { 23750b57cec5SDimitry Andric // We can only promote stores in this block if they are unconditionally 23760b57cec5SDimitry Andric // executed in the loop. For a block to be unconditionally executed, it has 23770b57cec5SDimitry Andric // to dominate all the exit blocks of the loop. Verify this now. 23780b57cec5SDimitry Andric auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool { 23790b57cec5SDimitry Andric return DT->dominates(BB, EB); 23800b57cec5SDimitry Andric }; 23810b57cec5SDimitry Andric if (!all_of(ExitBlocks, DominatedByBB)) 23820b57cec5SDimitry Andric return false; 23830b57cec5SDimitry Andric 23840b57cec5SDimitry Andric bool MadeChange = false; 23850b57cec5SDimitry Andric // Look for store instructions, which may be optimized to memset/memcpy. 23860b57cec5SDimitry Andric SmallVector<StoreInst*,8> Stores; 23870b57cec5SDimitry Andric collectStores(CurLoop, BB, Stores); 23880b57cec5SDimitry Andric 23890b57cec5SDimitry Andric // Optimize the store into a memcpy, if it feeds an similarly strided load. 23900b57cec5SDimitry Andric for (auto &SI : Stores) 23910b57cec5SDimitry Andric MadeChange |= processCopyingStore(CurLoop, SI, BECount); 23920b57cec5SDimitry Andric 23930b57cec5SDimitry Andric return MadeChange; 23940b57cec5SDimitry Andric } 23950b57cec5SDimitry Andric 23960b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) { 23970b57cec5SDimitry Andric PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE); 23980b57cec5SDimitry Andric if (PMR.recognize()) 23990b57cec5SDimitry Andric return true; 24000b57cec5SDimitry Andric 24010b57cec5SDimitry Andric if (!HasMemcpy && !HasMemmove) 24020b57cec5SDimitry Andric return false; 24030b57cec5SDimitry Andric 24040b57cec5SDimitry Andric const SCEV *BECount = SE->getBackedgeTakenCount(L); 24050b57cec5SDimitry Andric assert(!isa<SCEVCouldNotCompute>(BECount) && 24060b57cec5SDimitry Andric "runOnCountableLoop() called on a loop without a predictable" 24070b57cec5SDimitry Andric "backedge-taken count"); 24080b57cec5SDimitry Andric 24090b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks; 24100b57cec5SDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 24110b57cec5SDimitry Andric 24120b57cec5SDimitry Andric bool Changed = false; 24130b57cec5SDimitry Andric 24140b57cec5SDimitry Andric // Scan all the blocks in the loop that are not in subloops. 24150b57cec5SDimitry Andric for (auto *BB : L->getBlocks()) { 24160b57cec5SDimitry Andric // Ignore blocks in subloops. 24170b57cec5SDimitry Andric if (LF->getLoopFor(BB) != L) 24180b57cec5SDimitry Andric continue; 24190b57cec5SDimitry Andric Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks); 24200b57cec5SDimitry Andric } 24210b57cec5SDimitry Andric 24220b57cec5SDimitry Andric return Changed; 24230b57cec5SDimitry Andric } 24240b57cec5SDimitry Andric 2425*e8d8bef9SDimitry Andric bool HexagonLoopIdiomRecognize::run(Loop *L) { 24260b57cec5SDimitry Andric const Module &M = *L->getHeader()->getParent()->getParent(); 24270b57cec5SDimitry Andric if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon) 24280b57cec5SDimitry Andric return false; 24290b57cec5SDimitry Andric 24300b57cec5SDimitry Andric // If the loop could not be converted to canonical form, it must have an 24310b57cec5SDimitry Andric // indirectbr in it, just give up. 24320b57cec5SDimitry Andric if (!L->getLoopPreheader()) 24330b57cec5SDimitry Andric return false; 24340b57cec5SDimitry Andric 24350b57cec5SDimitry Andric // Disable loop idiom recognition if the function's name is a common idiom. 24360b57cec5SDimitry Andric StringRef Name = L->getHeader()->getParent()->getName(); 24370b57cec5SDimitry Andric if (Name == "memset" || Name == "memcpy" || Name == "memmove") 24380b57cec5SDimitry Andric return false; 24390b57cec5SDimitry Andric 24400b57cec5SDimitry Andric DL = &L->getHeader()->getModule()->getDataLayout(); 24410b57cec5SDimitry Andric 24420b57cec5SDimitry Andric HasMemcpy = TLI->has(LibFunc_memcpy); 24430b57cec5SDimitry Andric HasMemmove = TLI->has(LibFunc_memmove); 24440b57cec5SDimitry Andric 24450b57cec5SDimitry Andric if (SE->hasLoopInvariantBackedgeTakenCount(L)) 24460b57cec5SDimitry Andric return runOnCountableLoop(L); 24470b57cec5SDimitry Andric return false; 24480b57cec5SDimitry Andric } 24490b57cec5SDimitry Andric 2450*e8d8bef9SDimitry Andric bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L, 2451*e8d8bef9SDimitry Andric LPPassManager &LPM) { 2452*e8d8bef9SDimitry Andric if (skipLoop(L)) 2453*e8d8bef9SDimitry Andric return false; 2454*e8d8bef9SDimitry Andric 2455*e8d8bef9SDimitry Andric auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 2456*e8d8bef9SDimitry Andric auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2457*e8d8bef9SDimitry Andric auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2458*e8d8bef9SDimitry Andric auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( 2459*e8d8bef9SDimitry Andric *L->getHeader()->getParent()); 2460*e8d8bef9SDimitry Andric auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2461*e8d8bef9SDimitry Andric return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L); 2462*e8d8bef9SDimitry Andric } 2463*e8d8bef9SDimitry Andric 24640b57cec5SDimitry Andric Pass *llvm::createHexagonLoopIdiomPass() { 2465*e8d8bef9SDimitry Andric return new HexagonLoopIdiomRecognizeLegacyPass(); 2466*e8d8bef9SDimitry Andric } 2467*e8d8bef9SDimitry Andric 2468*e8d8bef9SDimitry Andric PreservedAnalyses 2469*e8d8bef9SDimitry Andric HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM, 2470*e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR, 2471*e8d8bef9SDimitry Andric LPMUpdater &U) { 2472*e8d8bef9SDimitry Andric return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE) 2473*e8d8bef9SDimitry Andric .run(&L) 2474*e8d8bef9SDimitry Andric ? getLoopPassPreservedAnalyses() 2475*e8d8bef9SDimitry Andric : PreservedAnalyses::all(); 24760b57cec5SDimitry Andric } 2477