1*0b57cec5SDimitry Andric //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===// 2*0b57cec5SDimitry Andric // 3*0b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0b57cec5SDimitry Andric // 7*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric #define DEBUG_TYPE "hexagon-lir" 10*0b57cec5SDimitry Andric 11*0b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 12*0b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 13*0b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 14*0b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 15*0b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 16*0b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 17*0b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 18*0b57cec5SDimitry Andric #include "llvm/ADT/Triple.h" 19*0b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 20*0b57cec5SDimitry Andric #include "llvm/Analysis/InstructionSimplify.h" 21*0b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 22*0b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 23*0b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h" 24*0b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 25*0b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpander.h" 26*0b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 27*0b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 28*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 29*0b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 30*0b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 31*0b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 32*0b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 33*0b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 34*0b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 35*0b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 36*0b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 37*0b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 38*0b57cec5SDimitry Andric #include "llvm/IR/Function.h" 39*0b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 40*0b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 41*0b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 42*0b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 43*0b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 44*0b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 45*0b57cec5SDimitry Andric #include "llvm/IR/Module.h" 46*0b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 47*0b57cec5SDimitry Andric #include "llvm/IR/Type.h" 48*0b57cec5SDimitry Andric #include "llvm/IR/User.h" 49*0b57cec5SDimitry Andric #include "llvm/IR/Value.h" 50*0b57cec5SDimitry Andric #include "llvm/Pass.h" 51*0b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 52*0b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 53*0b57cec5SDimitry Andric #include "llvm/Support/Compiler.h" 54*0b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 55*0b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 56*0b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h" 57*0b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 58*0b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 59*0b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h" 60*0b57cec5SDimitry Andric #include <algorithm> 61*0b57cec5SDimitry Andric #include <array> 62*0b57cec5SDimitry Andric #include <cassert> 63*0b57cec5SDimitry Andric #include <cstdint> 64*0b57cec5SDimitry Andric #include <cstdlib> 65*0b57cec5SDimitry Andric #include <deque> 66*0b57cec5SDimitry Andric #include <functional> 67*0b57cec5SDimitry Andric #include <iterator> 68*0b57cec5SDimitry Andric #include <map> 69*0b57cec5SDimitry Andric #include <set> 70*0b57cec5SDimitry Andric #include <utility> 71*0b57cec5SDimitry Andric #include <vector> 72*0b57cec5SDimitry Andric 73*0b57cec5SDimitry Andric using namespace llvm; 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom", 76*0b57cec5SDimitry Andric cl::Hidden, cl::init(false), 77*0b57cec5SDimitry Andric cl::desc("Disable generation of memcpy in loop idiom recognition")); 78*0b57cec5SDimitry Andric 79*0b57cec5SDimitry Andric static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom", 80*0b57cec5SDimitry Andric cl::Hidden, cl::init(false), 81*0b57cec5SDimitry Andric cl::desc("Disable generation of memmove in loop idiom recognition")); 82*0b57cec5SDimitry Andric 83*0b57cec5SDimitry Andric static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", 84*0b57cec5SDimitry Andric cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " 85*0b57cec5SDimitry Andric "check guarding the memmove.")); 86*0b57cec5SDimitry Andric 87*0b57cec5SDimitry Andric static cl::opt<unsigned> CompileTimeMemSizeThreshold( 88*0b57cec5SDimitry Andric "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), 89*0b57cec5SDimitry Andric cl::desc("Threshold (in bytes) to perform the transformation, if the " 90*0b57cec5SDimitry Andric "runtime loop count (mem transfer size) is known at compile-time.")); 91*0b57cec5SDimitry Andric 92*0b57cec5SDimitry Andric static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom", 93*0b57cec5SDimitry Andric cl::Hidden, cl::init(true), 94*0b57cec5SDimitry Andric cl::desc("Only enable generating memmove in non-nested loops")); 95*0b57cec5SDimitry Andric 96*0b57cec5SDimitry Andric cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy", 97*0b57cec5SDimitry Andric cl::Hidden, cl::init(false), 98*0b57cec5SDimitry Andric cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), 101*0b57cec5SDimitry Andric cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); 102*0b57cec5SDimitry Andric 103*0b57cec5SDimitry Andric static const char *HexagonVolatileMemcpyName 104*0b57cec5SDimitry Andric = "hexagon_memcpy_forward_vp4cp4n2"; 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric 107*0b57cec5SDimitry Andric namespace llvm { 108*0b57cec5SDimitry Andric 109*0b57cec5SDimitry Andric void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); 110*0b57cec5SDimitry Andric Pass *createHexagonLoopIdiomPass(); 111*0b57cec5SDimitry Andric 112*0b57cec5SDimitry Andric } // end namespace llvm 113*0b57cec5SDimitry Andric 114*0b57cec5SDimitry Andric namespace { 115*0b57cec5SDimitry Andric 116*0b57cec5SDimitry Andric class HexagonLoopIdiomRecognize : public LoopPass { 117*0b57cec5SDimitry Andric public: 118*0b57cec5SDimitry Andric static char ID; 119*0b57cec5SDimitry Andric 120*0b57cec5SDimitry Andric explicit HexagonLoopIdiomRecognize() : LoopPass(ID) { 121*0b57cec5SDimitry Andric initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); 122*0b57cec5SDimitry Andric } 123*0b57cec5SDimitry Andric 124*0b57cec5SDimitry Andric StringRef getPassName() const override { 125*0b57cec5SDimitry Andric return "Recognize Hexagon-specific loop idioms"; 126*0b57cec5SDimitry Andric } 127*0b57cec5SDimitry Andric 128*0b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 129*0b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 130*0b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID); 131*0b57cec5SDimitry Andric AU.addRequiredID(LCSSAID); 132*0b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>(); 133*0b57cec5SDimitry Andric AU.addPreserved<AAResultsWrapperPass>(); 134*0b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 135*0b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 136*0b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 137*0b57cec5SDimitry Andric AU.addPreserved<TargetLibraryInfoWrapperPass>(); 138*0b57cec5SDimitry Andric } 139*0b57cec5SDimitry Andric 140*0b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override; 141*0b57cec5SDimitry Andric 142*0b57cec5SDimitry Andric private: 143*0b57cec5SDimitry Andric int getSCEVStride(const SCEVAddRecExpr *StoreEv); 144*0b57cec5SDimitry Andric bool isLegalStore(Loop *CurLoop, StoreInst *SI); 145*0b57cec5SDimitry Andric void collectStores(Loop *CurLoop, BasicBlock *BB, 146*0b57cec5SDimitry Andric SmallVectorImpl<StoreInst*> &Stores); 147*0b57cec5SDimitry Andric bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount); 148*0b57cec5SDimitry Andric bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const; 149*0b57cec5SDimitry Andric bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount, 150*0b57cec5SDimitry Andric SmallVectorImpl<BasicBlock*> &ExitBlocks); 151*0b57cec5SDimitry Andric bool runOnCountableLoop(Loop *L); 152*0b57cec5SDimitry Andric 153*0b57cec5SDimitry Andric AliasAnalysis *AA; 154*0b57cec5SDimitry Andric const DataLayout *DL; 155*0b57cec5SDimitry Andric DominatorTree *DT; 156*0b57cec5SDimitry Andric LoopInfo *LF; 157*0b57cec5SDimitry Andric const TargetLibraryInfo *TLI; 158*0b57cec5SDimitry Andric ScalarEvolution *SE; 159*0b57cec5SDimitry Andric bool HasMemcpy, HasMemmove; 160*0b57cec5SDimitry Andric }; 161*0b57cec5SDimitry Andric 162*0b57cec5SDimitry Andric struct Simplifier { 163*0b57cec5SDimitry Andric struct Rule { 164*0b57cec5SDimitry Andric using FuncType = std::function<Value* (Instruction*, LLVMContext&)>; 165*0b57cec5SDimitry Andric Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} 166*0b57cec5SDimitry Andric StringRef Name; // For debugging. 167*0b57cec5SDimitry Andric FuncType Fn; 168*0b57cec5SDimitry Andric }; 169*0b57cec5SDimitry Andric 170*0b57cec5SDimitry Andric void addRule(StringRef N, const Rule::FuncType &F) { 171*0b57cec5SDimitry Andric Rules.push_back(Rule(N, F)); 172*0b57cec5SDimitry Andric } 173*0b57cec5SDimitry Andric 174*0b57cec5SDimitry Andric private: 175*0b57cec5SDimitry Andric struct WorkListType { 176*0b57cec5SDimitry Andric WorkListType() = default; 177*0b57cec5SDimitry Andric 178*0b57cec5SDimitry Andric void push_back(Value* V) { 179*0b57cec5SDimitry Andric // Do not push back duplicates. 180*0b57cec5SDimitry Andric if (!S.count(V)) { Q.push_back(V); S.insert(V); } 181*0b57cec5SDimitry Andric } 182*0b57cec5SDimitry Andric 183*0b57cec5SDimitry Andric Value *pop_front_val() { 184*0b57cec5SDimitry Andric Value *V = Q.front(); Q.pop_front(); S.erase(V); 185*0b57cec5SDimitry Andric return V; 186*0b57cec5SDimitry Andric } 187*0b57cec5SDimitry Andric 188*0b57cec5SDimitry Andric bool empty() const { return Q.empty(); } 189*0b57cec5SDimitry Andric 190*0b57cec5SDimitry Andric private: 191*0b57cec5SDimitry Andric std::deque<Value*> Q; 192*0b57cec5SDimitry Andric std::set<Value*> S; 193*0b57cec5SDimitry Andric }; 194*0b57cec5SDimitry Andric 195*0b57cec5SDimitry Andric using ValueSetType = std::set<Value *>; 196*0b57cec5SDimitry Andric 197*0b57cec5SDimitry Andric std::vector<Rule> Rules; 198*0b57cec5SDimitry Andric 199*0b57cec5SDimitry Andric public: 200*0b57cec5SDimitry Andric struct Context { 201*0b57cec5SDimitry Andric using ValueMapType = DenseMap<Value *, Value *>; 202*0b57cec5SDimitry Andric 203*0b57cec5SDimitry Andric Value *Root; 204*0b57cec5SDimitry Andric ValueSetType Used; // The set of all cloned values used by Root. 205*0b57cec5SDimitry Andric ValueSetType Clones; // The set of all cloned values. 206*0b57cec5SDimitry Andric LLVMContext &Ctx; 207*0b57cec5SDimitry Andric 208*0b57cec5SDimitry Andric Context(Instruction *Exp) 209*0b57cec5SDimitry Andric : Ctx(Exp->getParent()->getParent()->getContext()) { 210*0b57cec5SDimitry Andric initialize(Exp); 211*0b57cec5SDimitry Andric } 212*0b57cec5SDimitry Andric 213*0b57cec5SDimitry Andric ~Context() { cleanup(); } 214*0b57cec5SDimitry Andric 215*0b57cec5SDimitry Andric void print(raw_ostream &OS, const Value *V) const; 216*0b57cec5SDimitry Andric Value *materialize(BasicBlock *B, BasicBlock::iterator At); 217*0b57cec5SDimitry Andric 218*0b57cec5SDimitry Andric private: 219*0b57cec5SDimitry Andric friend struct Simplifier; 220*0b57cec5SDimitry Andric 221*0b57cec5SDimitry Andric void initialize(Instruction *Exp); 222*0b57cec5SDimitry Andric void cleanup(); 223*0b57cec5SDimitry Andric 224*0b57cec5SDimitry Andric template <typename FuncT> void traverse(Value *V, FuncT F); 225*0b57cec5SDimitry Andric void record(Value *V); 226*0b57cec5SDimitry Andric void use(Value *V); 227*0b57cec5SDimitry Andric void unuse(Value *V); 228*0b57cec5SDimitry Andric 229*0b57cec5SDimitry Andric bool equal(const Instruction *I, const Instruction *J) const; 230*0b57cec5SDimitry Andric Value *find(Value *Tree, Value *Sub) const; 231*0b57cec5SDimitry Andric Value *subst(Value *Tree, Value *OldV, Value *NewV); 232*0b57cec5SDimitry Andric void replace(Value *OldV, Value *NewV); 233*0b57cec5SDimitry Andric void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); 234*0b57cec5SDimitry Andric }; 235*0b57cec5SDimitry Andric 236*0b57cec5SDimitry Andric Value *simplify(Context &C); 237*0b57cec5SDimitry Andric }; 238*0b57cec5SDimitry Andric 239*0b57cec5SDimitry Andric struct PE { 240*0b57cec5SDimitry Andric PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} 241*0b57cec5SDimitry Andric 242*0b57cec5SDimitry Andric const Simplifier::Context &C; 243*0b57cec5SDimitry Andric const Value *V; 244*0b57cec5SDimitry Andric }; 245*0b57cec5SDimitry Andric 246*0b57cec5SDimitry Andric LLVM_ATTRIBUTE_USED 247*0b57cec5SDimitry Andric raw_ostream &operator<<(raw_ostream &OS, const PE &P) { 248*0b57cec5SDimitry Andric P.C.print(OS, P.V ? P.V : P.C.Root); 249*0b57cec5SDimitry Andric return OS; 250*0b57cec5SDimitry Andric } 251*0b57cec5SDimitry Andric 252*0b57cec5SDimitry Andric } // end anonymous namespace 253*0b57cec5SDimitry Andric 254*0b57cec5SDimitry Andric char HexagonLoopIdiomRecognize::ID = 0; 255*0b57cec5SDimitry Andric 256*0b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom", 257*0b57cec5SDimitry Andric "Recognize Hexagon-specific loop idioms", false, false) 258*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 259*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 260*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 261*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 262*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 263*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 264*0b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 265*0b57cec5SDimitry Andric INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom", 266*0b57cec5SDimitry Andric "Recognize Hexagon-specific loop idioms", false, false) 267*0b57cec5SDimitry Andric 268*0b57cec5SDimitry Andric template <typename FuncT> 269*0b57cec5SDimitry Andric void Simplifier::Context::traverse(Value *V, FuncT F) { 270*0b57cec5SDimitry Andric WorkListType Q; 271*0b57cec5SDimitry Andric Q.push_back(V); 272*0b57cec5SDimitry Andric 273*0b57cec5SDimitry Andric while (!Q.empty()) { 274*0b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 275*0b57cec5SDimitry Andric if (!U || U->getParent()) 276*0b57cec5SDimitry Andric continue; 277*0b57cec5SDimitry Andric if (!F(U)) 278*0b57cec5SDimitry Andric continue; 279*0b57cec5SDimitry Andric for (Value *Op : U->operands()) 280*0b57cec5SDimitry Andric Q.push_back(Op); 281*0b57cec5SDimitry Andric } 282*0b57cec5SDimitry Andric } 283*0b57cec5SDimitry Andric 284*0b57cec5SDimitry Andric void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { 285*0b57cec5SDimitry Andric const auto *U = dyn_cast<const Instruction>(V); 286*0b57cec5SDimitry Andric if (!U) { 287*0b57cec5SDimitry Andric OS << V << '(' << *V << ')'; 288*0b57cec5SDimitry Andric return; 289*0b57cec5SDimitry Andric } 290*0b57cec5SDimitry Andric 291*0b57cec5SDimitry Andric if (U->getParent()) { 292*0b57cec5SDimitry Andric OS << U << '('; 293*0b57cec5SDimitry Andric U->printAsOperand(OS, true); 294*0b57cec5SDimitry Andric OS << ')'; 295*0b57cec5SDimitry Andric return; 296*0b57cec5SDimitry Andric } 297*0b57cec5SDimitry Andric 298*0b57cec5SDimitry Andric unsigned N = U->getNumOperands(); 299*0b57cec5SDimitry Andric if (N != 0) 300*0b57cec5SDimitry Andric OS << U << '('; 301*0b57cec5SDimitry Andric OS << U->getOpcodeName(); 302*0b57cec5SDimitry Andric for (const Value *Op : U->operands()) { 303*0b57cec5SDimitry Andric OS << ' '; 304*0b57cec5SDimitry Andric print(OS, Op); 305*0b57cec5SDimitry Andric } 306*0b57cec5SDimitry Andric if (N != 0) 307*0b57cec5SDimitry Andric OS << ')'; 308*0b57cec5SDimitry Andric } 309*0b57cec5SDimitry Andric 310*0b57cec5SDimitry Andric void Simplifier::Context::initialize(Instruction *Exp) { 311*0b57cec5SDimitry Andric // Perform a deep clone of the expression, set Root to the root 312*0b57cec5SDimitry Andric // of the clone, and build a map from the cloned values to the 313*0b57cec5SDimitry Andric // original ones. 314*0b57cec5SDimitry Andric ValueMapType M; 315*0b57cec5SDimitry Andric BasicBlock *Block = Exp->getParent(); 316*0b57cec5SDimitry Andric WorkListType Q; 317*0b57cec5SDimitry Andric Q.push_back(Exp); 318*0b57cec5SDimitry Andric 319*0b57cec5SDimitry Andric while (!Q.empty()) { 320*0b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 321*0b57cec5SDimitry Andric if (M.find(V) != M.end()) 322*0b57cec5SDimitry Andric continue; 323*0b57cec5SDimitry Andric if (Instruction *U = dyn_cast<Instruction>(V)) { 324*0b57cec5SDimitry Andric if (isa<PHINode>(U) || U->getParent() != Block) 325*0b57cec5SDimitry Andric continue; 326*0b57cec5SDimitry Andric for (Value *Op : U->operands()) 327*0b57cec5SDimitry Andric Q.push_back(Op); 328*0b57cec5SDimitry Andric M.insert({U, U->clone()}); 329*0b57cec5SDimitry Andric } 330*0b57cec5SDimitry Andric } 331*0b57cec5SDimitry Andric 332*0b57cec5SDimitry Andric for (std::pair<Value*,Value*> P : M) { 333*0b57cec5SDimitry Andric Instruction *U = cast<Instruction>(P.second); 334*0b57cec5SDimitry Andric for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { 335*0b57cec5SDimitry Andric auto F = M.find(U->getOperand(i)); 336*0b57cec5SDimitry Andric if (F != M.end()) 337*0b57cec5SDimitry Andric U->setOperand(i, F->second); 338*0b57cec5SDimitry Andric } 339*0b57cec5SDimitry Andric } 340*0b57cec5SDimitry Andric 341*0b57cec5SDimitry Andric auto R = M.find(Exp); 342*0b57cec5SDimitry Andric assert(R != M.end()); 343*0b57cec5SDimitry Andric Root = R->second; 344*0b57cec5SDimitry Andric 345*0b57cec5SDimitry Andric record(Root); 346*0b57cec5SDimitry Andric use(Root); 347*0b57cec5SDimitry Andric } 348*0b57cec5SDimitry Andric 349*0b57cec5SDimitry Andric void Simplifier::Context::record(Value *V) { 350*0b57cec5SDimitry Andric auto Record = [this](Instruction *U) -> bool { 351*0b57cec5SDimitry Andric Clones.insert(U); 352*0b57cec5SDimitry Andric return true; 353*0b57cec5SDimitry Andric }; 354*0b57cec5SDimitry Andric traverse(V, Record); 355*0b57cec5SDimitry Andric } 356*0b57cec5SDimitry Andric 357*0b57cec5SDimitry Andric void Simplifier::Context::use(Value *V) { 358*0b57cec5SDimitry Andric auto Use = [this](Instruction *U) -> bool { 359*0b57cec5SDimitry Andric Used.insert(U); 360*0b57cec5SDimitry Andric return true; 361*0b57cec5SDimitry Andric }; 362*0b57cec5SDimitry Andric traverse(V, Use); 363*0b57cec5SDimitry Andric } 364*0b57cec5SDimitry Andric 365*0b57cec5SDimitry Andric void Simplifier::Context::unuse(Value *V) { 366*0b57cec5SDimitry Andric if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr) 367*0b57cec5SDimitry Andric return; 368*0b57cec5SDimitry Andric 369*0b57cec5SDimitry Andric auto Unuse = [this](Instruction *U) -> bool { 370*0b57cec5SDimitry Andric if (!U->use_empty()) 371*0b57cec5SDimitry Andric return false; 372*0b57cec5SDimitry Andric Used.erase(U); 373*0b57cec5SDimitry Andric return true; 374*0b57cec5SDimitry Andric }; 375*0b57cec5SDimitry Andric traverse(V, Unuse); 376*0b57cec5SDimitry Andric } 377*0b57cec5SDimitry Andric 378*0b57cec5SDimitry Andric Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { 379*0b57cec5SDimitry Andric if (Tree == OldV) 380*0b57cec5SDimitry Andric return NewV; 381*0b57cec5SDimitry Andric if (OldV == NewV) 382*0b57cec5SDimitry Andric return Tree; 383*0b57cec5SDimitry Andric 384*0b57cec5SDimitry Andric WorkListType Q; 385*0b57cec5SDimitry Andric Q.push_back(Tree); 386*0b57cec5SDimitry Andric while (!Q.empty()) { 387*0b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 388*0b57cec5SDimitry Andric // If U is not an instruction, or it's not a clone, skip it. 389*0b57cec5SDimitry Andric if (!U || U->getParent()) 390*0b57cec5SDimitry Andric continue; 391*0b57cec5SDimitry Andric for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { 392*0b57cec5SDimitry Andric Value *Op = U->getOperand(i); 393*0b57cec5SDimitry Andric if (Op == OldV) { 394*0b57cec5SDimitry Andric U->setOperand(i, NewV); 395*0b57cec5SDimitry Andric unuse(OldV); 396*0b57cec5SDimitry Andric } else { 397*0b57cec5SDimitry Andric Q.push_back(Op); 398*0b57cec5SDimitry Andric } 399*0b57cec5SDimitry Andric } 400*0b57cec5SDimitry Andric } 401*0b57cec5SDimitry Andric return Tree; 402*0b57cec5SDimitry Andric } 403*0b57cec5SDimitry Andric 404*0b57cec5SDimitry Andric void Simplifier::Context::replace(Value *OldV, Value *NewV) { 405*0b57cec5SDimitry Andric if (Root == OldV) { 406*0b57cec5SDimitry Andric Root = NewV; 407*0b57cec5SDimitry Andric use(Root); 408*0b57cec5SDimitry Andric return; 409*0b57cec5SDimitry Andric } 410*0b57cec5SDimitry Andric 411*0b57cec5SDimitry Andric // NewV may be a complex tree that has just been created by one of the 412*0b57cec5SDimitry Andric // transformation rules. We need to make sure that it is commoned with 413*0b57cec5SDimitry Andric // the existing Root to the maximum extent possible. 414*0b57cec5SDimitry Andric // Identify all subtrees of NewV (including NewV itself) that have 415*0b57cec5SDimitry Andric // equivalent counterparts in Root, and replace those subtrees with 416*0b57cec5SDimitry Andric // these counterparts. 417*0b57cec5SDimitry Andric WorkListType Q; 418*0b57cec5SDimitry Andric Q.push_back(NewV); 419*0b57cec5SDimitry Andric while (!Q.empty()) { 420*0b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 421*0b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(V); 422*0b57cec5SDimitry Andric if (!U || U->getParent()) 423*0b57cec5SDimitry Andric continue; 424*0b57cec5SDimitry Andric if (Value *DupV = find(Root, V)) { 425*0b57cec5SDimitry Andric if (DupV != V) 426*0b57cec5SDimitry Andric NewV = subst(NewV, V, DupV); 427*0b57cec5SDimitry Andric } else { 428*0b57cec5SDimitry Andric for (Value *Op : U->operands()) 429*0b57cec5SDimitry Andric Q.push_back(Op); 430*0b57cec5SDimitry Andric } 431*0b57cec5SDimitry Andric } 432*0b57cec5SDimitry Andric 433*0b57cec5SDimitry Andric // Now, simply replace OldV with NewV in Root. 434*0b57cec5SDimitry Andric Root = subst(Root, OldV, NewV); 435*0b57cec5SDimitry Andric use(Root); 436*0b57cec5SDimitry Andric } 437*0b57cec5SDimitry Andric 438*0b57cec5SDimitry Andric void Simplifier::Context::cleanup() { 439*0b57cec5SDimitry Andric for (Value *V : Clones) { 440*0b57cec5SDimitry Andric Instruction *U = cast<Instruction>(V); 441*0b57cec5SDimitry Andric if (!U->getParent()) 442*0b57cec5SDimitry Andric U->dropAllReferences(); 443*0b57cec5SDimitry Andric } 444*0b57cec5SDimitry Andric 445*0b57cec5SDimitry Andric for (Value *V : Clones) { 446*0b57cec5SDimitry Andric Instruction *U = cast<Instruction>(V); 447*0b57cec5SDimitry Andric if (!U->getParent()) 448*0b57cec5SDimitry Andric U->deleteValue(); 449*0b57cec5SDimitry Andric } 450*0b57cec5SDimitry Andric } 451*0b57cec5SDimitry Andric 452*0b57cec5SDimitry Andric bool Simplifier::Context::equal(const Instruction *I, 453*0b57cec5SDimitry Andric const Instruction *J) const { 454*0b57cec5SDimitry Andric if (I == J) 455*0b57cec5SDimitry Andric return true; 456*0b57cec5SDimitry Andric if (!I->isSameOperationAs(J)) 457*0b57cec5SDimitry Andric return false; 458*0b57cec5SDimitry Andric if (isa<PHINode>(I)) 459*0b57cec5SDimitry Andric return I->isIdenticalTo(J); 460*0b57cec5SDimitry Andric 461*0b57cec5SDimitry Andric for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { 462*0b57cec5SDimitry Andric Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); 463*0b57cec5SDimitry Andric if (OpI == OpJ) 464*0b57cec5SDimitry Andric continue; 465*0b57cec5SDimitry Andric auto *InI = dyn_cast<const Instruction>(OpI); 466*0b57cec5SDimitry Andric auto *InJ = dyn_cast<const Instruction>(OpJ); 467*0b57cec5SDimitry Andric if (InI && InJ) { 468*0b57cec5SDimitry Andric if (!equal(InI, InJ)) 469*0b57cec5SDimitry Andric return false; 470*0b57cec5SDimitry Andric } else if (InI != InJ || !InI) 471*0b57cec5SDimitry Andric return false; 472*0b57cec5SDimitry Andric } 473*0b57cec5SDimitry Andric return true; 474*0b57cec5SDimitry Andric } 475*0b57cec5SDimitry Andric 476*0b57cec5SDimitry Andric Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { 477*0b57cec5SDimitry Andric Instruction *SubI = dyn_cast<Instruction>(Sub); 478*0b57cec5SDimitry Andric WorkListType Q; 479*0b57cec5SDimitry Andric Q.push_back(Tree); 480*0b57cec5SDimitry Andric 481*0b57cec5SDimitry Andric while (!Q.empty()) { 482*0b57cec5SDimitry Andric Value *V = Q.pop_front_val(); 483*0b57cec5SDimitry Andric if (V == Sub) 484*0b57cec5SDimitry Andric return V; 485*0b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(V); 486*0b57cec5SDimitry Andric if (!U || U->getParent()) 487*0b57cec5SDimitry Andric continue; 488*0b57cec5SDimitry Andric if (SubI && equal(SubI, U)) 489*0b57cec5SDimitry Andric return U; 490*0b57cec5SDimitry Andric assert(!isa<PHINode>(U)); 491*0b57cec5SDimitry Andric for (Value *Op : U->operands()) 492*0b57cec5SDimitry Andric Q.push_back(Op); 493*0b57cec5SDimitry Andric } 494*0b57cec5SDimitry Andric return nullptr; 495*0b57cec5SDimitry Andric } 496*0b57cec5SDimitry Andric 497*0b57cec5SDimitry Andric void Simplifier::Context::link(Instruction *I, BasicBlock *B, 498*0b57cec5SDimitry Andric BasicBlock::iterator At) { 499*0b57cec5SDimitry Andric if (I->getParent()) 500*0b57cec5SDimitry Andric return; 501*0b57cec5SDimitry Andric 502*0b57cec5SDimitry Andric for (Value *Op : I->operands()) { 503*0b57cec5SDimitry Andric if (Instruction *OpI = dyn_cast<Instruction>(Op)) 504*0b57cec5SDimitry Andric link(OpI, B, At); 505*0b57cec5SDimitry Andric } 506*0b57cec5SDimitry Andric 507*0b57cec5SDimitry Andric B->getInstList().insert(At, I); 508*0b57cec5SDimitry Andric } 509*0b57cec5SDimitry Andric 510*0b57cec5SDimitry Andric Value *Simplifier::Context::materialize(BasicBlock *B, 511*0b57cec5SDimitry Andric BasicBlock::iterator At) { 512*0b57cec5SDimitry Andric if (Instruction *RootI = dyn_cast<Instruction>(Root)) 513*0b57cec5SDimitry Andric link(RootI, B, At); 514*0b57cec5SDimitry Andric return Root; 515*0b57cec5SDimitry Andric } 516*0b57cec5SDimitry Andric 517*0b57cec5SDimitry Andric Value *Simplifier::simplify(Context &C) { 518*0b57cec5SDimitry Andric WorkListType Q; 519*0b57cec5SDimitry Andric Q.push_back(C.Root); 520*0b57cec5SDimitry Andric unsigned Count = 0; 521*0b57cec5SDimitry Andric const unsigned Limit = SimplifyLimit; 522*0b57cec5SDimitry Andric 523*0b57cec5SDimitry Andric while (!Q.empty()) { 524*0b57cec5SDimitry Andric if (Count++ >= Limit) 525*0b57cec5SDimitry Andric break; 526*0b57cec5SDimitry Andric Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); 527*0b57cec5SDimitry Andric if (!U || U->getParent() || !C.Used.count(U)) 528*0b57cec5SDimitry Andric continue; 529*0b57cec5SDimitry Andric bool Changed = false; 530*0b57cec5SDimitry Andric for (Rule &R : Rules) { 531*0b57cec5SDimitry Andric Value *W = R.Fn(U, C.Ctx); 532*0b57cec5SDimitry Andric if (!W) 533*0b57cec5SDimitry Andric continue; 534*0b57cec5SDimitry Andric Changed = true; 535*0b57cec5SDimitry Andric C.record(W); 536*0b57cec5SDimitry Andric C.replace(U, W); 537*0b57cec5SDimitry Andric Q.push_back(C.Root); 538*0b57cec5SDimitry Andric break; 539*0b57cec5SDimitry Andric } 540*0b57cec5SDimitry Andric if (!Changed) { 541*0b57cec5SDimitry Andric for (Value *Op : U->operands()) 542*0b57cec5SDimitry Andric Q.push_back(Op); 543*0b57cec5SDimitry Andric } 544*0b57cec5SDimitry Andric } 545*0b57cec5SDimitry Andric return Count < Limit ? C.Root : nullptr; 546*0b57cec5SDimitry Andric } 547*0b57cec5SDimitry Andric 548*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 549*0b57cec5SDimitry Andric // 550*0b57cec5SDimitry Andric // Implementation of PolynomialMultiplyRecognize 551*0b57cec5SDimitry Andric // 552*0b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 553*0b57cec5SDimitry Andric 554*0b57cec5SDimitry Andric namespace { 555*0b57cec5SDimitry Andric 556*0b57cec5SDimitry Andric class PolynomialMultiplyRecognize { 557*0b57cec5SDimitry Andric public: 558*0b57cec5SDimitry Andric explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl, 559*0b57cec5SDimitry Andric const DominatorTree &dt, const TargetLibraryInfo &tli, 560*0b57cec5SDimitry Andric ScalarEvolution &se) 561*0b57cec5SDimitry Andric : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {} 562*0b57cec5SDimitry Andric 563*0b57cec5SDimitry Andric bool recognize(); 564*0b57cec5SDimitry Andric 565*0b57cec5SDimitry Andric private: 566*0b57cec5SDimitry Andric using ValueSeq = SetVector<Value *>; 567*0b57cec5SDimitry Andric 568*0b57cec5SDimitry Andric IntegerType *getPmpyType() const { 569*0b57cec5SDimitry Andric LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); 570*0b57cec5SDimitry Andric return IntegerType::get(Ctx, 32); 571*0b57cec5SDimitry Andric } 572*0b57cec5SDimitry Andric 573*0b57cec5SDimitry Andric bool isPromotableTo(Value *V, IntegerType *Ty); 574*0b57cec5SDimitry Andric void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); 575*0b57cec5SDimitry Andric bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); 576*0b57cec5SDimitry Andric 577*0b57cec5SDimitry Andric Value *getCountIV(BasicBlock *BB); 578*0b57cec5SDimitry Andric bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); 579*0b57cec5SDimitry Andric void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, 580*0b57cec5SDimitry Andric ValueSeq &Late); 581*0b57cec5SDimitry Andric bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late); 582*0b57cec5SDimitry Andric bool commutesWithShift(Instruction *I); 583*0b57cec5SDimitry Andric bool highBitsAreZero(Value *V, unsigned IterCount); 584*0b57cec5SDimitry Andric bool keepsHighBitsZero(Value *V, unsigned IterCount); 585*0b57cec5SDimitry Andric bool isOperandShifted(Instruction *I, Value *Op); 586*0b57cec5SDimitry Andric bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB, 587*0b57cec5SDimitry Andric unsigned IterCount); 588*0b57cec5SDimitry Andric void cleanupLoopBody(BasicBlock *LoopB); 589*0b57cec5SDimitry Andric 590*0b57cec5SDimitry Andric struct ParsedValues { 591*0b57cec5SDimitry Andric ParsedValues() = default; 592*0b57cec5SDimitry Andric 593*0b57cec5SDimitry Andric Value *M = nullptr; 594*0b57cec5SDimitry Andric Value *P = nullptr; 595*0b57cec5SDimitry Andric Value *Q = nullptr; 596*0b57cec5SDimitry Andric Value *R = nullptr; 597*0b57cec5SDimitry Andric Value *X = nullptr; 598*0b57cec5SDimitry Andric Instruction *Res = nullptr; 599*0b57cec5SDimitry Andric unsigned IterCount = 0; 600*0b57cec5SDimitry Andric bool Left = false; 601*0b57cec5SDimitry Andric bool Inv = false; 602*0b57cec5SDimitry Andric }; 603*0b57cec5SDimitry Andric 604*0b57cec5SDimitry Andric bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV); 605*0b57cec5SDimitry Andric bool matchRightShift(SelectInst *SelI, ParsedValues &PV); 606*0b57cec5SDimitry Andric bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB, 607*0b57cec5SDimitry Andric Value *CIV, ParsedValues &PV, bool PreScan); 608*0b57cec5SDimitry Andric unsigned getInverseMxN(unsigned QP); 609*0b57cec5SDimitry Andric Value *generate(BasicBlock::iterator At, ParsedValues &PV); 610*0b57cec5SDimitry Andric 611*0b57cec5SDimitry Andric void setupPreSimplifier(Simplifier &S); 612*0b57cec5SDimitry Andric void setupPostSimplifier(Simplifier &S); 613*0b57cec5SDimitry Andric 614*0b57cec5SDimitry Andric Loop *CurLoop; 615*0b57cec5SDimitry Andric const DataLayout &DL; 616*0b57cec5SDimitry Andric const DominatorTree &DT; 617*0b57cec5SDimitry Andric const TargetLibraryInfo &TLI; 618*0b57cec5SDimitry Andric ScalarEvolution &SE; 619*0b57cec5SDimitry Andric }; 620*0b57cec5SDimitry Andric 621*0b57cec5SDimitry Andric } // end anonymous namespace 622*0b57cec5SDimitry Andric 623*0b57cec5SDimitry Andric Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { 624*0b57cec5SDimitry Andric pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 625*0b57cec5SDimitry Andric if (std::distance(PI, PE) != 2) 626*0b57cec5SDimitry Andric return nullptr; 627*0b57cec5SDimitry Andric BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI; 628*0b57cec5SDimitry Andric 629*0b57cec5SDimitry Andric for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { 630*0b57cec5SDimitry Andric auto *PN = cast<PHINode>(I); 631*0b57cec5SDimitry Andric Value *InitV = PN->getIncomingValueForBlock(PB); 632*0b57cec5SDimitry Andric if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero()) 633*0b57cec5SDimitry Andric continue; 634*0b57cec5SDimitry Andric Value *IterV = PN->getIncomingValueForBlock(BB); 635*0b57cec5SDimitry Andric if (!isa<BinaryOperator>(IterV)) 636*0b57cec5SDimitry Andric continue; 637*0b57cec5SDimitry Andric auto *BO = dyn_cast<BinaryOperator>(IterV); 638*0b57cec5SDimitry Andric if (BO->getOpcode() != Instruction::Add) 639*0b57cec5SDimitry Andric continue; 640*0b57cec5SDimitry Andric Value *IncV = nullptr; 641*0b57cec5SDimitry Andric if (BO->getOperand(0) == PN) 642*0b57cec5SDimitry Andric IncV = BO->getOperand(1); 643*0b57cec5SDimitry Andric else if (BO->getOperand(1) == PN) 644*0b57cec5SDimitry Andric IncV = BO->getOperand(0); 645*0b57cec5SDimitry Andric if (IncV == nullptr) 646*0b57cec5SDimitry Andric continue; 647*0b57cec5SDimitry Andric 648*0b57cec5SDimitry Andric if (auto *T = dyn_cast<ConstantInt>(IncV)) 649*0b57cec5SDimitry Andric if (T->getZExtValue() == 1) 650*0b57cec5SDimitry Andric return PN; 651*0b57cec5SDimitry Andric } 652*0b57cec5SDimitry Andric return nullptr; 653*0b57cec5SDimitry Andric } 654*0b57cec5SDimitry Andric 655*0b57cec5SDimitry Andric static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) { 656*0b57cec5SDimitry Andric for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { 657*0b57cec5SDimitry Andric Use &TheUse = UI.getUse(); 658*0b57cec5SDimitry Andric ++UI; 659*0b57cec5SDimitry Andric if (auto *II = dyn_cast<Instruction>(TheUse.getUser())) 660*0b57cec5SDimitry Andric if (BB == II->getParent()) 661*0b57cec5SDimitry Andric II->replaceUsesOfWith(I, J); 662*0b57cec5SDimitry Andric } 663*0b57cec5SDimitry Andric } 664*0b57cec5SDimitry Andric 665*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI, 666*0b57cec5SDimitry Andric Value *CIV, ParsedValues &PV) { 667*0b57cec5SDimitry Andric // Match the following: 668*0b57cec5SDimitry Andric // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R 669*0b57cec5SDimitry Andric // select (X & (1 << i)) == 0 ? R : R ^ (Q << i) 670*0b57cec5SDimitry Andric // The condition may also check for equality with the masked value, i.e 671*0b57cec5SDimitry Andric // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R 672*0b57cec5SDimitry Andric // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i); 673*0b57cec5SDimitry Andric 674*0b57cec5SDimitry Andric Value *CondV = SelI->getCondition(); 675*0b57cec5SDimitry Andric Value *TrueV = SelI->getTrueValue(); 676*0b57cec5SDimitry Andric Value *FalseV = SelI->getFalseValue(); 677*0b57cec5SDimitry Andric 678*0b57cec5SDimitry Andric using namespace PatternMatch; 679*0b57cec5SDimitry Andric 680*0b57cec5SDimitry Andric CmpInst::Predicate P; 681*0b57cec5SDimitry Andric Value *A = nullptr, *B = nullptr, *C = nullptr; 682*0b57cec5SDimitry Andric 683*0b57cec5SDimitry Andric if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) && 684*0b57cec5SDimitry Andric !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B))))) 685*0b57cec5SDimitry Andric return false; 686*0b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 687*0b57cec5SDimitry Andric return false; 688*0b57cec5SDimitry Andric // Matched: select (A & B) == C ? ... : ... 689*0b57cec5SDimitry Andric // select (A & B) != C ? ... : ... 690*0b57cec5SDimitry Andric 691*0b57cec5SDimitry Andric Value *X = nullptr, *Sh1 = nullptr; 692*0b57cec5SDimitry Andric // Check (A & B) for (X & (1 << i)): 693*0b57cec5SDimitry Andric if (match(A, m_Shl(m_One(), m_Specific(CIV)))) { 694*0b57cec5SDimitry Andric Sh1 = A; 695*0b57cec5SDimitry Andric X = B; 696*0b57cec5SDimitry Andric } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) { 697*0b57cec5SDimitry Andric Sh1 = B; 698*0b57cec5SDimitry Andric X = A; 699*0b57cec5SDimitry Andric } else { 700*0b57cec5SDimitry Andric // TODO: Could also check for an induction variable containing single 701*0b57cec5SDimitry Andric // bit shifted left by 1 in each iteration. 702*0b57cec5SDimitry Andric return false; 703*0b57cec5SDimitry Andric } 704*0b57cec5SDimitry Andric 705*0b57cec5SDimitry Andric bool TrueIfZero; 706*0b57cec5SDimitry Andric 707*0b57cec5SDimitry Andric // Check C against the possible values for comparison: 0 and (1 << i): 708*0b57cec5SDimitry Andric if (match(C, m_Zero())) 709*0b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_EQ); 710*0b57cec5SDimitry Andric else if (C == Sh1) 711*0b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_NE); 712*0b57cec5SDimitry Andric else 713*0b57cec5SDimitry Andric return false; 714*0b57cec5SDimitry Andric 715*0b57cec5SDimitry Andric // So far, matched: 716*0b57cec5SDimitry Andric // select (X & (1 << i)) ? ... : ... 717*0b57cec5SDimitry Andric // including variations of the check against zero/non-zero value. 718*0b57cec5SDimitry Andric 719*0b57cec5SDimitry Andric Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr; 720*0b57cec5SDimitry Andric if (TrueIfZero) { 721*0b57cec5SDimitry Andric ShouldSameV = TrueV; 722*0b57cec5SDimitry Andric ShouldXoredV = FalseV; 723*0b57cec5SDimitry Andric } else { 724*0b57cec5SDimitry Andric ShouldSameV = FalseV; 725*0b57cec5SDimitry Andric ShouldXoredV = TrueV; 726*0b57cec5SDimitry Andric } 727*0b57cec5SDimitry Andric 728*0b57cec5SDimitry Andric Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr; 729*0b57cec5SDimitry Andric Value *T = nullptr; 730*0b57cec5SDimitry Andric if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) { 731*0b57cec5SDimitry Andric // Matched: select +++ ? ... : Y ^ Z 732*0b57cec5SDimitry Andric // select +++ ? Y ^ Z : ... 733*0b57cec5SDimitry Andric // where +++ denotes previously checked matches. 734*0b57cec5SDimitry Andric if (ShouldSameV == Y) 735*0b57cec5SDimitry Andric T = Z; 736*0b57cec5SDimitry Andric else if (ShouldSameV == Z) 737*0b57cec5SDimitry Andric T = Y; 738*0b57cec5SDimitry Andric else 739*0b57cec5SDimitry Andric return false; 740*0b57cec5SDimitry Andric R = ShouldSameV; 741*0b57cec5SDimitry Andric // Matched: select +++ ? R : R ^ T 742*0b57cec5SDimitry Andric // select +++ ? R ^ T : R 743*0b57cec5SDimitry Andric // depending on TrueIfZero. 744*0b57cec5SDimitry Andric 745*0b57cec5SDimitry Andric } else if (match(ShouldSameV, m_Zero())) { 746*0b57cec5SDimitry Andric // Matched: select +++ ? 0 : ... 747*0b57cec5SDimitry Andric // select +++ ? ... : 0 748*0b57cec5SDimitry Andric if (!SelI->hasOneUse()) 749*0b57cec5SDimitry Andric return false; 750*0b57cec5SDimitry Andric T = ShouldXoredV; 751*0b57cec5SDimitry Andric // Matched: select +++ ? 0 : T 752*0b57cec5SDimitry Andric // select +++ ? T : 0 753*0b57cec5SDimitry Andric 754*0b57cec5SDimitry Andric Value *U = *SelI->user_begin(); 755*0b57cec5SDimitry Andric if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) && 756*0b57cec5SDimitry Andric !match(U, m_Xor(m_Value(R), m_Specific(SelI)))) 757*0b57cec5SDimitry Andric return false; 758*0b57cec5SDimitry Andric // Matched: xor (select +++ ? 0 : T), R 759*0b57cec5SDimitry Andric // xor (select +++ ? T : 0), R 760*0b57cec5SDimitry Andric } else 761*0b57cec5SDimitry Andric return false; 762*0b57cec5SDimitry Andric 763*0b57cec5SDimitry Andric // The xor input value T is isolated into its own match so that it could 764*0b57cec5SDimitry Andric // be checked against an induction variable containing a shifted bit 765*0b57cec5SDimitry Andric // (todo). 766*0b57cec5SDimitry Andric // For now, check against (Q << i). 767*0b57cec5SDimitry Andric if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) && 768*0b57cec5SDimitry Andric !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV))))) 769*0b57cec5SDimitry Andric return false; 770*0b57cec5SDimitry Andric // Matched: select +++ ? R : R ^ (Q << i) 771*0b57cec5SDimitry Andric // select +++ ? R ^ (Q << i) : R 772*0b57cec5SDimitry Andric 773*0b57cec5SDimitry Andric PV.X = X; 774*0b57cec5SDimitry Andric PV.Q = Q; 775*0b57cec5SDimitry Andric PV.R = R; 776*0b57cec5SDimitry Andric PV.Left = true; 777*0b57cec5SDimitry Andric return true; 778*0b57cec5SDimitry Andric } 779*0b57cec5SDimitry Andric 780*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI, 781*0b57cec5SDimitry Andric ParsedValues &PV) { 782*0b57cec5SDimitry Andric // Match the following: 783*0b57cec5SDimitry Andric // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1) 784*0b57cec5SDimitry Andric // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q 785*0b57cec5SDimitry Andric // The condition may also check for equality with the masked value, i.e 786*0b57cec5SDimitry Andric // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1) 787*0b57cec5SDimitry Andric // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q 788*0b57cec5SDimitry Andric 789*0b57cec5SDimitry Andric Value *CondV = SelI->getCondition(); 790*0b57cec5SDimitry Andric Value *TrueV = SelI->getTrueValue(); 791*0b57cec5SDimitry Andric Value *FalseV = SelI->getFalseValue(); 792*0b57cec5SDimitry Andric 793*0b57cec5SDimitry Andric using namespace PatternMatch; 794*0b57cec5SDimitry Andric 795*0b57cec5SDimitry Andric Value *C = nullptr; 796*0b57cec5SDimitry Andric CmpInst::Predicate P; 797*0b57cec5SDimitry Andric bool TrueIfZero; 798*0b57cec5SDimitry Andric 799*0b57cec5SDimitry Andric if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) || 800*0b57cec5SDimitry Andric match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) { 801*0b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 802*0b57cec5SDimitry Andric return false; 803*0b57cec5SDimitry Andric // Matched: select C == 0 ? ... : ... 804*0b57cec5SDimitry Andric // select C != 0 ? ... : ... 805*0b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_EQ); 806*0b57cec5SDimitry Andric } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) || 807*0b57cec5SDimitry Andric match(CondV, m_ICmp(P, m_One(), m_Value(C)))) { 808*0b57cec5SDimitry Andric if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) 809*0b57cec5SDimitry Andric return false; 810*0b57cec5SDimitry Andric // Matched: select C == 1 ? ... : ... 811*0b57cec5SDimitry Andric // select C != 1 ? ... : ... 812*0b57cec5SDimitry Andric TrueIfZero = (P == CmpInst::ICMP_NE); 813*0b57cec5SDimitry Andric } else 814*0b57cec5SDimitry Andric return false; 815*0b57cec5SDimitry Andric 816*0b57cec5SDimitry Andric Value *X = nullptr; 817*0b57cec5SDimitry Andric if (!match(C, m_And(m_Value(X), m_One())) && 818*0b57cec5SDimitry Andric !match(C, m_And(m_One(), m_Value(X)))) 819*0b57cec5SDimitry Andric return false; 820*0b57cec5SDimitry Andric // Matched: select (X & 1) == +++ ? ... : ... 821*0b57cec5SDimitry Andric // select (X & 1) != +++ ? ... : ... 822*0b57cec5SDimitry Andric 823*0b57cec5SDimitry Andric Value *R = nullptr, *Q = nullptr; 824*0b57cec5SDimitry Andric if (TrueIfZero) { 825*0b57cec5SDimitry Andric // The select's condition is true if the tested bit is 0. 826*0b57cec5SDimitry Andric // TrueV must be the shift, FalseV must be the xor. 827*0b57cec5SDimitry Andric if (!match(TrueV, m_LShr(m_Value(R), m_One()))) 828*0b57cec5SDimitry Andric return false; 829*0b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) : ... 830*0b57cec5SDimitry Andric if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) && 831*0b57cec5SDimitry Andric !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV)))) 832*0b57cec5SDimitry Andric return false; 833*0b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q 834*0b57cec5SDimitry Andric // with commuting ^. 835*0b57cec5SDimitry Andric } else { 836*0b57cec5SDimitry Andric // The select's condition is true if the tested bit is 1. 837*0b57cec5SDimitry Andric // TrueV must be the xor, FalseV must be the shift. 838*0b57cec5SDimitry Andric if (!match(FalseV, m_LShr(m_Value(R), m_One()))) 839*0b57cec5SDimitry Andric return false; 840*0b57cec5SDimitry Andric // Matched: select +++ ? ... : (R >> 1) 841*0b57cec5SDimitry Andric if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) && 842*0b57cec5SDimitry Andric !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV)))) 843*0b57cec5SDimitry Andric return false; 844*0b57cec5SDimitry Andric // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1) 845*0b57cec5SDimitry Andric // with commuting ^. 846*0b57cec5SDimitry Andric } 847*0b57cec5SDimitry Andric 848*0b57cec5SDimitry Andric PV.X = X; 849*0b57cec5SDimitry Andric PV.Q = Q; 850*0b57cec5SDimitry Andric PV.R = R; 851*0b57cec5SDimitry Andric PV.Left = false; 852*0b57cec5SDimitry Andric return true; 853*0b57cec5SDimitry Andric } 854*0b57cec5SDimitry Andric 855*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, 856*0b57cec5SDimitry Andric BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, 857*0b57cec5SDimitry Andric bool PreScan) { 858*0b57cec5SDimitry Andric using namespace PatternMatch; 859*0b57cec5SDimitry Andric 860*0b57cec5SDimitry Andric // The basic pattern for R = P.Q is: 861*0b57cec5SDimitry Andric // for i = 0..31 862*0b57cec5SDimitry Andric // R = phi (0, R') 863*0b57cec5SDimitry Andric // if (P & (1 << i)) ; test-bit(P, i) 864*0b57cec5SDimitry Andric // R' = R ^ (Q << i) 865*0b57cec5SDimitry Andric // 866*0b57cec5SDimitry Andric // Similarly, the basic pattern for R = (P/Q).Q - P 867*0b57cec5SDimitry Andric // for i = 0..31 868*0b57cec5SDimitry Andric // R = phi(P, R') 869*0b57cec5SDimitry Andric // if (R & (1 << i)) 870*0b57cec5SDimitry Andric // R' = R ^ (Q << i) 871*0b57cec5SDimitry Andric 872*0b57cec5SDimitry Andric // There exist idioms, where instead of Q being shifted left, P is shifted 873*0b57cec5SDimitry Andric // right. This produces a result that is shifted right by 32 bits (the 874*0b57cec5SDimitry Andric // non-shifted result is 64-bit). 875*0b57cec5SDimitry Andric // 876*0b57cec5SDimitry Andric // For R = P.Q, this would be: 877*0b57cec5SDimitry Andric // for i = 0..31 878*0b57cec5SDimitry Andric // R = phi (0, R') 879*0b57cec5SDimitry Andric // if ((P >> i) & 1) 880*0b57cec5SDimitry Andric // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must 881*0b57cec5SDimitry Andric // else ; be shifted by 1, not i. 882*0b57cec5SDimitry Andric // R' = R >> 1 883*0b57cec5SDimitry Andric // 884*0b57cec5SDimitry Andric // And for the inverse: 885*0b57cec5SDimitry Andric // for i = 0..31 886*0b57cec5SDimitry Andric // R = phi (P, R') 887*0b57cec5SDimitry Andric // if (R & 1) 888*0b57cec5SDimitry Andric // R' = (R >> 1) ^ Q 889*0b57cec5SDimitry Andric // else 890*0b57cec5SDimitry Andric // R' = R >> 1 891*0b57cec5SDimitry Andric 892*0b57cec5SDimitry Andric // The left-shifting idioms share the same pattern: 893*0b57cec5SDimitry Andric // select (X & (1 << i)) ? R ^ (Q << i) : R 894*0b57cec5SDimitry Andric // Similarly for right-shifting idioms: 895*0b57cec5SDimitry Andric // select (X & 1) ? (R >> 1) ^ Q 896*0b57cec5SDimitry Andric 897*0b57cec5SDimitry Andric if (matchLeftShift(SelI, CIV, PV)) { 898*0b57cec5SDimitry Andric // If this is a pre-scan, getting this far is sufficient. 899*0b57cec5SDimitry Andric if (PreScan) 900*0b57cec5SDimitry Andric return true; 901*0b57cec5SDimitry Andric 902*0b57cec5SDimitry Andric // Need to make sure that the SelI goes back into R. 903*0b57cec5SDimitry Andric auto *RPhi = dyn_cast<PHINode>(PV.R); 904*0b57cec5SDimitry Andric if (!RPhi) 905*0b57cec5SDimitry Andric return false; 906*0b57cec5SDimitry Andric if (SelI != RPhi->getIncomingValueForBlock(LoopB)) 907*0b57cec5SDimitry Andric return false; 908*0b57cec5SDimitry Andric PV.Res = SelI; 909*0b57cec5SDimitry Andric 910*0b57cec5SDimitry Andric // If X is loop invariant, it must be the input polynomial, and the 911*0b57cec5SDimitry Andric // idiom is the basic polynomial multiply. 912*0b57cec5SDimitry Andric if (CurLoop->isLoopInvariant(PV.X)) { 913*0b57cec5SDimitry Andric PV.P = PV.X; 914*0b57cec5SDimitry Andric PV.Inv = false; 915*0b57cec5SDimitry Andric } else { 916*0b57cec5SDimitry Andric // X is not loop invariant. If X == R, this is the inverse pmpy. 917*0b57cec5SDimitry Andric // Otherwise, check for an xor with an invariant value. If the 918*0b57cec5SDimitry Andric // variable argument to the xor is R, then this is still a valid 919*0b57cec5SDimitry Andric // inverse pmpy. 920*0b57cec5SDimitry Andric PV.Inv = true; 921*0b57cec5SDimitry Andric if (PV.X != PV.R) { 922*0b57cec5SDimitry Andric Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr; 923*0b57cec5SDimitry Andric if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2)))) 924*0b57cec5SDimitry Andric return false; 925*0b57cec5SDimitry Andric auto *I1 = dyn_cast<Instruction>(X1); 926*0b57cec5SDimitry Andric auto *I2 = dyn_cast<Instruction>(X2); 927*0b57cec5SDimitry Andric if (!I1 || I1->getParent() != LoopB) { 928*0b57cec5SDimitry Andric Var = X2; 929*0b57cec5SDimitry Andric Inv = X1; 930*0b57cec5SDimitry Andric } else if (!I2 || I2->getParent() != LoopB) { 931*0b57cec5SDimitry Andric Var = X1; 932*0b57cec5SDimitry Andric Inv = X2; 933*0b57cec5SDimitry Andric } else 934*0b57cec5SDimitry Andric return false; 935*0b57cec5SDimitry Andric if (Var != PV.R) 936*0b57cec5SDimitry Andric return false; 937*0b57cec5SDimitry Andric PV.M = Inv; 938*0b57cec5SDimitry Andric } 939*0b57cec5SDimitry Andric // The input polynomial P still needs to be determined. It will be 940*0b57cec5SDimitry Andric // the entry value of R. 941*0b57cec5SDimitry Andric Value *EntryP = RPhi->getIncomingValueForBlock(PrehB); 942*0b57cec5SDimitry Andric PV.P = EntryP; 943*0b57cec5SDimitry Andric } 944*0b57cec5SDimitry Andric 945*0b57cec5SDimitry Andric return true; 946*0b57cec5SDimitry Andric } 947*0b57cec5SDimitry Andric 948*0b57cec5SDimitry Andric if (matchRightShift(SelI, PV)) { 949*0b57cec5SDimitry Andric // If this is an inverse pattern, the Q polynomial must be known at 950*0b57cec5SDimitry Andric // compile time. 951*0b57cec5SDimitry Andric if (PV.Inv && !isa<ConstantInt>(PV.Q)) 952*0b57cec5SDimitry Andric return false; 953*0b57cec5SDimitry Andric if (PreScan) 954*0b57cec5SDimitry Andric return true; 955*0b57cec5SDimitry Andric // There is no exact matching of right-shift pmpy. 956*0b57cec5SDimitry Andric return false; 957*0b57cec5SDimitry Andric } 958*0b57cec5SDimitry Andric 959*0b57cec5SDimitry Andric return false; 960*0b57cec5SDimitry Andric } 961*0b57cec5SDimitry Andric 962*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, 963*0b57cec5SDimitry Andric IntegerType *DestTy) { 964*0b57cec5SDimitry Andric IntegerType *T = dyn_cast<IntegerType>(Val->getType()); 965*0b57cec5SDimitry Andric if (!T || T->getBitWidth() > DestTy->getBitWidth()) 966*0b57cec5SDimitry Andric return false; 967*0b57cec5SDimitry Andric if (T->getBitWidth() == DestTy->getBitWidth()) 968*0b57cec5SDimitry Andric return true; 969*0b57cec5SDimitry Andric // Non-instructions are promotable. The reason why an instruction may not 970*0b57cec5SDimitry Andric // be promotable is that it may produce a different result if its operands 971*0b57cec5SDimitry Andric // and the result are promoted, for example, it may produce more non-zero 972*0b57cec5SDimitry Andric // bits. While it would still be possible to represent the proper result 973*0b57cec5SDimitry Andric // in a wider type, it may require adding additional instructions (which 974*0b57cec5SDimitry Andric // we don't want to do). 975*0b57cec5SDimitry Andric Instruction *In = dyn_cast<Instruction>(Val); 976*0b57cec5SDimitry Andric if (!In) 977*0b57cec5SDimitry Andric return true; 978*0b57cec5SDimitry Andric // The bitwidth of the source type is smaller than the destination. 979*0b57cec5SDimitry Andric // Check if the individual operation can be promoted. 980*0b57cec5SDimitry Andric switch (In->getOpcode()) { 981*0b57cec5SDimitry Andric case Instruction::PHI: 982*0b57cec5SDimitry Andric case Instruction::ZExt: 983*0b57cec5SDimitry Andric case Instruction::And: 984*0b57cec5SDimitry Andric case Instruction::Or: 985*0b57cec5SDimitry Andric case Instruction::Xor: 986*0b57cec5SDimitry Andric case Instruction::LShr: // Shift right is ok. 987*0b57cec5SDimitry Andric case Instruction::Select: 988*0b57cec5SDimitry Andric case Instruction::Trunc: 989*0b57cec5SDimitry Andric return true; 990*0b57cec5SDimitry Andric case Instruction::ICmp: 991*0b57cec5SDimitry Andric if (CmpInst *CI = cast<CmpInst>(In)) 992*0b57cec5SDimitry Andric return CI->isEquality() || CI->isUnsigned(); 993*0b57cec5SDimitry Andric llvm_unreachable("Cast failed unexpectedly"); 994*0b57cec5SDimitry Andric case Instruction::Add: 995*0b57cec5SDimitry Andric return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); 996*0b57cec5SDimitry Andric } 997*0b57cec5SDimitry Andric return false; 998*0b57cec5SDimitry Andric } 999*0b57cec5SDimitry Andric 1000*0b57cec5SDimitry Andric void PolynomialMultiplyRecognize::promoteTo(Instruction *In, 1001*0b57cec5SDimitry Andric IntegerType *DestTy, BasicBlock *LoopB) { 1002*0b57cec5SDimitry Andric Type *OrigTy = In->getType(); 1003*0b57cec5SDimitry Andric assert(!OrigTy->isVoidTy() && "Invalid instruction to promote"); 1004*0b57cec5SDimitry Andric 1005*0b57cec5SDimitry Andric // Leave boolean values alone. 1006*0b57cec5SDimitry Andric if (!In->getType()->isIntegerTy(1)) 1007*0b57cec5SDimitry Andric In->mutateType(DestTy); 1008*0b57cec5SDimitry Andric unsigned DestBW = DestTy->getBitWidth(); 1009*0b57cec5SDimitry Andric 1010*0b57cec5SDimitry Andric // Handle PHIs. 1011*0b57cec5SDimitry Andric if (PHINode *P = dyn_cast<PHINode>(In)) { 1012*0b57cec5SDimitry Andric unsigned N = P->getNumIncomingValues(); 1013*0b57cec5SDimitry Andric for (unsigned i = 0; i != N; ++i) { 1014*0b57cec5SDimitry Andric BasicBlock *InB = P->getIncomingBlock(i); 1015*0b57cec5SDimitry Andric if (InB == LoopB) 1016*0b57cec5SDimitry Andric continue; 1017*0b57cec5SDimitry Andric Value *InV = P->getIncomingValue(i); 1018*0b57cec5SDimitry Andric IntegerType *Ty = cast<IntegerType>(InV->getType()); 1019*0b57cec5SDimitry Andric // Do not promote values in PHI nodes of type i1. 1020*0b57cec5SDimitry Andric if (Ty != P->getType()) { 1021*0b57cec5SDimitry Andric // If the value type does not match the PHI type, the PHI type 1022*0b57cec5SDimitry Andric // must have been promoted. 1023*0b57cec5SDimitry Andric assert(Ty->getBitWidth() < DestBW); 1024*0b57cec5SDimitry Andric InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); 1025*0b57cec5SDimitry Andric P->setIncomingValue(i, InV); 1026*0b57cec5SDimitry Andric } 1027*0b57cec5SDimitry Andric } 1028*0b57cec5SDimitry Andric } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) { 1029*0b57cec5SDimitry Andric Value *Op = Z->getOperand(0); 1030*0b57cec5SDimitry Andric if (Op->getType() == Z->getType()) 1031*0b57cec5SDimitry Andric Z->replaceAllUsesWith(Op); 1032*0b57cec5SDimitry Andric Z->eraseFromParent(); 1033*0b57cec5SDimitry Andric return; 1034*0b57cec5SDimitry Andric } 1035*0b57cec5SDimitry Andric if (TruncInst *T = dyn_cast<TruncInst>(In)) { 1036*0b57cec5SDimitry Andric IntegerType *TruncTy = cast<IntegerType>(OrigTy); 1037*0b57cec5SDimitry Andric Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); 1038*0b57cec5SDimitry Andric Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); 1039*0b57cec5SDimitry Andric T->replaceAllUsesWith(And); 1040*0b57cec5SDimitry Andric T->eraseFromParent(); 1041*0b57cec5SDimitry Andric return; 1042*0b57cec5SDimitry Andric } 1043*0b57cec5SDimitry Andric 1044*0b57cec5SDimitry Andric // Promote immediates. 1045*0b57cec5SDimitry Andric for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { 1046*0b57cec5SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) 1047*0b57cec5SDimitry Andric if (CI->getType()->getBitWidth() < DestBW) 1048*0b57cec5SDimitry Andric In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); 1049*0b57cec5SDimitry Andric } 1050*0b57cec5SDimitry Andric } 1051*0b57cec5SDimitry Andric 1052*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, 1053*0b57cec5SDimitry Andric BasicBlock *ExitB) { 1054*0b57cec5SDimitry Andric assert(LoopB); 1055*0b57cec5SDimitry Andric // Skip loops where the exit block has more than one predecessor. The values 1056*0b57cec5SDimitry Andric // coming from the loop block will be promoted to another type, and so the 1057*0b57cec5SDimitry Andric // values coming into the exit block from other predecessors would also have 1058*0b57cec5SDimitry Andric // to be promoted. 1059*0b57cec5SDimitry Andric if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) 1060*0b57cec5SDimitry Andric return false; 1061*0b57cec5SDimitry Andric IntegerType *DestTy = getPmpyType(); 1062*0b57cec5SDimitry Andric // Check if the exit values have types that are no wider than the type 1063*0b57cec5SDimitry Andric // that we want to promote to. 1064*0b57cec5SDimitry Andric unsigned DestBW = DestTy->getBitWidth(); 1065*0b57cec5SDimitry Andric for (PHINode &P : ExitB->phis()) { 1066*0b57cec5SDimitry Andric if (P.getNumIncomingValues() != 1) 1067*0b57cec5SDimitry Andric return false; 1068*0b57cec5SDimitry Andric assert(P.getIncomingBlock(0) == LoopB); 1069*0b57cec5SDimitry Andric IntegerType *T = dyn_cast<IntegerType>(P.getType()); 1070*0b57cec5SDimitry Andric if (!T || T->getBitWidth() > DestBW) 1071*0b57cec5SDimitry Andric return false; 1072*0b57cec5SDimitry Andric } 1073*0b57cec5SDimitry Andric 1074*0b57cec5SDimitry Andric // Check all instructions in the loop. 1075*0b57cec5SDimitry Andric for (Instruction &In : *LoopB) 1076*0b57cec5SDimitry Andric if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) 1077*0b57cec5SDimitry Andric return false; 1078*0b57cec5SDimitry Andric 1079*0b57cec5SDimitry Andric // Perform the promotion. 1080*0b57cec5SDimitry Andric std::vector<Instruction*> LoopIns; 1081*0b57cec5SDimitry Andric std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), 1082*0b57cec5SDimitry Andric [](Instruction &In) { return &In; }); 1083*0b57cec5SDimitry Andric for (Instruction *In : LoopIns) 1084*0b57cec5SDimitry Andric if (!In->isTerminator()) 1085*0b57cec5SDimitry Andric promoteTo(In, DestTy, LoopB); 1086*0b57cec5SDimitry Andric 1087*0b57cec5SDimitry Andric // Fix up the PHI nodes in the exit block. 1088*0b57cec5SDimitry Andric Instruction *EndI = ExitB->getFirstNonPHI(); 1089*0b57cec5SDimitry Andric BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); 1090*0b57cec5SDimitry Andric for (auto I = ExitB->begin(); I != End; ++I) { 1091*0b57cec5SDimitry Andric PHINode *P = dyn_cast<PHINode>(I); 1092*0b57cec5SDimitry Andric if (!P) 1093*0b57cec5SDimitry Andric break; 1094*0b57cec5SDimitry Andric Type *Ty0 = P->getIncomingValue(0)->getType(); 1095*0b57cec5SDimitry Andric Type *PTy = P->getType(); 1096*0b57cec5SDimitry Andric if (PTy != Ty0) { 1097*0b57cec5SDimitry Andric assert(Ty0 == DestTy); 1098*0b57cec5SDimitry Andric // In order to create the trunc, P must have the promoted type. 1099*0b57cec5SDimitry Andric P->mutateType(Ty0); 1100*0b57cec5SDimitry Andric Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); 1101*0b57cec5SDimitry Andric // In order for the RAUW to work, the types of P and T must match. 1102*0b57cec5SDimitry Andric P->mutateType(PTy); 1103*0b57cec5SDimitry Andric P->replaceAllUsesWith(T); 1104*0b57cec5SDimitry Andric // Final update of the P's type. 1105*0b57cec5SDimitry Andric P->mutateType(Ty0); 1106*0b57cec5SDimitry Andric cast<Instruction>(T)->setOperand(0, P); 1107*0b57cec5SDimitry Andric } 1108*0b57cec5SDimitry Andric } 1109*0b57cec5SDimitry Andric 1110*0b57cec5SDimitry Andric return true; 1111*0b57cec5SDimitry Andric } 1112*0b57cec5SDimitry Andric 1113*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, 1114*0b57cec5SDimitry Andric ValueSeq &Cycle) { 1115*0b57cec5SDimitry Andric // Out = ..., In, ... 1116*0b57cec5SDimitry Andric if (Out == In) 1117*0b57cec5SDimitry Andric return true; 1118*0b57cec5SDimitry Andric 1119*0b57cec5SDimitry Andric auto *BB = cast<Instruction>(Out)->getParent(); 1120*0b57cec5SDimitry Andric bool HadPhi = false; 1121*0b57cec5SDimitry Andric 1122*0b57cec5SDimitry Andric for (auto U : Out->users()) { 1123*0b57cec5SDimitry Andric auto *I = dyn_cast<Instruction>(&*U); 1124*0b57cec5SDimitry Andric if (I == nullptr || I->getParent() != BB) 1125*0b57cec5SDimitry Andric continue; 1126*0b57cec5SDimitry Andric // Make sure that there are no multi-iteration cycles, e.g. 1127*0b57cec5SDimitry Andric // p1 = phi(p2) 1128*0b57cec5SDimitry Andric // p2 = phi(p1) 1129*0b57cec5SDimitry Andric // The cycle p1->p2->p1 would span two loop iterations. 1130*0b57cec5SDimitry Andric // Check that there is only one phi in the cycle. 1131*0b57cec5SDimitry Andric bool IsPhi = isa<PHINode>(I); 1132*0b57cec5SDimitry Andric if (IsPhi && HadPhi) 1133*0b57cec5SDimitry Andric return false; 1134*0b57cec5SDimitry Andric HadPhi |= IsPhi; 1135*0b57cec5SDimitry Andric if (Cycle.count(I)) 1136*0b57cec5SDimitry Andric return false; 1137*0b57cec5SDimitry Andric Cycle.insert(I); 1138*0b57cec5SDimitry Andric if (findCycle(I, In, Cycle)) 1139*0b57cec5SDimitry Andric break; 1140*0b57cec5SDimitry Andric Cycle.remove(I); 1141*0b57cec5SDimitry Andric } 1142*0b57cec5SDimitry Andric return !Cycle.empty(); 1143*0b57cec5SDimitry Andric } 1144*0b57cec5SDimitry Andric 1145*0b57cec5SDimitry Andric void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI, 1146*0b57cec5SDimitry Andric ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) { 1147*0b57cec5SDimitry Andric // All the values in the cycle that are between the phi node and the 1148*0b57cec5SDimitry Andric // divider instruction will be classified as "early", all other values 1149*0b57cec5SDimitry Andric // will be "late". 1150*0b57cec5SDimitry Andric 1151*0b57cec5SDimitry Andric bool IsE = true; 1152*0b57cec5SDimitry Andric unsigned I, N = Cycle.size(); 1153*0b57cec5SDimitry Andric for (I = 0; I < N; ++I) { 1154*0b57cec5SDimitry Andric Value *V = Cycle[I]; 1155*0b57cec5SDimitry Andric if (DivI == V) 1156*0b57cec5SDimitry Andric IsE = false; 1157*0b57cec5SDimitry Andric else if (!isa<PHINode>(V)) 1158*0b57cec5SDimitry Andric continue; 1159*0b57cec5SDimitry Andric // Stop if found either. 1160*0b57cec5SDimitry Andric break; 1161*0b57cec5SDimitry Andric } 1162*0b57cec5SDimitry Andric // "I" is the index of either DivI or the phi node, whichever was first. 1163*0b57cec5SDimitry Andric // "E" is "false" or "true" respectively. 1164*0b57cec5SDimitry Andric ValueSeq &First = !IsE ? Early : Late; 1165*0b57cec5SDimitry Andric for (unsigned J = 0; J < I; ++J) 1166*0b57cec5SDimitry Andric First.insert(Cycle[J]); 1167*0b57cec5SDimitry Andric 1168*0b57cec5SDimitry Andric ValueSeq &Second = IsE ? Early : Late; 1169*0b57cec5SDimitry Andric Second.insert(Cycle[I]); 1170*0b57cec5SDimitry Andric for (++I; I < N; ++I) { 1171*0b57cec5SDimitry Andric Value *V = Cycle[I]; 1172*0b57cec5SDimitry Andric if (DivI == V || isa<PHINode>(V)) 1173*0b57cec5SDimitry Andric break; 1174*0b57cec5SDimitry Andric Second.insert(V); 1175*0b57cec5SDimitry Andric } 1176*0b57cec5SDimitry Andric 1177*0b57cec5SDimitry Andric for (; I < N; ++I) 1178*0b57cec5SDimitry Andric First.insert(Cycle[I]); 1179*0b57cec5SDimitry Andric } 1180*0b57cec5SDimitry Andric 1181*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI, 1182*0b57cec5SDimitry Andric ValueSeq &Early, ValueSeq &Late) { 1183*0b57cec5SDimitry Andric // Select is an exception, since the condition value does not have to be 1184*0b57cec5SDimitry Andric // classified in the same way as the true/false values. The true/false 1185*0b57cec5SDimitry Andric // values do have to be both early or both late. 1186*0b57cec5SDimitry Andric if (UseI->getOpcode() == Instruction::Select) { 1187*0b57cec5SDimitry Andric Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2); 1188*0b57cec5SDimitry Andric if (Early.count(TV) || Early.count(FV)) { 1189*0b57cec5SDimitry Andric if (Late.count(TV) || Late.count(FV)) 1190*0b57cec5SDimitry Andric return false; 1191*0b57cec5SDimitry Andric Early.insert(UseI); 1192*0b57cec5SDimitry Andric } else if (Late.count(TV) || Late.count(FV)) { 1193*0b57cec5SDimitry Andric if (Early.count(TV) || Early.count(FV)) 1194*0b57cec5SDimitry Andric return false; 1195*0b57cec5SDimitry Andric Late.insert(UseI); 1196*0b57cec5SDimitry Andric } 1197*0b57cec5SDimitry Andric return true; 1198*0b57cec5SDimitry Andric } 1199*0b57cec5SDimitry Andric 1200*0b57cec5SDimitry Andric // Not sure what would be the example of this, but the code below relies 1201*0b57cec5SDimitry Andric // on having at least one operand. 1202*0b57cec5SDimitry Andric if (UseI->getNumOperands() == 0) 1203*0b57cec5SDimitry Andric return true; 1204*0b57cec5SDimitry Andric 1205*0b57cec5SDimitry Andric bool AE = true, AL = true; 1206*0b57cec5SDimitry Andric for (auto &I : UseI->operands()) { 1207*0b57cec5SDimitry Andric if (Early.count(&*I)) 1208*0b57cec5SDimitry Andric AL = false; 1209*0b57cec5SDimitry Andric else if (Late.count(&*I)) 1210*0b57cec5SDimitry Andric AE = false; 1211*0b57cec5SDimitry Andric } 1212*0b57cec5SDimitry Andric // If the operands appear "all early" and "all late" at the same time, 1213*0b57cec5SDimitry Andric // then it means that none of them are actually classified as either. 1214*0b57cec5SDimitry Andric // This is harmless. 1215*0b57cec5SDimitry Andric if (AE && AL) 1216*0b57cec5SDimitry Andric return true; 1217*0b57cec5SDimitry Andric // Conversely, if they are neither "all early" nor "all late", then 1218*0b57cec5SDimitry Andric // we have a mixture of early and late operands that is not a known 1219*0b57cec5SDimitry Andric // exception. 1220*0b57cec5SDimitry Andric if (!AE && !AL) 1221*0b57cec5SDimitry Andric return false; 1222*0b57cec5SDimitry Andric 1223*0b57cec5SDimitry Andric // Check that we have covered the two special cases. 1224*0b57cec5SDimitry Andric assert(AE != AL); 1225*0b57cec5SDimitry Andric 1226*0b57cec5SDimitry Andric if (AE) 1227*0b57cec5SDimitry Andric Early.insert(UseI); 1228*0b57cec5SDimitry Andric else 1229*0b57cec5SDimitry Andric Late.insert(UseI); 1230*0b57cec5SDimitry Andric return true; 1231*0b57cec5SDimitry Andric } 1232*0b57cec5SDimitry Andric 1233*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) { 1234*0b57cec5SDimitry Andric switch (I->getOpcode()) { 1235*0b57cec5SDimitry Andric case Instruction::And: 1236*0b57cec5SDimitry Andric case Instruction::Or: 1237*0b57cec5SDimitry Andric case Instruction::Xor: 1238*0b57cec5SDimitry Andric case Instruction::LShr: 1239*0b57cec5SDimitry Andric case Instruction::Shl: 1240*0b57cec5SDimitry Andric case Instruction::Select: 1241*0b57cec5SDimitry Andric case Instruction::ICmp: 1242*0b57cec5SDimitry Andric case Instruction::PHI: 1243*0b57cec5SDimitry Andric break; 1244*0b57cec5SDimitry Andric default: 1245*0b57cec5SDimitry Andric return false; 1246*0b57cec5SDimitry Andric } 1247*0b57cec5SDimitry Andric return true; 1248*0b57cec5SDimitry Andric } 1249*0b57cec5SDimitry Andric 1250*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, 1251*0b57cec5SDimitry Andric unsigned IterCount) { 1252*0b57cec5SDimitry Andric auto *T = dyn_cast<IntegerType>(V->getType()); 1253*0b57cec5SDimitry Andric if (!T) 1254*0b57cec5SDimitry Andric return false; 1255*0b57cec5SDimitry Andric 1256*0b57cec5SDimitry Andric KnownBits Known(T->getBitWidth()); 1257*0b57cec5SDimitry Andric computeKnownBits(V, Known, DL); 1258*0b57cec5SDimitry Andric return Known.countMinLeadingZeros() >= IterCount; 1259*0b57cec5SDimitry Andric } 1260*0b57cec5SDimitry Andric 1261*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, 1262*0b57cec5SDimitry Andric unsigned IterCount) { 1263*0b57cec5SDimitry Andric // Assume that all inputs to the value have the high bits zero. 1264*0b57cec5SDimitry Andric // Check if the value itself preserves the zeros in the high bits. 1265*0b57cec5SDimitry Andric if (auto *C = dyn_cast<ConstantInt>(V)) 1266*0b57cec5SDimitry Andric return C->getValue().countLeadingZeros() >= IterCount; 1267*0b57cec5SDimitry Andric 1268*0b57cec5SDimitry Andric if (auto *I = dyn_cast<Instruction>(V)) { 1269*0b57cec5SDimitry Andric switch (I->getOpcode()) { 1270*0b57cec5SDimitry Andric case Instruction::And: 1271*0b57cec5SDimitry Andric case Instruction::Or: 1272*0b57cec5SDimitry Andric case Instruction::Xor: 1273*0b57cec5SDimitry Andric case Instruction::LShr: 1274*0b57cec5SDimitry Andric case Instruction::Select: 1275*0b57cec5SDimitry Andric case Instruction::ICmp: 1276*0b57cec5SDimitry Andric case Instruction::PHI: 1277*0b57cec5SDimitry Andric case Instruction::ZExt: 1278*0b57cec5SDimitry Andric return true; 1279*0b57cec5SDimitry Andric } 1280*0b57cec5SDimitry Andric } 1281*0b57cec5SDimitry Andric 1282*0b57cec5SDimitry Andric return false; 1283*0b57cec5SDimitry Andric } 1284*0b57cec5SDimitry Andric 1285*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) { 1286*0b57cec5SDimitry Andric unsigned Opc = I->getOpcode(); 1287*0b57cec5SDimitry Andric if (Opc == Instruction::Shl || Opc == Instruction::LShr) 1288*0b57cec5SDimitry Andric return Op != I->getOperand(1); 1289*0b57cec5SDimitry Andric return true; 1290*0b57cec5SDimitry Andric } 1291*0b57cec5SDimitry Andric 1292*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, 1293*0b57cec5SDimitry Andric BasicBlock *ExitB, unsigned IterCount) { 1294*0b57cec5SDimitry Andric Value *CIV = getCountIV(LoopB); 1295*0b57cec5SDimitry Andric if (CIV == nullptr) 1296*0b57cec5SDimitry Andric return false; 1297*0b57cec5SDimitry Andric auto *CIVTy = dyn_cast<IntegerType>(CIV->getType()); 1298*0b57cec5SDimitry Andric if (CIVTy == nullptr) 1299*0b57cec5SDimitry Andric return false; 1300*0b57cec5SDimitry Andric 1301*0b57cec5SDimitry Andric ValueSeq RShifts; 1302*0b57cec5SDimitry Andric ValueSeq Early, Late, Cycled; 1303*0b57cec5SDimitry Andric 1304*0b57cec5SDimitry Andric // Find all value cycles that contain logical right shifts by 1. 1305*0b57cec5SDimitry Andric for (Instruction &I : *LoopB) { 1306*0b57cec5SDimitry Andric using namespace PatternMatch; 1307*0b57cec5SDimitry Andric 1308*0b57cec5SDimitry Andric Value *V = nullptr; 1309*0b57cec5SDimitry Andric if (!match(&I, m_LShr(m_Value(V), m_One()))) 1310*0b57cec5SDimitry Andric continue; 1311*0b57cec5SDimitry Andric ValueSeq C; 1312*0b57cec5SDimitry Andric if (!findCycle(&I, V, C)) 1313*0b57cec5SDimitry Andric continue; 1314*0b57cec5SDimitry Andric 1315*0b57cec5SDimitry Andric // Found a cycle. 1316*0b57cec5SDimitry Andric C.insert(&I); 1317*0b57cec5SDimitry Andric classifyCycle(&I, C, Early, Late); 1318*0b57cec5SDimitry Andric Cycled.insert(C.begin(), C.end()); 1319*0b57cec5SDimitry Andric RShifts.insert(&I); 1320*0b57cec5SDimitry Andric } 1321*0b57cec5SDimitry Andric 1322*0b57cec5SDimitry Andric // Find the set of all values affected by the shift cycles, i.e. all 1323*0b57cec5SDimitry Andric // cycled values, and (recursively) all their users. 1324*0b57cec5SDimitry Andric ValueSeq Users(Cycled.begin(), Cycled.end()); 1325*0b57cec5SDimitry Andric for (unsigned i = 0; i < Users.size(); ++i) { 1326*0b57cec5SDimitry Andric Value *V = Users[i]; 1327*0b57cec5SDimitry Andric if (!isa<IntegerType>(V->getType())) 1328*0b57cec5SDimitry Andric return false; 1329*0b57cec5SDimitry Andric auto *R = cast<Instruction>(V); 1330*0b57cec5SDimitry Andric // If the instruction does not commute with shifts, the loop cannot 1331*0b57cec5SDimitry Andric // be unshifted. 1332*0b57cec5SDimitry Andric if (!commutesWithShift(R)) 1333*0b57cec5SDimitry Andric return false; 1334*0b57cec5SDimitry Andric for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) { 1335*0b57cec5SDimitry Andric auto *T = cast<Instruction>(*I); 1336*0b57cec5SDimitry Andric // Skip users from outside of the loop. They will be handled later. 1337*0b57cec5SDimitry Andric // Also, skip the right-shifts and phi nodes, since they mix early 1338*0b57cec5SDimitry Andric // and late values. 1339*0b57cec5SDimitry Andric if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T)) 1340*0b57cec5SDimitry Andric continue; 1341*0b57cec5SDimitry Andric 1342*0b57cec5SDimitry Andric Users.insert(T); 1343*0b57cec5SDimitry Andric if (!classifyInst(T, Early, Late)) 1344*0b57cec5SDimitry Andric return false; 1345*0b57cec5SDimitry Andric } 1346*0b57cec5SDimitry Andric } 1347*0b57cec5SDimitry Andric 1348*0b57cec5SDimitry Andric if (Users.empty()) 1349*0b57cec5SDimitry Andric return false; 1350*0b57cec5SDimitry Andric 1351*0b57cec5SDimitry Andric // Verify that high bits remain zero. 1352*0b57cec5SDimitry Andric ValueSeq Internal(Users.begin(), Users.end()); 1353*0b57cec5SDimitry Andric ValueSeq Inputs; 1354*0b57cec5SDimitry Andric for (unsigned i = 0; i < Internal.size(); ++i) { 1355*0b57cec5SDimitry Andric auto *R = dyn_cast<Instruction>(Internal[i]); 1356*0b57cec5SDimitry Andric if (!R) 1357*0b57cec5SDimitry Andric continue; 1358*0b57cec5SDimitry Andric for (Value *Op : R->operands()) { 1359*0b57cec5SDimitry Andric auto *T = dyn_cast<Instruction>(Op); 1360*0b57cec5SDimitry Andric if (T && T->getParent() != LoopB) 1361*0b57cec5SDimitry Andric Inputs.insert(Op); 1362*0b57cec5SDimitry Andric else 1363*0b57cec5SDimitry Andric Internal.insert(Op); 1364*0b57cec5SDimitry Andric } 1365*0b57cec5SDimitry Andric } 1366*0b57cec5SDimitry Andric for (Value *V : Inputs) 1367*0b57cec5SDimitry Andric if (!highBitsAreZero(V, IterCount)) 1368*0b57cec5SDimitry Andric return false; 1369*0b57cec5SDimitry Andric for (Value *V : Internal) 1370*0b57cec5SDimitry Andric if (!keepsHighBitsZero(V, IterCount)) 1371*0b57cec5SDimitry Andric return false; 1372*0b57cec5SDimitry Andric 1373*0b57cec5SDimitry Andric // Finally, the work can be done. Unshift each user. 1374*0b57cec5SDimitry Andric IRBuilder<> IRB(LoopB); 1375*0b57cec5SDimitry Andric std::map<Value*,Value*> ShiftMap; 1376*0b57cec5SDimitry Andric 1377*0b57cec5SDimitry Andric using CastMapType = std::map<std::pair<Value *, Type *>, Value *>; 1378*0b57cec5SDimitry Andric 1379*0b57cec5SDimitry Andric CastMapType CastMap; 1380*0b57cec5SDimitry Andric 1381*0b57cec5SDimitry Andric auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V, 1382*0b57cec5SDimitry Andric IntegerType *Ty) -> Value* { 1383*0b57cec5SDimitry Andric auto H = CM.find(std::make_pair(V, Ty)); 1384*0b57cec5SDimitry Andric if (H != CM.end()) 1385*0b57cec5SDimitry Andric return H->second; 1386*0b57cec5SDimitry Andric Value *CV = IRB.CreateIntCast(V, Ty, false); 1387*0b57cec5SDimitry Andric CM.insert(std::make_pair(std::make_pair(V, Ty), CV)); 1388*0b57cec5SDimitry Andric return CV; 1389*0b57cec5SDimitry Andric }; 1390*0b57cec5SDimitry Andric 1391*0b57cec5SDimitry Andric for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) { 1392*0b57cec5SDimitry Andric using namespace PatternMatch; 1393*0b57cec5SDimitry Andric 1394*0b57cec5SDimitry Andric if (isa<PHINode>(I) || !Users.count(&*I)) 1395*0b57cec5SDimitry Andric continue; 1396*0b57cec5SDimitry Andric 1397*0b57cec5SDimitry Andric // Match lshr x, 1. 1398*0b57cec5SDimitry Andric Value *V = nullptr; 1399*0b57cec5SDimitry Andric if (match(&*I, m_LShr(m_Value(V), m_One()))) { 1400*0b57cec5SDimitry Andric replaceAllUsesOfWithIn(&*I, V, LoopB); 1401*0b57cec5SDimitry Andric continue; 1402*0b57cec5SDimitry Andric } 1403*0b57cec5SDimitry Andric // For each non-cycled operand, replace it with the corresponding 1404*0b57cec5SDimitry Andric // value shifted left. 1405*0b57cec5SDimitry Andric for (auto &J : I->operands()) { 1406*0b57cec5SDimitry Andric Value *Op = J.get(); 1407*0b57cec5SDimitry Andric if (!isOperandShifted(&*I, Op)) 1408*0b57cec5SDimitry Andric continue; 1409*0b57cec5SDimitry Andric if (Users.count(Op)) 1410*0b57cec5SDimitry Andric continue; 1411*0b57cec5SDimitry Andric // Skip shifting zeros. 1412*0b57cec5SDimitry Andric if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) 1413*0b57cec5SDimitry Andric continue; 1414*0b57cec5SDimitry Andric // Check if we have already generated a shift for this value. 1415*0b57cec5SDimitry Andric auto F = ShiftMap.find(Op); 1416*0b57cec5SDimitry Andric Value *W = (F != ShiftMap.end()) ? F->second : nullptr; 1417*0b57cec5SDimitry Andric if (W == nullptr) { 1418*0b57cec5SDimitry Andric IRB.SetInsertPoint(&*I); 1419*0b57cec5SDimitry Andric // First, the shift amount will be CIV or CIV+1, depending on 1420*0b57cec5SDimitry Andric // whether the value is early or late. Instead of creating CIV+1, 1421*0b57cec5SDimitry Andric // do a single shift of the value. 1422*0b57cec5SDimitry Andric Value *ShAmt = CIV, *ShVal = Op; 1423*0b57cec5SDimitry Andric auto *VTy = cast<IntegerType>(ShVal->getType()); 1424*0b57cec5SDimitry Andric auto *ATy = cast<IntegerType>(ShAmt->getType()); 1425*0b57cec5SDimitry Andric if (Late.count(&*I)) 1426*0b57cec5SDimitry Andric ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1)); 1427*0b57cec5SDimitry Andric // Second, the types of the shifted value and the shift amount 1428*0b57cec5SDimitry Andric // must match. 1429*0b57cec5SDimitry Andric if (VTy != ATy) { 1430*0b57cec5SDimitry Andric if (VTy->getBitWidth() < ATy->getBitWidth()) 1431*0b57cec5SDimitry Andric ShVal = upcast(CastMap, IRB, ShVal, ATy); 1432*0b57cec5SDimitry Andric else 1433*0b57cec5SDimitry Andric ShAmt = upcast(CastMap, IRB, ShAmt, VTy); 1434*0b57cec5SDimitry Andric } 1435*0b57cec5SDimitry Andric // Ready to generate the shift and memoize it. 1436*0b57cec5SDimitry Andric W = IRB.CreateShl(ShVal, ShAmt); 1437*0b57cec5SDimitry Andric ShiftMap.insert(std::make_pair(Op, W)); 1438*0b57cec5SDimitry Andric } 1439*0b57cec5SDimitry Andric I->replaceUsesOfWith(Op, W); 1440*0b57cec5SDimitry Andric } 1441*0b57cec5SDimitry Andric } 1442*0b57cec5SDimitry Andric 1443*0b57cec5SDimitry Andric // Update the users outside of the loop to account for having left 1444*0b57cec5SDimitry Andric // shifts. They would normally be shifted right in the loop, so shift 1445*0b57cec5SDimitry Andric // them right after the loop exit. 1446*0b57cec5SDimitry Andric // Take advantage of the loop-closed SSA form, which has all the post- 1447*0b57cec5SDimitry Andric // loop values in phi nodes. 1448*0b57cec5SDimitry Andric IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt()); 1449*0b57cec5SDimitry Andric for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) { 1450*0b57cec5SDimitry Andric if (!isa<PHINode>(P)) 1451*0b57cec5SDimitry Andric break; 1452*0b57cec5SDimitry Andric auto *PN = cast<PHINode>(P); 1453*0b57cec5SDimitry Andric Value *U = PN->getIncomingValueForBlock(LoopB); 1454*0b57cec5SDimitry Andric if (!Users.count(U)) 1455*0b57cec5SDimitry Andric continue; 1456*0b57cec5SDimitry Andric Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount)); 1457*0b57cec5SDimitry Andric PN->replaceAllUsesWith(S); 1458*0b57cec5SDimitry Andric // The above RAUW will create 1459*0b57cec5SDimitry Andric // S = lshr S, IterCount 1460*0b57cec5SDimitry Andric // so we need to fix it back into 1461*0b57cec5SDimitry Andric // S = lshr PN, IterCount 1462*0b57cec5SDimitry Andric cast<User>(S)->replaceUsesOfWith(S, PN); 1463*0b57cec5SDimitry Andric } 1464*0b57cec5SDimitry Andric 1465*0b57cec5SDimitry Andric return true; 1466*0b57cec5SDimitry Andric } 1467*0b57cec5SDimitry Andric 1468*0b57cec5SDimitry Andric void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { 1469*0b57cec5SDimitry Andric for (auto &I : *LoopB) 1470*0b57cec5SDimitry Andric if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) 1471*0b57cec5SDimitry Andric I.replaceAllUsesWith(SV); 1472*0b57cec5SDimitry Andric 1473*0b57cec5SDimitry Andric for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { 1474*0b57cec5SDimitry Andric N = std::next(I); 1475*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI); 1476*0b57cec5SDimitry Andric } 1477*0b57cec5SDimitry Andric } 1478*0b57cec5SDimitry Andric 1479*0b57cec5SDimitry Andric unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { 1480*0b57cec5SDimitry Andric // Arrays of coefficients of Q and the inverse, C. 1481*0b57cec5SDimitry Andric // Q[i] = coefficient at x^i. 1482*0b57cec5SDimitry Andric std::array<char,32> Q, C; 1483*0b57cec5SDimitry Andric 1484*0b57cec5SDimitry Andric for (unsigned i = 0; i < 32; ++i) { 1485*0b57cec5SDimitry Andric Q[i] = QP & 1; 1486*0b57cec5SDimitry Andric QP >>= 1; 1487*0b57cec5SDimitry Andric } 1488*0b57cec5SDimitry Andric assert(Q[0] == 1); 1489*0b57cec5SDimitry Andric 1490*0b57cec5SDimitry Andric // Find C, such that 1491*0b57cec5SDimitry Andric // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1 1492*0b57cec5SDimitry Andric // 1493*0b57cec5SDimitry Andric // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the 1494*0b57cec5SDimitry Andric // operations * and + are & and ^ respectively. 1495*0b57cec5SDimitry Andric // 1496*0b57cec5SDimitry Andric // Find C[i] recursively, by comparing i-th coefficient in the product 1497*0b57cec5SDimitry Andric // with 0 (or 1 for i=0). 1498*0b57cec5SDimitry Andric // 1499*0b57cec5SDimitry Andric // C[0] = 1, since C[0] = Q[0], and Q[0] = 1. 1500*0b57cec5SDimitry Andric C[0] = 1; 1501*0b57cec5SDimitry Andric for (unsigned i = 1; i < 32; ++i) { 1502*0b57cec5SDimitry Andric // Solve for C[i] in: 1503*0b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0 1504*0b57cec5SDimitry Andric // This is equivalent to 1505*0b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0 1506*0b57cec5SDimitry Andric // which is 1507*0b57cec5SDimitry Andric // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i] 1508*0b57cec5SDimitry Andric unsigned T = 0; 1509*0b57cec5SDimitry Andric for (unsigned j = 0; j < i; ++j) 1510*0b57cec5SDimitry Andric T = T ^ (C[j] & Q[i-j]); 1511*0b57cec5SDimitry Andric C[i] = T; 1512*0b57cec5SDimitry Andric } 1513*0b57cec5SDimitry Andric 1514*0b57cec5SDimitry Andric unsigned QV = 0; 1515*0b57cec5SDimitry Andric for (unsigned i = 0; i < 32; ++i) 1516*0b57cec5SDimitry Andric if (C[i]) 1517*0b57cec5SDimitry Andric QV |= (1 << i); 1518*0b57cec5SDimitry Andric 1519*0b57cec5SDimitry Andric return QV; 1520*0b57cec5SDimitry Andric } 1521*0b57cec5SDimitry Andric 1522*0b57cec5SDimitry Andric Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, 1523*0b57cec5SDimitry Andric ParsedValues &PV) { 1524*0b57cec5SDimitry Andric IRBuilder<> B(&*At); 1525*0b57cec5SDimitry Andric Module *M = At->getParent()->getParent()->getParent(); 1526*0b57cec5SDimitry Andric Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw); 1527*0b57cec5SDimitry Andric 1528*0b57cec5SDimitry Andric Value *P = PV.P, *Q = PV.Q, *P0 = P; 1529*0b57cec5SDimitry Andric unsigned IC = PV.IterCount; 1530*0b57cec5SDimitry Andric 1531*0b57cec5SDimitry Andric if (PV.M != nullptr) 1532*0b57cec5SDimitry Andric P0 = P = B.CreateXor(P, PV.M); 1533*0b57cec5SDimitry Andric 1534*0b57cec5SDimitry Andric // Create a bit mask to clear the high bits beyond IterCount. 1535*0b57cec5SDimitry Andric auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC)); 1536*0b57cec5SDimitry Andric 1537*0b57cec5SDimitry Andric if (PV.IterCount != 32) 1538*0b57cec5SDimitry Andric P = B.CreateAnd(P, BMI); 1539*0b57cec5SDimitry Andric 1540*0b57cec5SDimitry Andric if (PV.Inv) { 1541*0b57cec5SDimitry Andric auto *QI = dyn_cast<ConstantInt>(PV.Q); 1542*0b57cec5SDimitry Andric assert(QI && QI->getBitWidth() <= 32); 1543*0b57cec5SDimitry Andric 1544*0b57cec5SDimitry Andric // Again, clearing bits beyond IterCount. 1545*0b57cec5SDimitry Andric unsigned M = (1 << PV.IterCount) - 1; 1546*0b57cec5SDimitry Andric unsigned Tmp = (QI->getZExtValue() | 1) & M; 1547*0b57cec5SDimitry Andric unsigned QV = getInverseMxN(Tmp) & M; 1548*0b57cec5SDimitry Andric auto *QVI = ConstantInt::get(QI->getType(), QV); 1549*0b57cec5SDimitry Andric P = B.CreateCall(PMF, {P, QVI}); 1550*0b57cec5SDimitry Andric P = B.CreateTrunc(P, QI->getType()); 1551*0b57cec5SDimitry Andric if (IC != 32) 1552*0b57cec5SDimitry Andric P = B.CreateAnd(P, BMI); 1553*0b57cec5SDimitry Andric } 1554*0b57cec5SDimitry Andric 1555*0b57cec5SDimitry Andric Value *R = B.CreateCall(PMF, {P, Q}); 1556*0b57cec5SDimitry Andric 1557*0b57cec5SDimitry Andric if (PV.M != nullptr) 1558*0b57cec5SDimitry Andric R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false)); 1559*0b57cec5SDimitry Andric 1560*0b57cec5SDimitry Andric return R; 1561*0b57cec5SDimitry Andric } 1562*0b57cec5SDimitry Andric 1563*0b57cec5SDimitry Andric static bool hasZeroSignBit(const Value *V) { 1564*0b57cec5SDimitry Andric if (const auto *CI = dyn_cast<const ConstantInt>(V)) 1565*0b57cec5SDimitry Andric return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; 1566*0b57cec5SDimitry Andric const Instruction *I = dyn_cast<const Instruction>(V); 1567*0b57cec5SDimitry Andric if (!I) 1568*0b57cec5SDimitry Andric return false; 1569*0b57cec5SDimitry Andric switch (I->getOpcode()) { 1570*0b57cec5SDimitry Andric case Instruction::LShr: 1571*0b57cec5SDimitry Andric if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) 1572*0b57cec5SDimitry Andric return SI->getZExtValue() > 0; 1573*0b57cec5SDimitry Andric return false; 1574*0b57cec5SDimitry Andric case Instruction::Or: 1575*0b57cec5SDimitry Andric case Instruction::Xor: 1576*0b57cec5SDimitry Andric return hasZeroSignBit(I->getOperand(0)) && 1577*0b57cec5SDimitry Andric hasZeroSignBit(I->getOperand(1)); 1578*0b57cec5SDimitry Andric case Instruction::And: 1579*0b57cec5SDimitry Andric return hasZeroSignBit(I->getOperand(0)) || 1580*0b57cec5SDimitry Andric hasZeroSignBit(I->getOperand(1)); 1581*0b57cec5SDimitry Andric } 1582*0b57cec5SDimitry Andric return false; 1583*0b57cec5SDimitry Andric } 1584*0b57cec5SDimitry Andric 1585*0b57cec5SDimitry Andric void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { 1586*0b57cec5SDimitry Andric S.addRule("sink-zext", 1587*0b57cec5SDimitry Andric // Sink zext past bitwise operations. 1588*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1589*0b57cec5SDimitry Andric if (I->getOpcode() != Instruction::ZExt) 1590*0b57cec5SDimitry Andric return nullptr; 1591*0b57cec5SDimitry Andric Instruction *T = dyn_cast<Instruction>(I->getOperand(0)); 1592*0b57cec5SDimitry Andric if (!T) 1593*0b57cec5SDimitry Andric return nullptr; 1594*0b57cec5SDimitry Andric switch (T->getOpcode()) { 1595*0b57cec5SDimitry Andric case Instruction::And: 1596*0b57cec5SDimitry Andric case Instruction::Or: 1597*0b57cec5SDimitry Andric case Instruction::Xor: 1598*0b57cec5SDimitry Andric break; 1599*0b57cec5SDimitry Andric default: 1600*0b57cec5SDimitry Andric return nullptr; 1601*0b57cec5SDimitry Andric } 1602*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1603*0b57cec5SDimitry Andric return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(), 1604*0b57cec5SDimitry Andric B.CreateZExt(T->getOperand(0), I->getType()), 1605*0b57cec5SDimitry Andric B.CreateZExt(T->getOperand(1), I->getType())); 1606*0b57cec5SDimitry Andric }); 1607*0b57cec5SDimitry Andric S.addRule("xor/and -> and/xor", 1608*0b57cec5SDimitry Andric // (xor (and x a) (and y a)) -> (and (xor x y) a) 1609*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1610*0b57cec5SDimitry Andric if (I->getOpcode() != Instruction::Xor) 1611*0b57cec5SDimitry Andric return nullptr; 1612*0b57cec5SDimitry Andric Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0)); 1613*0b57cec5SDimitry Andric Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1)); 1614*0b57cec5SDimitry Andric if (!And0 || !And1) 1615*0b57cec5SDimitry Andric return nullptr; 1616*0b57cec5SDimitry Andric if (And0->getOpcode() != Instruction::And || 1617*0b57cec5SDimitry Andric And1->getOpcode() != Instruction::And) 1618*0b57cec5SDimitry Andric return nullptr; 1619*0b57cec5SDimitry Andric if (And0->getOperand(1) != And1->getOperand(1)) 1620*0b57cec5SDimitry Andric return nullptr; 1621*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1622*0b57cec5SDimitry Andric return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), 1623*0b57cec5SDimitry Andric And0->getOperand(1)); 1624*0b57cec5SDimitry Andric }); 1625*0b57cec5SDimitry Andric S.addRule("sink binop into select", 1626*0b57cec5SDimitry Andric // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) 1627*0b57cec5SDimitry Andric // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) 1628*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1629*0b57cec5SDimitry Andric BinaryOperator *BO = dyn_cast<BinaryOperator>(I); 1630*0b57cec5SDimitry Andric if (!BO) 1631*0b57cec5SDimitry Andric return nullptr; 1632*0b57cec5SDimitry Andric Instruction::BinaryOps Op = BO->getOpcode(); 1633*0b57cec5SDimitry Andric if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) { 1634*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1635*0b57cec5SDimitry Andric Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); 1636*0b57cec5SDimitry Andric Value *Z = BO->getOperand(1); 1637*0b57cec5SDimitry Andric return B.CreateSelect(Sel->getCondition(), 1638*0b57cec5SDimitry Andric B.CreateBinOp(Op, X, Z), 1639*0b57cec5SDimitry Andric B.CreateBinOp(Op, Y, Z)); 1640*0b57cec5SDimitry Andric } 1641*0b57cec5SDimitry Andric if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) { 1642*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1643*0b57cec5SDimitry Andric Value *X = BO->getOperand(0); 1644*0b57cec5SDimitry Andric Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); 1645*0b57cec5SDimitry Andric return B.CreateSelect(Sel->getCondition(), 1646*0b57cec5SDimitry Andric B.CreateBinOp(Op, X, Y), 1647*0b57cec5SDimitry Andric B.CreateBinOp(Op, X, Z)); 1648*0b57cec5SDimitry Andric } 1649*0b57cec5SDimitry Andric return nullptr; 1650*0b57cec5SDimitry Andric }); 1651*0b57cec5SDimitry Andric S.addRule("fold select-select", 1652*0b57cec5SDimitry Andric // (select c (select c x y) z) -> (select c x z) 1653*0b57cec5SDimitry Andric // (select c x (select c y z)) -> (select c x z) 1654*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1655*0b57cec5SDimitry Andric SelectInst *Sel = dyn_cast<SelectInst>(I); 1656*0b57cec5SDimitry Andric if (!Sel) 1657*0b57cec5SDimitry Andric return nullptr; 1658*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1659*0b57cec5SDimitry Andric Value *C = Sel->getCondition(); 1660*0b57cec5SDimitry Andric if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) { 1661*0b57cec5SDimitry Andric if (Sel0->getCondition() == C) 1662*0b57cec5SDimitry Andric return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); 1663*0b57cec5SDimitry Andric } 1664*0b57cec5SDimitry Andric if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) { 1665*0b57cec5SDimitry Andric if (Sel1->getCondition() == C) 1666*0b57cec5SDimitry Andric return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); 1667*0b57cec5SDimitry Andric } 1668*0b57cec5SDimitry Andric return nullptr; 1669*0b57cec5SDimitry Andric }); 1670*0b57cec5SDimitry Andric S.addRule("or-signbit -> xor-signbit", 1671*0b57cec5SDimitry Andric // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) 1672*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1673*0b57cec5SDimitry Andric if (I->getOpcode() != Instruction::Or) 1674*0b57cec5SDimitry Andric return nullptr; 1675*0b57cec5SDimitry Andric ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); 1676*0b57cec5SDimitry Andric if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) 1677*0b57cec5SDimitry Andric return nullptr; 1678*0b57cec5SDimitry Andric if (!hasZeroSignBit(I->getOperand(0))) 1679*0b57cec5SDimitry Andric return nullptr; 1680*0b57cec5SDimitry Andric return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); 1681*0b57cec5SDimitry Andric }); 1682*0b57cec5SDimitry Andric S.addRule("sink lshr into binop", 1683*0b57cec5SDimitry Andric // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) 1684*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1685*0b57cec5SDimitry Andric if (I->getOpcode() != Instruction::LShr) 1686*0b57cec5SDimitry Andric return nullptr; 1687*0b57cec5SDimitry Andric BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0)); 1688*0b57cec5SDimitry Andric if (!BitOp) 1689*0b57cec5SDimitry Andric return nullptr; 1690*0b57cec5SDimitry Andric switch (BitOp->getOpcode()) { 1691*0b57cec5SDimitry Andric case Instruction::And: 1692*0b57cec5SDimitry Andric case Instruction::Or: 1693*0b57cec5SDimitry Andric case Instruction::Xor: 1694*0b57cec5SDimitry Andric break; 1695*0b57cec5SDimitry Andric default: 1696*0b57cec5SDimitry Andric return nullptr; 1697*0b57cec5SDimitry Andric } 1698*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1699*0b57cec5SDimitry Andric Value *S = I->getOperand(1); 1700*0b57cec5SDimitry Andric return B.CreateBinOp(BitOp->getOpcode(), 1701*0b57cec5SDimitry Andric B.CreateLShr(BitOp->getOperand(0), S), 1702*0b57cec5SDimitry Andric B.CreateLShr(BitOp->getOperand(1), S)); 1703*0b57cec5SDimitry Andric }); 1704*0b57cec5SDimitry Andric S.addRule("expose bitop-const", 1705*0b57cec5SDimitry Andric // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) 1706*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1707*0b57cec5SDimitry Andric auto IsBitOp = [](unsigned Op) -> bool { 1708*0b57cec5SDimitry Andric switch (Op) { 1709*0b57cec5SDimitry Andric case Instruction::And: 1710*0b57cec5SDimitry Andric case Instruction::Or: 1711*0b57cec5SDimitry Andric case Instruction::Xor: 1712*0b57cec5SDimitry Andric return true; 1713*0b57cec5SDimitry Andric } 1714*0b57cec5SDimitry Andric return false; 1715*0b57cec5SDimitry Andric }; 1716*0b57cec5SDimitry Andric BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I); 1717*0b57cec5SDimitry Andric if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) 1718*0b57cec5SDimitry Andric return nullptr; 1719*0b57cec5SDimitry Andric BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0)); 1720*0b57cec5SDimitry Andric if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) 1721*0b57cec5SDimitry Andric return nullptr; 1722*0b57cec5SDimitry Andric ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1)); 1723*0b57cec5SDimitry Andric ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1)); 1724*0b57cec5SDimitry Andric if (!CA || !CB) 1725*0b57cec5SDimitry Andric return nullptr; 1726*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1727*0b57cec5SDimitry Andric Value *X = BitOp2->getOperand(0); 1728*0b57cec5SDimitry Andric return B.CreateBinOp(BitOp2->getOpcode(), X, 1729*0b57cec5SDimitry Andric B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); 1730*0b57cec5SDimitry Andric }); 1731*0b57cec5SDimitry Andric } 1732*0b57cec5SDimitry Andric 1733*0b57cec5SDimitry Andric void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { 1734*0b57cec5SDimitry Andric S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", 1735*0b57cec5SDimitry Andric [](Instruction *I, LLVMContext &Ctx) -> Value* { 1736*0b57cec5SDimitry Andric if (I->getOpcode() != Instruction::And) 1737*0b57cec5SDimitry Andric return nullptr; 1738*0b57cec5SDimitry Andric Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); 1739*0b57cec5SDimitry Andric ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); 1740*0b57cec5SDimitry Andric if (!Xor || !C0) 1741*0b57cec5SDimitry Andric return nullptr; 1742*0b57cec5SDimitry Andric if (Xor->getOpcode() != Instruction::Xor) 1743*0b57cec5SDimitry Andric return nullptr; 1744*0b57cec5SDimitry Andric Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); 1745*0b57cec5SDimitry Andric Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); 1746*0b57cec5SDimitry Andric // Pick the first non-null and. 1747*0b57cec5SDimitry Andric if (!And0 || And0->getOpcode() != Instruction::And) 1748*0b57cec5SDimitry Andric std::swap(And0, And1); 1749*0b57cec5SDimitry Andric ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); 1750*0b57cec5SDimitry Andric if (!C1) 1751*0b57cec5SDimitry Andric return nullptr; 1752*0b57cec5SDimitry Andric uint32_t V0 = C0->getZExtValue(); 1753*0b57cec5SDimitry Andric uint32_t V1 = C1->getZExtValue(); 1754*0b57cec5SDimitry Andric if (V0 != (V0 & V1)) 1755*0b57cec5SDimitry Andric return nullptr; 1756*0b57cec5SDimitry Andric IRBuilder<> B(Ctx); 1757*0b57cec5SDimitry Andric return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); 1758*0b57cec5SDimitry Andric }); 1759*0b57cec5SDimitry Andric } 1760*0b57cec5SDimitry Andric 1761*0b57cec5SDimitry Andric bool PolynomialMultiplyRecognize::recognize() { 1762*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" 1763*0b57cec5SDimitry Andric << *CurLoop << '\n'); 1764*0b57cec5SDimitry Andric // Restrictions: 1765*0b57cec5SDimitry Andric // - The loop must consist of a single block. 1766*0b57cec5SDimitry Andric // - The iteration count must be known at compile-time. 1767*0b57cec5SDimitry Andric // - The loop must have an induction variable starting from 0, and 1768*0b57cec5SDimitry Andric // incremented in each iteration of the loop. 1769*0b57cec5SDimitry Andric BasicBlock *LoopB = CurLoop->getHeader(); 1770*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB); 1771*0b57cec5SDimitry Andric 1772*0b57cec5SDimitry Andric if (LoopB != CurLoop->getLoopLatch()) 1773*0b57cec5SDimitry Andric return false; 1774*0b57cec5SDimitry Andric BasicBlock *ExitB = CurLoop->getExitBlock(); 1775*0b57cec5SDimitry Andric if (ExitB == nullptr) 1776*0b57cec5SDimitry Andric return false; 1777*0b57cec5SDimitry Andric BasicBlock *EntryB = CurLoop->getLoopPreheader(); 1778*0b57cec5SDimitry Andric if (EntryB == nullptr) 1779*0b57cec5SDimitry Andric return false; 1780*0b57cec5SDimitry Andric 1781*0b57cec5SDimitry Andric unsigned IterCount = 0; 1782*0b57cec5SDimitry Andric const SCEV *CT = SE.getBackedgeTakenCount(CurLoop); 1783*0b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(CT)) 1784*0b57cec5SDimitry Andric return false; 1785*0b57cec5SDimitry Andric if (auto *CV = dyn_cast<SCEVConstant>(CT)) 1786*0b57cec5SDimitry Andric IterCount = CV->getValue()->getZExtValue() + 1; 1787*0b57cec5SDimitry Andric 1788*0b57cec5SDimitry Andric Value *CIV = getCountIV(LoopB); 1789*0b57cec5SDimitry Andric ParsedValues PV; 1790*0b57cec5SDimitry Andric Simplifier PreSimp; 1791*0b57cec5SDimitry Andric PV.IterCount = IterCount; 1792*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount 1793*0b57cec5SDimitry Andric << '\n'); 1794*0b57cec5SDimitry Andric 1795*0b57cec5SDimitry Andric setupPreSimplifier(PreSimp); 1796*0b57cec5SDimitry Andric 1797*0b57cec5SDimitry Andric // Perform a preliminary scan of select instructions to see if any of them 1798*0b57cec5SDimitry Andric // looks like a generator of the polynomial multiply steps. Assume that a 1799*0b57cec5SDimitry Andric // loop can only contain a single transformable operation, so stop the 1800*0b57cec5SDimitry Andric // traversal after the first reasonable candidate was found. 1801*0b57cec5SDimitry Andric // XXX: Currently this approach can modify the loop before being 100% sure 1802*0b57cec5SDimitry Andric // that the transformation can be carried out. 1803*0b57cec5SDimitry Andric bool FoundPreScan = false; 1804*0b57cec5SDimitry Andric auto FeedsPHI = [LoopB](const Value *V) -> bool { 1805*0b57cec5SDimitry Andric for (const Value *U : V->users()) { 1806*0b57cec5SDimitry Andric if (const auto *P = dyn_cast<const PHINode>(U)) 1807*0b57cec5SDimitry Andric if (P->getParent() == LoopB) 1808*0b57cec5SDimitry Andric return true; 1809*0b57cec5SDimitry Andric } 1810*0b57cec5SDimitry Andric return false; 1811*0b57cec5SDimitry Andric }; 1812*0b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 1813*0b57cec5SDimitry Andric SelectInst *SI = dyn_cast<SelectInst>(&In); 1814*0b57cec5SDimitry Andric if (!SI || !FeedsPHI(SI)) 1815*0b57cec5SDimitry Andric continue; 1816*0b57cec5SDimitry Andric 1817*0b57cec5SDimitry Andric Simplifier::Context C(SI); 1818*0b57cec5SDimitry Andric Value *T = PreSimp.simplify(C); 1819*0b57cec5SDimitry Andric SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; 1820*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'); 1821*0b57cec5SDimitry Andric if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { 1822*0b57cec5SDimitry Andric FoundPreScan = true; 1823*0b57cec5SDimitry Andric if (SelI != SI) { 1824*0b57cec5SDimitry Andric Value *NewSel = C.materialize(LoopB, SI->getIterator()); 1825*0b57cec5SDimitry Andric SI->replaceAllUsesWith(NewSel); 1826*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); 1827*0b57cec5SDimitry Andric } 1828*0b57cec5SDimitry Andric break; 1829*0b57cec5SDimitry Andric } 1830*0b57cec5SDimitry Andric } 1831*0b57cec5SDimitry Andric 1832*0b57cec5SDimitry Andric if (!FoundPreScan) { 1833*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n"); 1834*0b57cec5SDimitry Andric return false; 1835*0b57cec5SDimitry Andric } 1836*0b57cec5SDimitry Andric 1837*0b57cec5SDimitry Andric if (!PV.Left) { 1838*0b57cec5SDimitry Andric // The right shift version actually only returns the higher bits of 1839*0b57cec5SDimitry Andric // the result (each iteration discards the LSB). If we want to convert it 1840*0b57cec5SDimitry Andric // to a left-shifting loop, the working data type must be at least as 1841*0b57cec5SDimitry Andric // wide as the target's pmpy instruction. 1842*0b57cec5SDimitry Andric if (!promoteTypes(LoopB, ExitB)) 1843*0b57cec5SDimitry Andric return false; 1844*0b57cec5SDimitry Andric // Run post-promotion simplifications. 1845*0b57cec5SDimitry Andric Simplifier PostSimp; 1846*0b57cec5SDimitry Andric setupPostSimplifier(PostSimp); 1847*0b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 1848*0b57cec5SDimitry Andric SelectInst *SI = dyn_cast<SelectInst>(&In); 1849*0b57cec5SDimitry Andric if (!SI || !FeedsPHI(SI)) 1850*0b57cec5SDimitry Andric continue; 1851*0b57cec5SDimitry Andric Simplifier::Context C(SI); 1852*0b57cec5SDimitry Andric Value *T = PostSimp.simplify(C); 1853*0b57cec5SDimitry Andric SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); 1854*0b57cec5SDimitry Andric if (SelI != SI) { 1855*0b57cec5SDimitry Andric Value *NewSel = C.materialize(LoopB, SI->getIterator()); 1856*0b57cec5SDimitry Andric SI->replaceAllUsesWith(NewSel); 1857*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); 1858*0b57cec5SDimitry Andric } 1859*0b57cec5SDimitry Andric break; 1860*0b57cec5SDimitry Andric } 1861*0b57cec5SDimitry Andric 1862*0b57cec5SDimitry Andric if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) 1863*0b57cec5SDimitry Andric return false; 1864*0b57cec5SDimitry Andric cleanupLoopBody(LoopB); 1865*0b57cec5SDimitry Andric } 1866*0b57cec5SDimitry Andric 1867*0b57cec5SDimitry Andric // Scan the loop again, find the generating select instruction. 1868*0b57cec5SDimitry Andric bool FoundScan = false; 1869*0b57cec5SDimitry Andric for (Instruction &In : *LoopB) { 1870*0b57cec5SDimitry Andric SelectInst *SelI = dyn_cast<SelectInst>(&In); 1871*0b57cec5SDimitry Andric if (!SelI) 1872*0b57cec5SDimitry Andric continue; 1873*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n'); 1874*0b57cec5SDimitry Andric FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); 1875*0b57cec5SDimitry Andric if (FoundScan) 1876*0b57cec5SDimitry Andric break; 1877*0b57cec5SDimitry Andric } 1878*0b57cec5SDimitry Andric assert(FoundScan); 1879*0b57cec5SDimitry Andric 1880*0b57cec5SDimitry Andric LLVM_DEBUG({ 1881*0b57cec5SDimitry Andric StringRef PP = (PV.M ? "(P+M)" : "P"); 1882*0b57cec5SDimitry Andric if (!PV.Inv) 1883*0b57cec5SDimitry Andric dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; 1884*0b57cec5SDimitry Andric else 1885*0b57cec5SDimitry Andric dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " 1886*0b57cec5SDimitry Andric << PP << "\n"; 1887*0b57cec5SDimitry Andric dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; 1888*0b57cec5SDimitry Andric if (PV.M) 1889*0b57cec5SDimitry Andric dbgs() << " M:" << *PV.M << "\n"; 1890*0b57cec5SDimitry Andric dbgs() << " Q:" << *PV.Q << "\n"; 1891*0b57cec5SDimitry Andric dbgs() << " Iteration count:" << PV.IterCount << "\n"; 1892*0b57cec5SDimitry Andric }); 1893*0b57cec5SDimitry Andric 1894*0b57cec5SDimitry Andric BasicBlock::iterator At(EntryB->getTerminator()); 1895*0b57cec5SDimitry Andric Value *PM = generate(At, PV); 1896*0b57cec5SDimitry Andric if (PM == nullptr) 1897*0b57cec5SDimitry Andric return false; 1898*0b57cec5SDimitry Andric 1899*0b57cec5SDimitry Andric if (PM->getType() != PV.Res->getType()) 1900*0b57cec5SDimitry Andric PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false); 1901*0b57cec5SDimitry Andric 1902*0b57cec5SDimitry Andric PV.Res->replaceAllUsesWith(PM); 1903*0b57cec5SDimitry Andric PV.Res->eraseFromParent(); 1904*0b57cec5SDimitry Andric return true; 1905*0b57cec5SDimitry Andric } 1906*0b57cec5SDimitry Andric 1907*0b57cec5SDimitry Andric int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { 1908*0b57cec5SDimitry Andric if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) 1909*0b57cec5SDimitry Andric return SC->getAPInt().getSExtValue(); 1910*0b57cec5SDimitry Andric return 0; 1911*0b57cec5SDimitry Andric } 1912*0b57cec5SDimitry Andric 1913*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { 1914*0b57cec5SDimitry Andric // Allow volatile stores if HexagonVolatileMemcpy is enabled. 1915*0b57cec5SDimitry Andric if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple()) 1916*0b57cec5SDimitry Andric return false; 1917*0b57cec5SDimitry Andric 1918*0b57cec5SDimitry Andric Value *StoredVal = SI->getValueOperand(); 1919*0b57cec5SDimitry Andric Value *StorePtr = SI->getPointerOperand(); 1920*0b57cec5SDimitry Andric 1921*0b57cec5SDimitry Andric // Reject stores that are so large that they overflow an unsigned. 1922*0b57cec5SDimitry Andric uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); 1923*0b57cec5SDimitry Andric if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) 1924*0b57cec5SDimitry Andric return false; 1925*0b57cec5SDimitry Andric 1926*0b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 1927*0b57cec5SDimitry Andric // loop, which indicates a strided store. If we have something else, it's a 1928*0b57cec5SDimitry Andric // random store we can't handle. 1929*0b57cec5SDimitry Andric auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 1930*0b57cec5SDimitry Andric if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) 1931*0b57cec5SDimitry Andric return false; 1932*0b57cec5SDimitry Andric 1933*0b57cec5SDimitry Andric // Check to see if the stride matches the size of the store. If so, then we 1934*0b57cec5SDimitry Andric // know that every byte is touched in the loop. 1935*0b57cec5SDimitry Andric int Stride = getSCEVStride(StoreEv); 1936*0b57cec5SDimitry Andric if (Stride == 0) 1937*0b57cec5SDimitry Andric return false; 1938*0b57cec5SDimitry Andric unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); 1939*0b57cec5SDimitry Andric if (StoreSize != unsigned(std::abs(Stride))) 1940*0b57cec5SDimitry Andric return false; 1941*0b57cec5SDimitry Andric 1942*0b57cec5SDimitry Andric // The store must be feeding a non-volatile load. 1943*0b57cec5SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); 1944*0b57cec5SDimitry Andric if (!LI || !LI->isSimple()) 1945*0b57cec5SDimitry Andric return false; 1946*0b57cec5SDimitry Andric 1947*0b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 1948*0b57cec5SDimitry Andric // loop, which indicates a strided load. If we have something else, it's a 1949*0b57cec5SDimitry Andric // random load we can't handle. 1950*0b57cec5SDimitry Andric Value *LoadPtr = LI->getPointerOperand(); 1951*0b57cec5SDimitry Andric auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); 1952*0b57cec5SDimitry Andric if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) 1953*0b57cec5SDimitry Andric return false; 1954*0b57cec5SDimitry Andric 1955*0b57cec5SDimitry Andric // The store and load must share the same stride. 1956*0b57cec5SDimitry Andric if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) 1957*0b57cec5SDimitry Andric return false; 1958*0b57cec5SDimitry Andric 1959*0b57cec5SDimitry Andric // Success. This store can be converted into a memcpy. 1960*0b57cec5SDimitry Andric return true; 1961*0b57cec5SDimitry Andric } 1962*0b57cec5SDimitry Andric 1963*0b57cec5SDimitry Andric /// mayLoopAccessLocation - Return true if the specified loop might access the 1964*0b57cec5SDimitry Andric /// specified pointer location, which is a loop-strided access. The 'Access' 1965*0b57cec5SDimitry Andric /// argument specifies what the verboten forms of access are (read or write). 1966*0b57cec5SDimitry Andric static bool 1967*0b57cec5SDimitry Andric mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, 1968*0b57cec5SDimitry Andric const SCEV *BECount, unsigned StoreSize, 1969*0b57cec5SDimitry Andric AliasAnalysis &AA, 1970*0b57cec5SDimitry Andric SmallPtrSetImpl<Instruction *> &Ignored) { 1971*0b57cec5SDimitry Andric // Get the location that may be stored across the loop. Since the access 1972*0b57cec5SDimitry Andric // is strided positively through memory, we say that the modified location 1973*0b57cec5SDimitry Andric // starts at the pointer and has infinite size. 1974*0b57cec5SDimitry Andric LocationSize AccessSize = LocationSize::unknown(); 1975*0b57cec5SDimitry Andric 1976*0b57cec5SDimitry Andric // If the loop iterates a fixed number of times, we can refine the access 1977*0b57cec5SDimitry Andric // size to be exactly the size of the memset, which is (BECount+1)*StoreSize 1978*0b57cec5SDimitry Andric if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) 1979*0b57cec5SDimitry Andric AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * 1980*0b57cec5SDimitry Andric StoreSize); 1981*0b57cec5SDimitry Andric 1982*0b57cec5SDimitry Andric // TODO: For this to be really effective, we have to dive into the pointer 1983*0b57cec5SDimitry Andric // operand in the store. Store to &A[i] of 100 will always return may alias 1984*0b57cec5SDimitry Andric // with store of &A[100], we need to StoreLoc to be "A" with size of 100, 1985*0b57cec5SDimitry Andric // which will then no-alias a store to &A[100]. 1986*0b57cec5SDimitry Andric MemoryLocation StoreLoc(Ptr, AccessSize); 1987*0b57cec5SDimitry Andric 1988*0b57cec5SDimitry Andric for (auto *B : L->blocks()) 1989*0b57cec5SDimitry Andric for (auto &I : *B) 1990*0b57cec5SDimitry Andric if (Ignored.count(&I) == 0 && 1991*0b57cec5SDimitry Andric isModOrRefSet( 1992*0b57cec5SDimitry Andric intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) 1993*0b57cec5SDimitry Andric return true; 1994*0b57cec5SDimitry Andric 1995*0b57cec5SDimitry Andric return false; 1996*0b57cec5SDimitry Andric } 1997*0b57cec5SDimitry Andric 1998*0b57cec5SDimitry Andric void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB, 1999*0b57cec5SDimitry Andric SmallVectorImpl<StoreInst*> &Stores) { 2000*0b57cec5SDimitry Andric Stores.clear(); 2001*0b57cec5SDimitry Andric for (Instruction &I : *BB) 2002*0b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(&I)) 2003*0b57cec5SDimitry Andric if (isLegalStore(CurLoop, SI)) 2004*0b57cec5SDimitry Andric Stores.push_back(SI); 2005*0b57cec5SDimitry Andric } 2006*0b57cec5SDimitry Andric 2007*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, 2008*0b57cec5SDimitry Andric StoreInst *SI, const SCEV *BECount) { 2009*0b57cec5SDimitry Andric assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && 2010*0b57cec5SDimitry Andric "Expected only non-volatile stores, or Hexagon-specific memcpy" 2011*0b57cec5SDimitry Andric "to volatile destination."); 2012*0b57cec5SDimitry Andric 2013*0b57cec5SDimitry Andric Value *StorePtr = SI->getPointerOperand(); 2014*0b57cec5SDimitry Andric auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); 2015*0b57cec5SDimitry Andric unsigned Stride = getSCEVStride(StoreEv); 2016*0b57cec5SDimitry Andric unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); 2017*0b57cec5SDimitry Andric if (Stride != StoreSize) 2018*0b57cec5SDimitry Andric return false; 2019*0b57cec5SDimitry Andric 2020*0b57cec5SDimitry Andric // See if the pointer expression is an AddRec like {base,+,1} on the current 2021*0b57cec5SDimitry Andric // loop, which indicates a strided load. If we have something else, it's a 2022*0b57cec5SDimitry Andric // random load we can't handle. 2023*0b57cec5SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); 2024*0b57cec5SDimitry Andric auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); 2025*0b57cec5SDimitry Andric 2026*0b57cec5SDimitry Andric // The trip count of the loop and the base pointer of the addrec SCEV is 2027*0b57cec5SDimitry Andric // guaranteed to be loop invariant, which means that it should dominate the 2028*0b57cec5SDimitry Andric // header. This allows us to insert code for it in the preheader. 2029*0b57cec5SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader(); 2030*0b57cec5SDimitry Andric Instruction *ExpPt = Preheader->getTerminator(); 2031*0b57cec5SDimitry Andric IRBuilder<> Builder(ExpPt); 2032*0b57cec5SDimitry Andric SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom"); 2033*0b57cec5SDimitry Andric 2034*0b57cec5SDimitry Andric Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); 2035*0b57cec5SDimitry Andric 2036*0b57cec5SDimitry Andric // Okay, we have a strided store "p[i]" of a loaded value. We can turn 2037*0b57cec5SDimitry Andric // this into a memcpy/memmove in the loop preheader now if we want. However, 2038*0b57cec5SDimitry Andric // this would be unsafe to do if there is anything else in the loop that may 2039*0b57cec5SDimitry Andric // read or write the memory region we're storing to. For memcpy, this 2040*0b57cec5SDimitry Andric // includes the load that feeds the stores. Check for an alias by generating 2041*0b57cec5SDimitry Andric // the base address and checking everything. 2042*0b57cec5SDimitry Andric Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), 2043*0b57cec5SDimitry Andric Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt); 2044*0b57cec5SDimitry Andric Value *LoadBasePtr = nullptr; 2045*0b57cec5SDimitry Andric 2046*0b57cec5SDimitry Andric bool Overlap = false; 2047*0b57cec5SDimitry Andric bool DestVolatile = SI->isVolatile(); 2048*0b57cec5SDimitry Andric Type *BECountTy = BECount->getType(); 2049*0b57cec5SDimitry Andric 2050*0b57cec5SDimitry Andric if (DestVolatile) { 2051*0b57cec5SDimitry Andric // The trip count must fit in i32, since it is the type of the "num_words" 2052*0b57cec5SDimitry Andric // argument to hexagon_memcpy_forward_vp4cp4n2. 2053*0b57cec5SDimitry Andric if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) { 2054*0b57cec5SDimitry Andric CleanupAndExit: 2055*0b57cec5SDimitry Andric // If we generated new code for the base pointer, clean up. 2056*0b57cec5SDimitry Andric Expander.clear(); 2057*0b57cec5SDimitry Andric if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { 2058*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); 2059*0b57cec5SDimitry Andric StoreBasePtr = nullptr; 2060*0b57cec5SDimitry Andric } 2061*0b57cec5SDimitry Andric if (LoadBasePtr) { 2062*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); 2063*0b57cec5SDimitry Andric LoadBasePtr = nullptr; 2064*0b57cec5SDimitry Andric } 2065*0b57cec5SDimitry Andric return false; 2066*0b57cec5SDimitry Andric } 2067*0b57cec5SDimitry Andric } 2068*0b57cec5SDimitry Andric 2069*0b57cec5SDimitry Andric SmallPtrSet<Instruction*, 2> Ignore1; 2070*0b57cec5SDimitry Andric Ignore1.insert(SI); 2071*0b57cec5SDimitry Andric if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, 2072*0b57cec5SDimitry Andric StoreSize, *AA, Ignore1)) { 2073*0b57cec5SDimitry Andric // Check if the load is the offending instruction. 2074*0b57cec5SDimitry Andric Ignore1.insert(LI); 2075*0b57cec5SDimitry Andric if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, 2076*0b57cec5SDimitry Andric BECount, StoreSize, *AA, Ignore1)) { 2077*0b57cec5SDimitry Andric // Still bad. Nothing we can do. 2078*0b57cec5SDimitry Andric goto CleanupAndExit; 2079*0b57cec5SDimitry Andric } 2080*0b57cec5SDimitry Andric // It worked with the load ignored. 2081*0b57cec5SDimitry Andric Overlap = true; 2082*0b57cec5SDimitry Andric } 2083*0b57cec5SDimitry Andric 2084*0b57cec5SDimitry Andric if (!Overlap) { 2085*0b57cec5SDimitry Andric if (DisableMemcpyIdiom || !HasMemcpy) 2086*0b57cec5SDimitry Andric goto CleanupAndExit; 2087*0b57cec5SDimitry Andric } else { 2088*0b57cec5SDimitry Andric // Don't generate memmove if this function will be inlined. This is 2089*0b57cec5SDimitry Andric // because the caller will undergo this transformation after inlining. 2090*0b57cec5SDimitry Andric Function *Func = CurLoop->getHeader()->getParent(); 2091*0b57cec5SDimitry Andric if (Func->hasFnAttribute(Attribute::AlwaysInline)) 2092*0b57cec5SDimitry Andric goto CleanupAndExit; 2093*0b57cec5SDimitry Andric 2094*0b57cec5SDimitry Andric // In case of a memmove, the call to memmove will be executed instead 2095*0b57cec5SDimitry Andric // of the loop, so we need to make sure that there is nothing else in 2096*0b57cec5SDimitry Andric // the loop than the load, store and instructions that these two depend 2097*0b57cec5SDimitry Andric // on. 2098*0b57cec5SDimitry Andric SmallVector<Instruction*,2> Insts; 2099*0b57cec5SDimitry Andric Insts.push_back(SI); 2100*0b57cec5SDimitry Andric Insts.push_back(LI); 2101*0b57cec5SDimitry Andric if (!coverLoop(CurLoop, Insts)) 2102*0b57cec5SDimitry Andric goto CleanupAndExit; 2103*0b57cec5SDimitry Andric 2104*0b57cec5SDimitry Andric if (DisableMemmoveIdiom || !HasMemmove) 2105*0b57cec5SDimitry Andric goto CleanupAndExit; 2106*0b57cec5SDimitry Andric bool IsNested = CurLoop->getParentLoop() != nullptr; 2107*0b57cec5SDimitry Andric if (IsNested && OnlyNonNestedMemmove) 2108*0b57cec5SDimitry Andric goto CleanupAndExit; 2109*0b57cec5SDimitry Andric } 2110*0b57cec5SDimitry Andric 2111*0b57cec5SDimitry Andric // For a memcpy, we have to make sure that the input array is not being 2112*0b57cec5SDimitry Andric // mutated by the loop. 2113*0b57cec5SDimitry Andric LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), 2114*0b57cec5SDimitry Andric Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt); 2115*0b57cec5SDimitry Andric 2116*0b57cec5SDimitry Andric SmallPtrSet<Instruction*, 2> Ignore2; 2117*0b57cec5SDimitry Andric Ignore2.insert(SI); 2118*0b57cec5SDimitry Andric if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, 2119*0b57cec5SDimitry Andric StoreSize, *AA, Ignore2)) 2120*0b57cec5SDimitry Andric goto CleanupAndExit; 2121*0b57cec5SDimitry Andric 2122*0b57cec5SDimitry Andric // Check the stride. 2123*0b57cec5SDimitry Andric bool StridePos = getSCEVStride(LoadEv) >= 0; 2124*0b57cec5SDimitry Andric 2125*0b57cec5SDimitry Andric // Currently, the volatile memcpy only emulates traversing memory forward. 2126*0b57cec5SDimitry Andric if (!StridePos && DestVolatile) 2127*0b57cec5SDimitry Andric goto CleanupAndExit; 2128*0b57cec5SDimitry Andric 2129*0b57cec5SDimitry Andric bool RuntimeCheck = (Overlap || DestVolatile); 2130*0b57cec5SDimitry Andric 2131*0b57cec5SDimitry Andric BasicBlock *ExitB; 2132*0b57cec5SDimitry Andric if (RuntimeCheck) { 2133*0b57cec5SDimitry Andric // The runtime check needs a single exit block. 2134*0b57cec5SDimitry Andric SmallVector<BasicBlock*, 8> ExitBlocks; 2135*0b57cec5SDimitry Andric CurLoop->getUniqueExitBlocks(ExitBlocks); 2136*0b57cec5SDimitry Andric if (ExitBlocks.size() != 1) 2137*0b57cec5SDimitry Andric goto CleanupAndExit; 2138*0b57cec5SDimitry Andric ExitB = ExitBlocks[0]; 2139*0b57cec5SDimitry Andric } 2140*0b57cec5SDimitry Andric 2141*0b57cec5SDimitry Andric // The # stored bytes is (BECount+1)*Size. Expand the trip count out to 2142*0b57cec5SDimitry Andric // pointer size if it isn't already. 2143*0b57cec5SDimitry Andric LLVMContext &Ctx = SI->getContext(); 2144*0b57cec5SDimitry Andric BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); 2145*0b57cec5SDimitry Andric DebugLoc DLoc = SI->getDebugLoc(); 2146*0b57cec5SDimitry Andric 2147*0b57cec5SDimitry Andric const SCEV *NumBytesS = 2148*0b57cec5SDimitry Andric SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); 2149*0b57cec5SDimitry Andric if (StoreSize != 1) 2150*0b57cec5SDimitry Andric NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), 2151*0b57cec5SDimitry Andric SCEV::FlagNUW); 2152*0b57cec5SDimitry Andric Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); 2153*0b57cec5SDimitry Andric if (Instruction *In = dyn_cast<Instruction>(NumBytes)) 2154*0b57cec5SDimitry Andric if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) 2155*0b57cec5SDimitry Andric NumBytes = Simp; 2156*0b57cec5SDimitry Andric 2157*0b57cec5SDimitry Andric CallInst *NewCall; 2158*0b57cec5SDimitry Andric 2159*0b57cec5SDimitry Andric if (RuntimeCheck) { 2160*0b57cec5SDimitry Andric unsigned Threshold = RuntimeMemSizeThreshold; 2161*0b57cec5SDimitry Andric if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) { 2162*0b57cec5SDimitry Andric uint64_t C = CI->getZExtValue(); 2163*0b57cec5SDimitry Andric if (Threshold != 0 && C < Threshold) 2164*0b57cec5SDimitry Andric goto CleanupAndExit; 2165*0b57cec5SDimitry Andric if (C < CompileTimeMemSizeThreshold) 2166*0b57cec5SDimitry Andric goto CleanupAndExit; 2167*0b57cec5SDimitry Andric } 2168*0b57cec5SDimitry Andric 2169*0b57cec5SDimitry Andric BasicBlock *Header = CurLoop->getHeader(); 2170*0b57cec5SDimitry Andric Function *Func = Header->getParent(); 2171*0b57cec5SDimitry Andric Loop *ParentL = LF->getLoopFor(Preheader); 2172*0b57cec5SDimitry Andric StringRef HeaderName = Header->getName(); 2173*0b57cec5SDimitry Andric 2174*0b57cec5SDimitry Andric // Create a new (empty) preheader, and update the PHI nodes in the 2175*0b57cec5SDimitry Andric // header to use the new preheader. 2176*0b57cec5SDimitry Andric BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", 2177*0b57cec5SDimitry Andric Func, Header); 2178*0b57cec5SDimitry Andric if (ParentL) 2179*0b57cec5SDimitry Andric ParentL->addBasicBlockToLoop(NewPreheader, *LF); 2180*0b57cec5SDimitry Andric IRBuilder<>(NewPreheader).CreateBr(Header); 2181*0b57cec5SDimitry Andric for (auto &In : *Header) { 2182*0b57cec5SDimitry Andric PHINode *PN = dyn_cast<PHINode>(&In); 2183*0b57cec5SDimitry Andric if (!PN) 2184*0b57cec5SDimitry Andric break; 2185*0b57cec5SDimitry Andric int bx = PN->getBasicBlockIndex(Preheader); 2186*0b57cec5SDimitry Andric if (bx >= 0) 2187*0b57cec5SDimitry Andric PN->setIncomingBlock(bx, NewPreheader); 2188*0b57cec5SDimitry Andric } 2189*0b57cec5SDimitry Andric DT->addNewBlock(NewPreheader, Preheader); 2190*0b57cec5SDimitry Andric DT->changeImmediateDominator(Header, NewPreheader); 2191*0b57cec5SDimitry Andric 2192*0b57cec5SDimitry Andric // Check for safe conditions to execute memmove. 2193*0b57cec5SDimitry Andric // If stride is positive, copying things from higher to lower addresses 2194*0b57cec5SDimitry Andric // is equivalent to memmove. For negative stride, it's the other way 2195*0b57cec5SDimitry Andric // around. Copying forward in memory with positive stride may not be 2196*0b57cec5SDimitry Andric // same as memmove since we may be copying values that we just stored 2197*0b57cec5SDimitry Andric // in some previous iteration. 2198*0b57cec5SDimitry Andric Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); 2199*0b57cec5SDimitry Andric Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); 2200*0b57cec5SDimitry Andric Value *LowA = StridePos ? SA : LA; 2201*0b57cec5SDimitry Andric Value *HighA = StridePos ? LA : SA; 2202*0b57cec5SDimitry Andric Value *CmpA = Builder.CreateICmpULT(LowA, HighA); 2203*0b57cec5SDimitry Andric Value *Cond = CmpA; 2204*0b57cec5SDimitry Andric 2205*0b57cec5SDimitry Andric // Check for distance between pointers. Since the case LowA < HighA 2206*0b57cec5SDimitry Andric // is checked for above, assume LowA >= HighA. 2207*0b57cec5SDimitry Andric Value *Dist = Builder.CreateSub(LowA, HighA); 2208*0b57cec5SDimitry Andric Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist); 2209*0b57cec5SDimitry Andric Value *CmpEither = Builder.CreateOr(Cond, CmpD); 2210*0b57cec5SDimitry Andric Cond = CmpEither; 2211*0b57cec5SDimitry Andric 2212*0b57cec5SDimitry Andric if (Threshold != 0) { 2213*0b57cec5SDimitry Andric Type *Ty = NumBytes->getType(); 2214*0b57cec5SDimitry Andric Value *Thr = ConstantInt::get(Ty, Threshold); 2215*0b57cec5SDimitry Andric Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); 2216*0b57cec5SDimitry Andric Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); 2217*0b57cec5SDimitry Andric Cond = CmpBoth; 2218*0b57cec5SDimitry Andric } 2219*0b57cec5SDimitry Andric BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", 2220*0b57cec5SDimitry Andric Func, NewPreheader); 2221*0b57cec5SDimitry Andric if (ParentL) 2222*0b57cec5SDimitry Andric ParentL->addBasicBlockToLoop(MemmoveB, *LF); 2223*0b57cec5SDimitry Andric Instruction *OldT = Preheader->getTerminator(); 2224*0b57cec5SDimitry Andric Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); 2225*0b57cec5SDimitry Andric OldT->eraseFromParent(); 2226*0b57cec5SDimitry Andric Preheader->setName(Preheader->getName()+".old"); 2227*0b57cec5SDimitry Andric DT->addNewBlock(MemmoveB, Preheader); 2228*0b57cec5SDimitry Andric // Find the new immediate dominator of the exit block. 2229*0b57cec5SDimitry Andric BasicBlock *ExitD = Preheader; 2230*0b57cec5SDimitry Andric for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) { 2231*0b57cec5SDimitry Andric BasicBlock *PB = *PI; 2232*0b57cec5SDimitry Andric ExitD = DT->findNearestCommonDominator(ExitD, PB); 2233*0b57cec5SDimitry Andric if (!ExitD) 2234*0b57cec5SDimitry Andric break; 2235*0b57cec5SDimitry Andric } 2236*0b57cec5SDimitry Andric // If the prior immediate dominator of ExitB was dominated by the 2237*0b57cec5SDimitry Andric // old preheader, then the old preheader becomes the new immediate 2238*0b57cec5SDimitry Andric // dominator. Otherwise don't change anything (because the newly 2239*0b57cec5SDimitry Andric // added blocks are dominated by the old preheader). 2240*0b57cec5SDimitry Andric if (ExitD && DT->dominates(Preheader, ExitD)) { 2241*0b57cec5SDimitry Andric DomTreeNode *BN = DT->getNode(ExitB); 2242*0b57cec5SDimitry Andric DomTreeNode *DN = DT->getNode(ExitD); 2243*0b57cec5SDimitry Andric BN->setIDom(DN); 2244*0b57cec5SDimitry Andric } 2245*0b57cec5SDimitry Andric 2246*0b57cec5SDimitry Andric // Add a call to memmove to the conditional block. 2247*0b57cec5SDimitry Andric IRBuilder<> CondBuilder(MemmoveB); 2248*0b57cec5SDimitry Andric CondBuilder.CreateBr(ExitB); 2249*0b57cec5SDimitry Andric CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); 2250*0b57cec5SDimitry Andric 2251*0b57cec5SDimitry Andric if (DestVolatile) { 2252*0b57cec5SDimitry Andric Type *Int32Ty = Type::getInt32Ty(Ctx); 2253*0b57cec5SDimitry Andric Type *Int32PtrTy = Type::getInt32PtrTy(Ctx); 2254*0b57cec5SDimitry Andric Type *VoidTy = Type::getVoidTy(Ctx); 2255*0b57cec5SDimitry Andric Module *M = Func->getParent(); 2256*0b57cec5SDimitry Andric FunctionCallee Fn = M->getOrInsertFunction( 2257*0b57cec5SDimitry Andric HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty); 2258*0b57cec5SDimitry Andric 2259*0b57cec5SDimitry Andric const SCEV *OneS = SE->getConstant(Int32Ty, 1); 2260*0b57cec5SDimitry Andric const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty); 2261*0b57cec5SDimitry Andric const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW); 2262*0b57cec5SDimitry Andric Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, 2263*0b57cec5SDimitry Andric MemmoveB->getTerminator()); 2264*0b57cec5SDimitry Andric if (Instruction *In = dyn_cast<Instruction>(NumWords)) 2265*0b57cec5SDimitry Andric if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) 2266*0b57cec5SDimitry Andric NumWords = Simp; 2267*0b57cec5SDimitry Andric 2268*0b57cec5SDimitry Andric Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) 2269*0b57cec5SDimitry Andric ? StoreBasePtr 2270*0b57cec5SDimitry Andric : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy); 2271*0b57cec5SDimitry Andric Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy) 2272*0b57cec5SDimitry Andric ? LoadBasePtr 2273*0b57cec5SDimitry Andric : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); 2274*0b57cec5SDimitry Andric NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); 2275*0b57cec5SDimitry Andric } else { 2276*0b57cec5SDimitry Andric NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(), 2277*0b57cec5SDimitry Andric LoadBasePtr, LI->getAlignment(), 2278*0b57cec5SDimitry Andric NumBytes); 2279*0b57cec5SDimitry Andric } 2280*0b57cec5SDimitry Andric } else { 2281*0b57cec5SDimitry Andric NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(), 2282*0b57cec5SDimitry Andric LoadBasePtr, LI->getAlignment(), 2283*0b57cec5SDimitry Andric NumBytes); 2284*0b57cec5SDimitry Andric // Okay, the memcpy has been formed. Zap the original store and 2285*0b57cec5SDimitry Andric // anything that feeds into it. 2286*0b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); 2287*0b57cec5SDimitry Andric } 2288*0b57cec5SDimitry Andric 2289*0b57cec5SDimitry Andric NewCall->setDebugLoc(DLoc); 2290*0b57cec5SDimitry Andric 2291*0b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") 2292*0b57cec5SDimitry Andric << *NewCall << "\n" 2293*0b57cec5SDimitry Andric << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" 2294*0b57cec5SDimitry Andric << " from store ptr=" << *StoreEv << " at: " << *SI 2295*0b57cec5SDimitry Andric << "\n"); 2296*0b57cec5SDimitry Andric 2297*0b57cec5SDimitry Andric return true; 2298*0b57cec5SDimitry Andric } 2299*0b57cec5SDimitry Andric 2300*0b57cec5SDimitry Andric // Check if the instructions in Insts, together with their dependencies 2301*0b57cec5SDimitry Andric // cover the loop in the sense that the loop could be safely eliminated once 2302*0b57cec5SDimitry Andric // the instructions in Insts are removed. 2303*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::coverLoop(Loop *L, 2304*0b57cec5SDimitry Andric SmallVectorImpl<Instruction*> &Insts) const { 2305*0b57cec5SDimitry Andric SmallSet<BasicBlock*,8> LoopBlocks; 2306*0b57cec5SDimitry Andric for (auto *B : L->blocks()) 2307*0b57cec5SDimitry Andric LoopBlocks.insert(B); 2308*0b57cec5SDimitry Andric 2309*0b57cec5SDimitry Andric SetVector<Instruction*> Worklist(Insts.begin(), Insts.end()); 2310*0b57cec5SDimitry Andric 2311*0b57cec5SDimitry Andric // Collect all instructions from the loop that the instructions in Insts 2312*0b57cec5SDimitry Andric // depend on (plus their dependencies, etc.). These instructions will 2313*0b57cec5SDimitry Andric // constitute the expression trees that feed those in Insts, but the trees 2314*0b57cec5SDimitry Andric // will be limited only to instructions contained in the loop. 2315*0b57cec5SDimitry Andric for (unsigned i = 0; i < Worklist.size(); ++i) { 2316*0b57cec5SDimitry Andric Instruction *In = Worklist[i]; 2317*0b57cec5SDimitry Andric for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { 2318*0b57cec5SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(I); 2319*0b57cec5SDimitry Andric if (!OpI) 2320*0b57cec5SDimitry Andric continue; 2321*0b57cec5SDimitry Andric BasicBlock *PB = OpI->getParent(); 2322*0b57cec5SDimitry Andric if (!LoopBlocks.count(PB)) 2323*0b57cec5SDimitry Andric continue; 2324*0b57cec5SDimitry Andric Worklist.insert(OpI); 2325*0b57cec5SDimitry Andric } 2326*0b57cec5SDimitry Andric } 2327*0b57cec5SDimitry Andric 2328*0b57cec5SDimitry Andric // Scan all instructions in the loop, if any of them have a user outside 2329*0b57cec5SDimitry Andric // of the loop, or outside of the expressions collected above, then either 2330*0b57cec5SDimitry Andric // the loop has a side-effect visible outside of it, or there are 2331*0b57cec5SDimitry Andric // instructions in it that are not involved in the original set Insts. 2332*0b57cec5SDimitry Andric for (auto *B : L->blocks()) { 2333*0b57cec5SDimitry Andric for (auto &In : *B) { 2334*0b57cec5SDimitry Andric if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In)) 2335*0b57cec5SDimitry Andric continue; 2336*0b57cec5SDimitry Andric if (!Worklist.count(&In) && In.mayHaveSideEffects()) 2337*0b57cec5SDimitry Andric return false; 2338*0b57cec5SDimitry Andric for (const auto &K : In.users()) { 2339*0b57cec5SDimitry Andric Instruction *UseI = dyn_cast<Instruction>(K); 2340*0b57cec5SDimitry Andric if (!UseI) 2341*0b57cec5SDimitry Andric continue; 2342*0b57cec5SDimitry Andric BasicBlock *UseB = UseI->getParent(); 2343*0b57cec5SDimitry Andric if (LF->getLoopFor(UseB) != L) 2344*0b57cec5SDimitry Andric return false; 2345*0b57cec5SDimitry Andric } 2346*0b57cec5SDimitry Andric } 2347*0b57cec5SDimitry Andric } 2348*0b57cec5SDimitry Andric 2349*0b57cec5SDimitry Andric return true; 2350*0b57cec5SDimitry Andric } 2351*0b57cec5SDimitry Andric 2352*0b57cec5SDimitry Andric /// runOnLoopBlock - Process the specified block, which lives in a counted loop 2353*0b57cec5SDimitry Andric /// with the specified backedge count. This block is known to be in the current 2354*0b57cec5SDimitry Andric /// loop and not in any subloops. 2355*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, 2356*0b57cec5SDimitry Andric const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) { 2357*0b57cec5SDimitry Andric // We can only promote stores in this block if they are unconditionally 2358*0b57cec5SDimitry Andric // executed in the loop. For a block to be unconditionally executed, it has 2359*0b57cec5SDimitry Andric // to dominate all the exit blocks of the loop. Verify this now. 2360*0b57cec5SDimitry Andric auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool { 2361*0b57cec5SDimitry Andric return DT->dominates(BB, EB); 2362*0b57cec5SDimitry Andric }; 2363*0b57cec5SDimitry Andric if (!all_of(ExitBlocks, DominatedByBB)) 2364*0b57cec5SDimitry Andric return false; 2365*0b57cec5SDimitry Andric 2366*0b57cec5SDimitry Andric bool MadeChange = false; 2367*0b57cec5SDimitry Andric // Look for store instructions, which may be optimized to memset/memcpy. 2368*0b57cec5SDimitry Andric SmallVector<StoreInst*,8> Stores; 2369*0b57cec5SDimitry Andric collectStores(CurLoop, BB, Stores); 2370*0b57cec5SDimitry Andric 2371*0b57cec5SDimitry Andric // Optimize the store into a memcpy, if it feeds an similarly strided load. 2372*0b57cec5SDimitry Andric for (auto &SI : Stores) 2373*0b57cec5SDimitry Andric MadeChange |= processCopyingStore(CurLoop, SI, BECount); 2374*0b57cec5SDimitry Andric 2375*0b57cec5SDimitry Andric return MadeChange; 2376*0b57cec5SDimitry Andric } 2377*0b57cec5SDimitry Andric 2378*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) { 2379*0b57cec5SDimitry Andric PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE); 2380*0b57cec5SDimitry Andric if (PMR.recognize()) 2381*0b57cec5SDimitry Andric return true; 2382*0b57cec5SDimitry Andric 2383*0b57cec5SDimitry Andric if (!HasMemcpy && !HasMemmove) 2384*0b57cec5SDimitry Andric return false; 2385*0b57cec5SDimitry Andric 2386*0b57cec5SDimitry Andric const SCEV *BECount = SE->getBackedgeTakenCount(L); 2387*0b57cec5SDimitry Andric assert(!isa<SCEVCouldNotCompute>(BECount) && 2388*0b57cec5SDimitry Andric "runOnCountableLoop() called on a loop without a predictable" 2389*0b57cec5SDimitry Andric "backedge-taken count"); 2390*0b57cec5SDimitry Andric 2391*0b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks; 2392*0b57cec5SDimitry Andric L->getUniqueExitBlocks(ExitBlocks); 2393*0b57cec5SDimitry Andric 2394*0b57cec5SDimitry Andric bool Changed = false; 2395*0b57cec5SDimitry Andric 2396*0b57cec5SDimitry Andric // Scan all the blocks in the loop that are not in subloops. 2397*0b57cec5SDimitry Andric for (auto *BB : L->getBlocks()) { 2398*0b57cec5SDimitry Andric // Ignore blocks in subloops. 2399*0b57cec5SDimitry Andric if (LF->getLoopFor(BB) != L) 2400*0b57cec5SDimitry Andric continue; 2401*0b57cec5SDimitry Andric Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks); 2402*0b57cec5SDimitry Andric } 2403*0b57cec5SDimitry Andric 2404*0b57cec5SDimitry Andric return Changed; 2405*0b57cec5SDimitry Andric } 2406*0b57cec5SDimitry Andric 2407*0b57cec5SDimitry Andric bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { 2408*0b57cec5SDimitry Andric const Module &M = *L->getHeader()->getParent()->getParent(); 2409*0b57cec5SDimitry Andric if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon) 2410*0b57cec5SDimitry Andric return false; 2411*0b57cec5SDimitry Andric 2412*0b57cec5SDimitry Andric if (skipLoop(L)) 2413*0b57cec5SDimitry Andric return false; 2414*0b57cec5SDimitry Andric 2415*0b57cec5SDimitry Andric // If the loop could not be converted to canonical form, it must have an 2416*0b57cec5SDimitry Andric // indirectbr in it, just give up. 2417*0b57cec5SDimitry Andric if (!L->getLoopPreheader()) 2418*0b57cec5SDimitry Andric return false; 2419*0b57cec5SDimitry Andric 2420*0b57cec5SDimitry Andric // Disable loop idiom recognition if the function's name is a common idiom. 2421*0b57cec5SDimitry Andric StringRef Name = L->getHeader()->getParent()->getName(); 2422*0b57cec5SDimitry Andric if (Name == "memset" || Name == "memcpy" || Name == "memmove") 2423*0b57cec5SDimitry Andric return false; 2424*0b57cec5SDimitry Andric 2425*0b57cec5SDimitry Andric AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 2426*0b57cec5SDimitry Andric DL = &L->getHeader()->getModule()->getDataLayout(); 2427*0b57cec5SDimitry Andric DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2428*0b57cec5SDimitry Andric LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2429*0b57cec5SDimitry Andric TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); 2430*0b57cec5SDimitry Andric SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2431*0b57cec5SDimitry Andric 2432*0b57cec5SDimitry Andric HasMemcpy = TLI->has(LibFunc_memcpy); 2433*0b57cec5SDimitry Andric HasMemmove = TLI->has(LibFunc_memmove); 2434*0b57cec5SDimitry Andric 2435*0b57cec5SDimitry Andric if (SE->hasLoopInvariantBackedgeTakenCount(L)) 2436*0b57cec5SDimitry Andric return runOnCountableLoop(L); 2437*0b57cec5SDimitry Andric return false; 2438*0b57cec5SDimitry Andric } 2439*0b57cec5SDimitry Andric 2440*0b57cec5SDimitry Andric Pass *llvm::createHexagonLoopIdiomPass() { 2441*0b57cec5SDimitry Andric return new HexagonLoopIdiomRecognize(); 2442*0b57cec5SDimitry Andric } 2443