10b57cec5SDimitry Andric //===-------- LoopDataPrefetch.cpp - Loop Data Prefetching Pass -----------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements a Loop Data Prefetching Pass.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
14480093f4SDimitry Andric #include "llvm/InitializePasses.h"
150b57cec5SDimitry Andric
160b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h"
170b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
180b57cec5SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
190b57cec5SDimitry Andric #include "llvm/Analysis/CodeMetrics.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
210b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
230b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
250b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/Module.h"
280b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
290b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
300b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
31349cc55cSDimitry Andric #include "llvm/Transforms/Utils.h"
325ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
33fe6060f1SDimitry Andric
34fe6060f1SDimitry Andric #define DEBUG_TYPE "loop-data-prefetch"
35fe6060f1SDimitry Andric
360b57cec5SDimitry Andric using namespace llvm;
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric // By default, we limit this to creating 16 PHIs (which is a little over half
390b57cec5SDimitry Andric // of the allocatable register set).
400b57cec5SDimitry Andric static cl::opt<bool>
410b57cec5SDimitry Andric PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false),
420b57cec5SDimitry Andric cl::desc("Prefetch write addresses"));
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric static cl::opt<unsigned>
450b57cec5SDimitry Andric PrefetchDistance("prefetch-distance",
460b57cec5SDimitry Andric cl::desc("Number of instructions to prefetch ahead"),
470b57cec5SDimitry Andric cl::Hidden);
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric static cl::opt<unsigned>
500b57cec5SDimitry Andric MinPrefetchStride("min-prefetch-stride",
510b57cec5SDimitry Andric cl::desc("Min stride to add prefetches"), cl::Hidden);
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric static cl::opt<unsigned> MaxPrefetchIterationsAhead(
540b57cec5SDimitry Andric "max-prefetch-iters-ahead",
550b57cec5SDimitry Andric cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden);
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric STATISTIC(NumPrefetches, "Number of prefetches inserted");
580b57cec5SDimitry Andric
590b57cec5SDimitry Andric namespace {
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric /// Loop prefetch implementation class.
620b57cec5SDimitry Andric class LoopDataPrefetch {
630b57cec5SDimitry Andric public:
LoopDataPrefetch(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,ScalarEvolution * SE,const TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)645ffd83dbSDimitry Andric LoopDataPrefetch(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
655ffd83dbSDimitry Andric ScalarEvolution *SE, const TargetTransformInfo *TTI,
660b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE)
675ffd83dbSDimitry Andric : AC(AC), DT(DT), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric bool run();
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric private:
720b57cec5SDimitry Andric bool runOnLoop(Loop *L);
730b57cec5SDimitry Andric
740b57cec5SDimitry Andric /// Check if the stride of the accesses is large enough to
750b57cec5SDimitry Andric /// warrant a prefetch.
765ffd83dbSDimitry Andric bool isStrideLargeEnough(const SCEVAddRecExpr *AR, unsigned TargetMinStride);
770b57cec5SDimitry Andric
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)785ffd83dbSDimitry Andric unsigned getMinPrefetchStride(unsigned NumMemAccesses,
795ffd83dbSDimitry Andric unsigned NumStridedMemAccesses,
805ffd83dbSDimitry Andric unsigned NumPrefetches,
815ffd83dbSDimitry Andric bool HasCall) {
820b57cec5SDimitry Andric if (MinPrefetchStride.getNumOccurrences() > 0)
830b57cec5SDimitry Andric return MinPrefetchStride;
845ffd83dbSDimitry Andric return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
855ffd83dbSDimitry Andric NumPrefetches, HasCall);
860b57cec5SDimitry Andric }
870b57cec5SDimitry Andric
getPrefetchDistance()880b57cec5SDimitry Andric unsigned getPrefetchDistance() {
890b57cec5SDimitry Andric if (PrefetchDistance.getNumOccurrences() > 0)
900b57cec5SDimitry Andric return PrefetchDistance;
910b57cec5SDimitry Andric return TTI->getPrefetchDistance();
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric
getMaxPrefetchIterationsAhead()940b57cec5SDimitry Andric unsigned getMaxPrefetchIterationsAhead() {
950b57cec5SDimitry Andric if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
960b57cec5SDimitry Andric return MaxPrefetchIterationsAhead;
970b57cec5SDimitry Andric return TTI->getMaxPrefetchIterationsAhead();
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
doPrefetchWrites()1005ffd83dbSDimitry Andric bool doPrefetchWrites() {
1015ffd83dbSDimitry Andric if (PrefetchWrites.getNumOccurrences() > 0)
1025ffd83dbSDimitry Andric return PrefetchWrites;
1035ffd83dbSDimitry Andric return TTI->enableWritePrefetching();
1045ffd83dbSDimitry Andric }
1055ffd83dbSDimitry Andric
1060b57cec5SDimitry Andric AssumptionCache *AC;
1075ffd83dbSDimitry Andric DominatorTree *DT;
1080b57cec5SDimitry Andric LoopInfo *LI;
1090b57cec5SDimitry Andric ScalarEvolution *SE;
1100b57cec5SDimitry Andric const TargetTransformInfo *TTI;
1110b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE;
1120b57cec5SDimitry Andric };
1130b57cec5SDimitry Andric
1140b57cec5SDimitry Andric /// Legacy class for inserting loop data prefetches.
1150b57cec5SDimitry Andric class LoopDataPrefetchLegacyPass : public FunctionPass {
1160b57cec5SDimitry Andric public:
1170b57cec5SDimitry Andric static char ID; // Pass ID, replacement for typeid
LoopDataPrefetchLegacyPass()1180b57cec5SDimitry Andric LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
1190b57cec5SDimitry Andric initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const1220b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
1230b57cec5SDimitry Andric AU.addRequired<AssumptionCacheTracker>();
1245ffd83dbSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>();
1250b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
1260b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>();
1270b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>();
128349cc55cSDimitry Andric AU.addRequiredID(LoopSimplifyID);
129349cc55cSDimitry Andric AU.addPreservedID(LoopSimplifyID);
1300b57cec5SDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1310b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>();
1320b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>();
1330b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
1340b57cec5SDimitry Andric }
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric bool runOnFunction(Function &F) override;
1370b57cec5SDimitry Andric };
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric
1400b57cec5SDimitry Andric char LoopDataPrefetchLegacyPass::ID = 0;
1410b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1420b57cec5SDimitry Andric "Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)1430b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1440b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1450b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
146349cc55cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
1470b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
1480b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
1490b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
1500b57cec5SDimitry Andric "Loop Data Prefetch", false, false)
1510b57cec5SDimitry Andric
1520b57cec5SDimitry Andric FunctionPass *llvm::createLoopDataPrefetchPass() {
1530b57cec5SDimitry Andric return new LoopDataPrefetchLegacyPass();
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric
isStrideLargeEnough(const SCEVAddRecExpr * AR,unsigned TargetMinStride)1565ffd83dbSDimitry Andric bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR,
1575ffd83dbSDimitry Andric unsigned TargetMinStride) {
1580b57cec5SDimitry Andric // No need to check if any stride goes.
1590b57cec5SDimitry Andric if (TargetMinStride <= 1)
1600b57cec5SDimitry Andric return true;
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
1630b57cec5SDimitry Andric // If MinStride is set, don't prefetch unless we can ensure that stride is
1640b57cec5SDimitry Andric // larger.
1650b57cec5SDimitry Andric if (!ConstStride)
1660b57cec5SDimitry Andric return false;
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
1690b57cec5SDimitry Andric return TargetMinStride <= AbsStride;
1700b57cec5SDimitry Andric }
1710b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)1720b57cec5SDimitry Andric PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
1730b57cec5SDimitry Andric FunctionAnalysisManager &AM) {
1745ffd83dbSDimitry Andric DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1750b57cec5SDimitry Andric LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
1760b57cec5SDimitry Andric ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
1770b57cec5SDimitry Andric AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
1780b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE =
1790b57cec5SDimitry Andric &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
1800b57cec5SDimitry Andric const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
1810b57cec5SDimitry Andric
1825ffd83dbSDimitry Andric LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
1830b57cec5SDimitry Andric bool Changed = LDP.run();
1840b57cec5SDimitry Andric
1850b57cec5SDimitry Andric if (Changed) {
1860b57cec5SDimitry Andric PreservedAnalyses PA;
1870b57cec5SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
1880b57cec5SDimitry Andric PA.preserve<LoopAnalysis>();
1890b57cec5SDimitry Andric return PA;
1900b57cec5SDimitry Andric }
1910b57cec5SDimitry Andric
1920b57cec5SDimitry Andric return PreservedAnalyses::all();
1930b57cec5SDimitry Andric }
1940b57cec5SDimitry Andric
runOnFunction(Function & F)1950b57cec5SDimitry Andric bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
1960b57cec5SDimitry Andric if (skipFunction(F))
1970b57cec5SDimitry Andric return false;
1980b57cec5SDimitry Andric
1995ffd83dbSDimitry Andric DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2000b57cec5SDimitry Andric LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2010b57cec5SDimitry Andric ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2020b57cec5SDimitry Andric AssumptionCache *AC =
2030b57cec5SDimitry Andric &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
2040b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE =
2050b57cec5SDimitry Andric &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
2060b57cec5SDimitry Andric const TargetTransformInfo *TTI =
2070b57cec5SDimitry Andric &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2080b57cec5SDimitry Andric
2095ffd83dbSDimitry Andric LoopDataPrefetch LDP(AC, DT, LI, SE, TTI, ORE);
2100b57cec5SDimitry Andric return LDP.run();
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric
run()2130b57cec5SDimitry Andric bool LoopDataPrefetch::run() {
2140b57cec5SDimitry Andric // If PrefetchDistance is not set, don't run the pass. This gives an
2150b57cec5SDimitry Andric // opportunity for targets to run this pass for selected subtargets only
216972a253aSDimitry Andric // (whose TTI sets PrefetchDistance and CacheLineSize).
217972a253aSDimitry Andric if (getPrefetchDistance() == 0 || TTI->getCacheLineSize() == 0) {
218972a253aSDimitry Andric LLVM_DEBUG(dbgs() << "Please set both PrefetchDistance and CacheLineSize "
219972a253aSDimitry Andric "for loop data prefetch.\n");
2200b57cec5SDimitry Andric return false;
221972a253aSDimitry Andric }
2220b57cec5SDimitry Andric
2230b57cec5SDimitry Andric bool MadeChange = false;
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric for (Loop *I : *LI)
2260eae32dcSDimitry Andric for (Loop *L : depth_first(I))
2270eae32dcSDimitry Andric MadeChange |= runOnLoop(L);
2280b57cec5SDimitry Andric
2290b57cec5SDimitry Andric return MadeChange;
2300b57cec5SDimitry Andric }
2310b57cec5SDimitry Andric
2325ffd83dbSDimitry Andric /// A record for a potential prefetch made during the initial scan of the
2335ffd83dbSDimitry Andric /// loop. This is used to let a single prefetch target multiple memory accesses.
2345ffd83dbSDimitry Andric struct Prefetch {
2355ffd83dbSDimitry Andric /// The address formula for this prefetch as returned by ScalarEvolution.
2365ffd83dbSDimitry Andric const SCEVAddRecExpr *LSCEVAddRec;
2375ffd83dbSDimitry Andric /// The point of insertion for the prefetch instruction.
23881ad6265SDimitry Andric Instruction *InsertPt = nullptr;
2395ffd83dbSDimitry Andric /// True if targeting a write memory access.
24081ad6265SDimitry Andric bool Writes = false;
2415ffd83dbSDimitry Andric /// The (first seen) prefetched instruction.
24281ad6265SDimitry Andric Instruction *MemI = nullptr;
2435ffd83dbSDimitry Andric
2445ffd83dbSDimitry Andric /// Constructor to create a new Prefetch for \p I.
PrefetchPrefetch24581ad6265SDimitry Andric Prefetch(const SCEVAddRecExpr *L, Instruction *I) : LSCEVAddRec(L) {
2465ffd83dbSDimitry Andric addInstruction(I);
2475ffd83dbSDimitry Andric };
2485ffd83dbSDimitry Andric
2495ffd83dbSDimitry Andric /// Add the instruction \param I to this prefetch. If it's not the first
2505ffd83dbSDimitry Andric /// one, 'InsertPt' and 'Writes' will be updated as required.
2515ffd83dbSDimitry Andric /// \param PtrDiff the known constant address difference to the first added
2525ffd83dbSDimitry Andric /// instruction.
addInstructionPrefetch2535ffd83dbSDimitry Andric void addInstruction(Instruction *I, DominatorTree *DT = nullptr,
2545ffd83dbSDimitry Andric int64_t PtrDiff = 0) {
2555ffd83dbSDimitry Andric if (!InsertPt) {
2565ffd83dbSDimitry Andric MemI = I;
2575ffd83dbSDimitry Andric InsertPt = I;
2585ffd83dbSDimitry Andric Writes = isa<StoreInst>(I);
2595ffd83dbSDimitry Andric } else {
2605ffd83dbSDimitry Andric BasicBlock *PrefBB = InsertPt->getParent();
2615ffd83dbSDimitry Andric BasicBlock *InsBB = I->getParent();
2625ffd83dbSDimitry Andric if (PrefBB != InsBB) {
2635ffd83dbSDimitry Andric BasicBlock *DomBB = DT->findNearestCommonDominator(PrefBB, InsBB);
2645ffd83dbSDimitry Andric if (DomBB != PrefBB)
2655ffd83dbSDimitry Andric InsertPt = DomBB->getTerminator();
2665ffd83dbSDimitry Andric }
2675ffd83dbSDimitry Andric
2685ffd83dbSDimitry Andric if (isa<StoreInst>(I) && PtrDiff == 0)
2695ffd83dbSDimitry Andric Writes = true;
2705ffd83dbSDimitry Andric }
2715ffd83dbSDimitry Andric }
2725ffd83dbSDimitry Andric };
2735ffd83dbSDimitry Andric
runOnLoop(Loop * L)2740b57cec5SDimitry Andric bool LoopDataPrefetch::runOnLoop(Loop *L) {
2750b57cec5SDimitry Andric bool MadeChange = false;
2760b57cec5SDimitry Andric
2770b57cec5SDimitry Andric // Only prefetch in the inner-most loop
278e8d8bef9SDimitry Andric if (!L->isInnermost())
2790b57cec5SDimitry Andric return MadeChange;
2800b57cec5SDimitry Andric
2810b57cec5SDimitry Andric SmallPtrSet<const Value *, 32> EphValues;
2820b57cec5SDimitry Andric CodeMetrics::collectEphemeralValues(L, AC, EphValues);
2830b57cec5SDimitry Andric
2840b57cec5SDimitry Andric // Calculate the number of iterations ahead to prefetch
2850b57cec5SDimitry Andric CodeMetrics Metrics;
2865ffd83dbSDimitry Andric bool HasCall = false;
2870b57cec5SDimitry Andric for (const auto BB : L->blocks()) {
2880b57cec5SDimitry Andric // If the loop already has prefetches, then assume that the user knows
2890b57cec5SDimitry Andric // what they are doing and don't add any more.
2905ffd83dbSDimitry Andric for (auto &I : *BB) {
2915ffd83dbSDimitry Andric if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
2925ffd83dbSDimitry Andric if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
2930b57cec5SDimitry Andric if (F->getIntrinsicID() == Intrinsic::prefetch)
2940b57cec5SDimitry Andric return MadeChange;
2955ffd83dbSDimitry Andric if (TTI->isLoweredToCall(F))
2965ffd83dbSDimitry Andric HasCall = true;
2975ffd83dbSDimitry Andric } else { // indirect call.
2985ffd83dbSDimitry Andric HasCall = true;
2995ffd83dbSDimitry Andric }
3005ffd83dbSDimitry Andric }
3015ffd83dbSDimitry Andric }
3020b57cec5SDimitry Andric Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
3030b57cec5SDimitry Andric }
30481ad6265SDimitry Andric
30581ad6265SDimitry Andric if (!Metrics.NumInsts.isValid())
30681ad6265SDimitry Andric return MadeChange;
30781ad6265SDimitry Andric
30881ad6265SDimitry Andric unsigned LoopSize = *Metrics.NumInsts.getValue();
3090b57cec5SDimitry Andric if (!LoopSize)
3100b57cec5SDimitry Andric LoopSize = 1;
3110b57cec5SDimitry Andric
3120b57cec5SDimitry Andric unsigned ItersAhead = getPrefetchDistance() / LoopSize;
3130b57cec5SDimitry Andric if (!ItersAhead)
3140b57cec5SDimitry Andric ItersAhead = 1;
3150b57cec5SDimitry Andric
3160b57cec5SDimitry Andric if (ItersAhead > getMaxPrefetchIterationsAhead())
3170b57cec5SDimitry Andric return MadeChange;
3180b57cec5SDimitry Andric
3195ffd83dbSDimitry Andric unsigned ConstantMaxTripCount = SE->getSmallConstantMaxTripCount(L);
3205ffd83dbSDimitry Andric if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
3215ffd83dbSDimitry Andric return MadeChange;
3220b57cec5SDimitry Andric
3235ffd83dbSDimitry Andric unsigned NumMemAccesses = 0;
3245ffd83dbSDimitry Andric unsigned NumStridedMemAccesses = 0;
3255ffd83dbSDimitry Andric SmallVector<Prefetch, 16> Prefetches;
3265ffd83dbSDimitry Andric for (const auto BB : L->blocks())
3270b57cec5SDimitry Andric for (auto &I : *BB) {
3280b57cec5SDimitry Andric Value *PtrValue;
3290b57cec5SDimitry Andric Instruction *MemI;
3300b57cec5SDimitry Andric
3310b57cec5SDimitry Andric if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
3320b57cec5SDimitry Andric MemI = LMemI;
3330b57cec5SDimitry Andric PtrValue = LMemI->getPointerOperand();
3340b57cec5SDimitry Andric } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
3355ffd83dbSDimitry Andric if (!doPrefetchWrites()) continue;
3360b57cec5SDimitry Andric MemI = SMemI;
3370b57cec5SDimitry Andric PtrValue = SMemI->getPointerOperand();
3380b57cec5SDimitry Andric } else continue;
3390b57cec5SDimitry Andric
3400b57cec5SDimitry Andric unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
341bdd1243dSDimitry Andric if (!TTI->shouldPrefetchAddressSpace(PtrAddrSpace))
3420b57cec5SDimitry Andric continue;
3435ffd83dbSDimitry Andric NumMemAccesses++;
3440b57cec5SDimitry Andric if (L->isLoopInvariant(PtrValue))
3450b57cec5SDimitry Andric continue;
3460b57cec5SDimitry Andric
3470b57cec5SDimitry Andric const SCEV *LSCEV = SE->getSCEV(PtrValue);
3480b57cec5SDimitry Andric const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
3490b57cec5SDimitry Andric if (!LSCEVAddRec)
3500b57cec5SDimitry Andric continue;
3515ffd83dbSDimitry Andric NumStridedMemAccesses++;
3520b57cec5SDimitry Andric
3535ffd83dbSDimitry Andric // We don't want to double prefetch individual cache lines. If this
3545ffd83dbSDimitry Andric // access is known to be within one cache line of some other one that
3555ffd83dbSDimitry Andric // has already been prefetched, then don't prefetch this one as well.
3560b57cec5SDimitry Andric bool DupPref = false;
3575ffd83dbSDimitry Andric for (auto &Pref : Prefetches) {
3585ffd83dbSDimitry Andric const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
3590b57cec5SDimitry Andric if (const SCEVConstant *ConstPtrDiff =
3600b57cec5SDimitry Andric dyn_cast<SCEVConstant>(PtrDiff)) {
3610b57cec5SDimitry Andric int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
3620b57cec5SDimitry Andric if (PD < (int64_t) TTI->getCacheLineSize()) {
3635ffd83dbSDimitry Andric Pref.addInstruction(MemI, DT, PD);
3640b57cec5SDimitry Andric DupPref = true;
3650b57cec5SDimitry Andric break;
3660b57cec5SDimitry Andric }
3670b57cec5SDimitry Andric }
3680b57cec5SDimitry Andric }
3695ffd83dbSDimitry Andric if (!DupPref)
3705ffd83dbSDimitry Andric Prefetches.push_back(Prefetch(LSCEVAddRec, MemI));
3715ffd83dbSDimitry Andric }
3725ffd83dbSDimitry Andric
3735ffd83dbSDimitry Andric unsigned TargetMinStride =
3745ffd83dbSDimitry Andric getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
3755ffd83dbSDimitry Andric Prefetches.size(), HasCall);
3765ffd83dbSDimitry Andric
3775ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Prefetching " << ItersAhead
3785ffd83dbSDimitry Andric << " iterations ahead (loop size: " << LoopSize << ") in "
3795ffd83dbSDimitry Andric << L->getHeader()->getParent()->getName() << ": " << *L);
3805ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "Loop has: "
3815ffd83dbSDimitry Andric << NumMemAccesses << " memory accesses, "
3825ffd83dbSDimitry Andric << NumStridedMemAccesses << " strided memory accesses, "
3835ffd83dbSDimitry Andric << Prefetches.size() << " potential prefetch(es), "
3845ffd83dbSDimitry Andric << "a minimum stride of " << TargetMinStride << ", "
3855ffd83dbSDimitry Andric << (HasCall ? "calls" : "no calls") << ".\n");
3865ffd83dbSDimitry Andric
3875ffd83dbSDimitry Andric for (auto &P : Prefetches) {
3885ffd83dbSDimitry Andric // Check if the stride of the accesses is large enough to warrant a
3895ffd83dbSDimitry Andric // prefetch.
3905ffd83dbSDimitry Andric if (!isStrideLargeEnough(P.LSCEVAddRec, TargetMinStride))
3910b57cec5SDimitry Andric continue;
3920b57cec5SDimitry Andric
393fcaf7f86SDimitry Andric BasicBlock *BB = P.InsertPt->getParent();
394*0fca6ea1SDimitry Andric SCEVExpander SCEVE(*SE, BB->getDataLayout(), "prefaddr");
3955ffd83dbSDimitry Andric const SCEV *NextLSCEV = SE->getAddExpr(P.LSCEVAddRec, SE->getMulExpr(
3965ffd83dbSDimitry Andric SE->getConstant(P.LSCEVAddRec->getType(), ItersAhead),
3975ffd83dbSDimitry Andric P.LSCEVAddRec->getStepRecurrence(*SE)));
398fcaf7f86SDimitry Andric if (!SCEVE.isSafeToExpand(NextLSCEV))
3990b57cec5SDimitry Andric continue;
4000b57cec5SDimitry Andric
401bdd1243dSDimitry Andric unsigned PtrAddrSpace = NextLSCEV->getType()->getPointerAddressSpace();
4025f757f3fSDimitry Andric Type *I8Ptr = PointerType::get(BB->getContext(), PtrAddrSpace);
4035ffd83dbSDimitry Andric Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, P.InsertPt);
4040b57cec5SDimitry Andric
4055ffd83dbSDimitry Andric IRBuilder<> Builder(P.InsertPt);
4060b57cec5SDimitry Andric Module *M = BB->getParent()->getParent();
4070b57cec5SDimitry Andric Type *I32 = Type::getInt32Ty(BB->getContext());
4088bcb0991SDimitry Andric Function *PrefetchFunc = Intrinsic::getDeclaration(
4098bcb0991SDimitry Andric M, Intrinsic::prefetch, PrefPtrValue->getType());
4100b57cec5SDimitry Andric Builder.CreateCall(
4110b57cec5SDimitry Andric PrefetchFunc,
4120b57cec5SDimitry Andric {PrefPtrValue,
4135ffd83dbSDimitry Andric ConstantInt::get(I32, P.Writes),
4140b57cec5SDimitry Andric ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});
4150b57cec5SDimitry Andric ++NumPrefetches;
4165ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " Access: "
4175ffd83dbSDimitry Andric << *P.MemI->getOperand(isa<LoadInst>(P.MemI) ? 0 : 1)
4185ffd83dbSDimitry Andric << ", SCEV: " << *P.LSCEVAddRec << "\n");
4190b57cec5SDimitry Andric ORE->emit([&]() {
4205ffd83dbSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Prefetched", P.MemI)
4210b57cec5SDimitry Andric << "prefetched memory access";
4220b57cec5SDimitry Andric });
4230b57cec5SDimitry Andric
4240b57cec5SDimitry Andric MadeChange = true;
4250b57cec5SDimitry Andric }
4260b57cec5SDimitry Andric
4270b57cec5SDimitry Andric return MadeChange;
4280b57cec5SDimitry Andric }
429