10b57cec5SDimitry Andric //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass performs loop invariant code motion, attempting to remove as much
100b57cec5SDimitry Andric // code from the body of a loop as possible. It does this by either hoisting
110b57cec5SDimitry Andric // code into the preheader block, or by sinking code to the exit blocks if it is
120b57cec5SDimitry Andric // safe. This pass also promotes must-aliased memory locations in the loop to
130b57cec5SDimitry Andric // live in registers, thus hoisting and sinking "invariant" loads and stores.
140b57cec5SDimitry Andric //
15e8d8bef9SDimitry Andric // Hoisting operations out of loops is a canonicalization transform. It
16e8d8bef9SDimitry Andric // enables and simplifies subsequent optimizations in the middle-end.
17e8d8bef9SDimitry Andric // Rematerialization of hoisted instructions to reduce register pressure is the
18e8d8bef9SDimitry Andric // responsibility of the back-end, which has more accurate information about
19e8d8bef9SDimitry Andric // register pressure and also handles other optimizations than LICM that
20e8d8bef9SDimitry Andric // increase live-ranges.
21e8d8bef9SDimitry Andric //
220b57cec5SDimitry Andric // This pass uses alias analysis for two purposes:
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric // 1. Moving loop invariant loads and calls out of loops. If we can determine
250b57cec5SDimitry Andric // that a load or call inside of a loop never aliases anything stored to,
260b57cec5SDimitry Andric // we can hoist it or sink it like any other instruction.
270b57cec5SDimitry Andric // 2. Scalar Promotion of Memory - If there is a store instruction inside of
280b57cec5SDimitry Andric // the loop, we try to move the store to happen AFTER the loop instead of
290b57cec5SDimitry Andric // inside of the loop. This can only happen if a few conditions are true:
300b57cec5SDimitry Andric // A. The pointer stored through is loop invariant
310b57cec5SDimitry Andric // B. There are no stores or loads in the loop which _may_ alias the
320b57cec5SDimitry Andric // pointer. There are no calls in the loop which mod/ref the pointer.
330b57cec5SDimitry Andric // If these conditions are true, we can promote the loads and stores in the
340b57cec5SDimitry Andric // loop of the pointer to use a temporary alloca'd variable. We then use
350b57cec5SDimitry Andric // the SSAUpdater to construct the appropriate SSA form for the value.
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LICM.h"
4081ad6265SDimitry Andric #include "llvm/ADT/PriorityWorklist.h"
410b57cec5SDimitry Andric #include "llvm/ADT/SetOperations.h"
420b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
430b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
440b57cec5SDimitry Andric #include "llvm/Analysis/AliasSetTracker.h"
45bdd1243dSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
460b57cec5SDimitry Andric #include "llvm/Analysis/CaptureTracking.h"
470b57cec5SDimitry Andric #include "llvm/Analysis/GuardUtils.h"
48e8d8bef9SDimitry Andric #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
490b57cec5SDimitry Andric #include "llvm/Analysis/Loads.h"
500b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
510b57cec5SDimitry Andric #include "llvm/Analysis/LoopIterator.h"
5281ad6265SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h"
530b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
540b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
550b57cec5SDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
565ffd83dbSDimitry Andric #include "llvm/Analysis/MustExecute.h"
570b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
580b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
590b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
6081ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
610b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
620b57cec5SDimitry Andric #include "llvm/IR/CFG.h"
630b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
640b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
650b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
660b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
670b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
680b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
690b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
7006c3fb27SDimitry Andric #include "llvm/IR/IRBuilder.h"
710b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
720b57cec5SDimitry Andric #include "llvm/IR/Metadata.h"
730b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h"
740b57cec5SDimitry Andric #include "llvm/IR/PredIteratorCache.h"
75480093f4SDimitry Andric #include "llvm/InitializePasses.h"
760b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
770b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
780b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
79bdd1243dSDimitry Andric #include "llvm/Target/TargetOptions.h"
800b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
815ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
820b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
830b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
840b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
850b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdater.h"
860b57cec5SDimitry Andric #include <algorithm>
870b57cec5SDimitry Andric #include <utility>
880b57cec5SDimitry Andric using namespace llvm;
890b57cec5SDimitry Andric
9081ad6265SDimitry Andric namespace llvm {
9181ad6265SDimitry Andric class LPMUpdater;
9281ad6265SDimitry Andric } // namespace llvm
9381ad6265SDimitry Andric
940b57cec5SDimitry Andric #define DEBUG_TYPE "licm"
950b57cec5SDimitry Andric
// Pass-wide counters, reported under -stats. Each STATISTIC tracks one class
// of transformation LICM performed.
STATISTIC(NumCreatedBlocks, "Number of blocks created");
STATISTIC(NumClonedBranches, "Number of branches cloned");
STATISTIC(NumSunk, "Number of instructions sunk out of loop");
STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
STATISTIC(NumLoadPromoted, "Number of load-only promotions");
STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
STATISTIC(NumMinMaxHoisted,
          "Number of min/max expressions hoisted out of the loop");
STATISTIC(NumGEPsHoisted,
          "Number of geps reassociated and hoisted out of the loop");
STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
                            "and hoisted out of the loop");
STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
                                    "reassociated and hoisted out of the loop");
STATISTIC(NumIntAssociationsHoisted,
          "Number of invariant int expressions "
          "reassociated and hoisted out of the loop");
1160b57cec5SDimitry Andric
/// Memory promotion is enabled by default.
static cl::opt<bool>
    DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
                     cl::desc("Disable memory promotion in LICM pass"));

// Off by default: hoisting control flow duplicates branches/PHIs and is more
// invasive than plain instruction hoisting.
static cl::opt<bool> ControlFlowHoisting(
    "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
    cl::desc("Enable control flow (and PHI) hoisting in LICM"));

static cl::opt<bool>
    SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
                 cl::desc("Force thread model single in LICM pass"));

static cl::opt<uint32_t> MaxNumUsesTraversed(
    "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
    cl::desc("Max num uses visited for identifying load "
             "invariance in loop using invariant start (default = 8)"));

static cl::opt<unsigned> FPAssociationUpperLimit(
    "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
    cl::desc(
        "Set upper limit for the number of transformations performed "
        "during a single round of hoisting the reassociated expressions."));

// Non-static: also referenced from outside this file.
cl::opt<unsigned> IntAssociationUpperLimit(
    "licm-max-num-int-reassociations", cl::init(5U), cl::Hidden,
    cl::desc(
        "Set upper limit for the number of transformations performed "
        "during a single round of hoisting the reassociated expressions."));

// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. LICM calls MemorySSAWalker's
// getClobberingMemoryAccess, up to the value of the Cap, getting perfect
// accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
// which may not be precise, since optimizeUses is capped. The result is
// correct, but we may not get as "far up" as possible to get which access is
// clobbering the one queried.
cl::opt<unsigned> llvm::SetLicmMssaOptCap(
    "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
    cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
             "for faster compile. Caps the MemorySSA clobbering calls."));

// Experimentally, memory promotion carries less importance than sinking and
// hoisting. Limit when we do promotion when using MemorySSA, in order to save
// compile time.
cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
    "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
    cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
             "effect. When MSSA in LICM is enabled, then this is the maximum "
             "number of accesses allowed to be present in a loop in order to "
             "enable memory promotion."));
1690b57cec5SDimitry Andric
// Forward declarations of the file-local helpers implemented further below.

/// Returns true if BB is inside a subloop of CurLoop (i.e. LI maps BB to a
/// loop other than CurLoop itself).
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                                      const LoopSafetyInfo *SafetyInfo,
                                      TargetTransformInfo *TTI,
                                      bool &FoldableInLoop, bool LoopNestMode);
/// Moves I into the block Dest (typically the loop preheader).
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                  OptimizationRemarkEmitter *ORE);
/// Moves I out of the loop into its exit blocks.
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
static bool isSafeToExecuteUnconditionally(
    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
    AssumptionCache *AC, bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                     Loop *CurLoop, Instruction &I,
                                     SinkAndHoistLICMFlags &Flags,
                                     bool InvariantGroup);
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
                                      MemoryUse &MU);
/// Aggregates various functions for hoisting computations out of loop.
static bool hoistArithmetics(Instruction &I, Loop &L,
                             ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU, AssumptionCache *AC,
                             DominatorTree *DT);
static Instruction *cloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);

static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU);

static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE);

static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
                                function_ref<void(Instruction *)> Fn);
// A set of promotable pointers, paired with a flag recording whether any of
// them is read outside the promoted set.
using PointersAndHasReadsOutsideSet =
    std::pair<SmallSetVector<Value *, 8>, bool>;
static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
215fe6060f1SDimitry Andric
2160b57cec5SDimitry Andric namespace {
/// Shared implementation driven by both the new-PM (LICMPass / LNICMPass) and
/// legacy-PM (LegacyLICMPass) wrappers. Holds the MemorySSA caps and the
/// speculation policy; runOnLoop performs the actual sinking/hoisting/
/// promotion on one loop (or, with LoopNestMode, a whole loop nest).
struct LoopInvariantCodeMotion {
  bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
                 AssumptionCache *AC, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
                 OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);

  LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
                          unsigned LicmMssaNoAccForPromotionCap,
                          bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
        LicmAllowSpeculation(LicmAllowSpeculation) {}

private:
  // Cap on MemorySSA getClobberingMemoryAccess queries (see SetLicmMssaOptCap).
  unsigned LicmMssaOptCap;
  // Max memory accesses in a loop before promotion is given up (see
  // SetLicmMssaNoAccForPromotionCap).
  unsigned LicmMssaNoAccForPromotionCap;
  // Whether hoisting may speculate instructions that are not guaranteed to
  // execute in the loop.
  bool LicmAllowSpeculation;
};
2350b57cec5SDimitry Andric
/// Legacy pass manager wrapper around LoopInvariantCodeMotion.
struct LegacyLICMPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid
  LegacyLICMPass(
      unsigned LicmMssaOptCap = SetLicmMssaOptCap,
      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
      bool LicmAllowSpeculation = true)
      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
                           LicmAllowSpeculation) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
                      << L->getHeader()->getNameOrAsOperand() << "\n");

    Function *F = L->getHeader()->getParent();

    // ScalarEvolution is optional here; the others are declared required in
    // getAnalysisUsage and therefore always available.
    auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
    MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
    // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
    // pass. Function analyses need to be preserved across loop transformations
    // but ORE cannot be preserved (see comment before the pass definition).
    OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
    return LICM.runOnLoop(
        L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
        &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
        &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
        &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
        SE ? &SE->getSE() : nullptr, MSSA, &ORE);
  }

  /// This transformation requires natural loop information & requires that
  /// loop preheaders be inserted into the CFG...
  ///
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    getLoopAnalysisUsage(AU);
    // BFI/BPI are only lazily computed; preserve them so later passes do not
    // recompute them needlessly.
    LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
    AU.addPreserved<LazyBlockFrequencyInfoPass>();
    AU.addPreserved<LazyBranchProbabilityInfoPass>();
  }

private:
  LoopInvariantCodeMotion LICM;
};
2920b57cec5SDimitry Andric } // namespace
2930b57cec5SDimitry Andric
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)2940b57cec5SDimitry Andric PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
2950b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, LPMUpdater &) {
296349cc55cSDimitry Andric if (!AR.MSSA)
297bdd1243dSDimitry Andric report_fatal_error("LICM requires MemorySSA (loop-mssa)",
298bdd1243dSDimitry Andric /*GenCrashDiag*/false);
299349cc55cSDimitry Andric
3005ffd83dbSDimitry Andric // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
3015ffd83dbSDimitry Andric // pass. Function analyses need to be preserved across loop transformations
3025ffd83dbSDimitry Andric // but ORE cannot be preserved (see comment before the pass definition).
3035ffd83dbSDimitry Andric OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
3040b57cec5SDimitry Andric
30581ad6265SDimitry Andric LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
30681ad6265SDimitry Andric Opts.AllowSpeculation);
307bdd1243dSDimitry Andric if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
308e8d8bef9SDimitry Andric &AR.SE, AR.MSSA, &ORE))
3090b57cec5SDimitry Andric return PreservedAnalyses::all();
3100b57cec5SDimitry Andric
3110b57cec5SDimitry Andric auto PA = getLoopPassPreservedAnalyses();
3120b57cec5SDimitry Andric PA.preserve<MemorySSAAnalysis>();
3130b57cec5SDimitry Andric
3140b57cec5SDimitry Andric return PA;
3150b57cec5SDimitry Andric }
3160b57cec5SDimitry Andric
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)31781ad6265SDimitry Andric void LICMPass::printPipeline(
31881ad6265SDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
31981ad6265SDimitry Andric static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
32081ad6265SDimitry Andric OS, MapClassName2PassName);
32181ad6265SDimitry Andric
32206c3fb27SDimitry Andric OS << '<';
32381ad6265SDimitry Andric OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
32406c3fb27SDimitry Andric OS << '>';
32581ad6265SDimitry Andric }
32681ad6265SDimitry Andric
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)327fe6060f1SDimitry Andric PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
328fe6060f1SDimitry Andric LoopStandardAnalysisResults &AR,
329fe6060f1SDimitry Andric LPMUpdater &) {
330349cc55cSDimitry Andric if (!AR.MSSA)
331bdd1243dSDimitry Andric report_fatal_error("LNICM requires MemorySSA (loop-mssa)",
332bdd1243dSDimitry Andric /*GenCrashDiag*/false);
333349cc55cSDimitry Andric
334fe6060f1SDimitry Andric // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
335fe6060f1SDimitry Andric // pass. Function analyses need to be preserved across loop transformations
336fe6060f1SDimitry Andric // but ORE cannot be preserved (see comment before the pass definition).
337fe6060f1SDimitry Andric OptimizationRemarkEmitter ORE(LN.getParent());
338fe6060f1SDimitry Andric
33981ad6265SDimitry Andric LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
34081ad6265SDimitry Andric Opts.AllowSpeculation);
341fe6060f1SDimitry Andric
342fe6060f1SDimitry Andric Loop &OutermostLoop = LN.getOutermostLoop();
343bdd1243dSDimitry Andric bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
344fe6060f1SDimitry Andric &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
345fe6060f1SDimitry Andric
346fe6060f1SDimitry Andric if (!Changed)
347fe6060f1SDimitry Andric return PreservedAnalyses::all();
348fe6060f1SDimitry Andric
349fe6060f1SDimitry Andric auto PA = getLoopPassPreservedAnalyses();
350fe6060f1SDimitry Andric
351fe6060f1SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
352fe6060f1SDimitry Andric PA.preserve<LoopAnalysis>();
353fe6060f1SDimitry Andric PA.preserve<MemorySSAAnalysis>();
354fe6060f1SDimitry Andric
355fe6060f1SDimitry Andric return PA;
356fe6060f1SDimitry Andric }
357fe6060f1SDimitry Andric
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)35881ad6265SDimitry Andric void LNICMPass::printPipeline(
35981ad6265SDimitry Andric raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
36081ad6265SDimitry Andric static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
36181ad6265SDimitry Andric OS, MapClassName2PassName);
36281ad6265SDimitry Andric
36306c3fb27SDimitry Andric OS << '<';
36481ad6265SDimitry Andric OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
36506c3fb27SDimitry Andric OS << '>';
36681ad6265SDimitry Andric }
36781ad6265SDimitry Andric
// Legacy pass registration: declares LegacyLICMPass's dependencies to the old
// pass manager and registers it under the "licm" name.
char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                    false)

// Factory used by the legacy pass pipeline; caller takes ownership.
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
3800b57cec5SDimitry Andric
SinkAndHoistLICMFlags(bool IsSink,Loop & L,MemorySSA & MSSA)38106c3fb27SDimitry Andric llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
38206c3fb27SDimitry Andric MemorySSA &MSSA)
383e8d8bef9SDimitry Andric : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
384e8d8bef9SDimitry Andric IsSink, L, MSSA) {}
385e8d8bef9SDimitry Andric
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,unsigned LicmMssaNoAccForPromotionCap,bool IsSink,Loop & L,MemorySSA & MSSA)386e8d8bef9SDimitry Andric llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
387e8d8bef9SDimitry Andric unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
38806c3fb27SDimitry Andric Loop &L, MemorySSA &MSSA)
389e8d8bef9SDimitry Andric : LicmMssaOptCap(LicmMssaOptCap),
390e8d8bef9SDimitry Andric LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
391e8d8bef9SDimitry Andric IsSink(IsSink) {
392e8d8bef9SDimitry Andric unsigned AccessCapCount = 0;
39306c3fb27SDimitry Andric for (auto *BB : L.getBlocks())
39406c3fb27SDimitry Andric if (const auto *Accesses = MSSA.getBlockAccesses(BB))
395e8d8bef9SDimitry Andric for (const auto &MA : *Accesses) {
396e8d8bef9SDimitry Andric (void)MA;
397e8d8bef9SDimitry Andric ++AccessCapCount;
398e8d8bef9SDimitry Andric if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
399e8d8bef9SDimitry Andric NoOfMemAccTooLarge = true;
400e8d8bef9SDimitry Andric return;
401e8d8bef9SDimitry Andric }
402e8d8bef9SDimitry Andric }
403e8d8bef9SDimitry Andric }
404e8d8bef9SDimitry Andric
4050b57cec5SDimitry Andric /// Hoist expressions out of the specified loop. Note, alias info for inner
4060b57cec5SDimitry Andric /// loop is not preserved so it is not a good idea to run LICM multiple
4070b57cec5SDimitry Andric /// times on one loop.
runOnLoop(Loop * L,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,ScalarEvolution * SE,MemorySSA * MSSA,OptimizationRemarkEmitter * ORE,bool LoopNestMode)408bdd1243dSDimitry Andric bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
409bdd1243dSDimitry Andric DominatorTree *DT, AssumptionCache *AC,
410bdd1243dSDimitry Andric TargetLibraryInfo *TLI,
411bdd1243dSDimitry Andric TargetTransformInfo *TTI,
412bdd1243dSDimitry Andric ScalarEvolution *SE, MemorySSA *MSSA,
413bdd1243dSDimitry Andric OptimizationRemarkEmitter *ORE,
414fe6060f1SDimitry Andric bool LoopNestMode) {
4150b57cec5SDimitry Andric bool Changed = false;
4160b57cec5SDimitry Andric
4170b57cec5SDimitry Andric assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
4180b57cec5SDimitry Andric
4198bcb0991SDimitry Andric // If this loop has metadata indicating that LICM is not to be performed then
4208bcb0991SDimitry Andric // just exit.
4218bcb0991SDimitry Andric if (hasDisableLICMTransformsHint(L)) {
4228bcb0991SDimitry Andric return false;
4238bcb0991SDimitry Andric }
4248bcb0991SDimitry Andric
425fe6060f1SDimitry Andric // Don't sink stores from loops with coroutine suspend instructions.
426fe6060f1SDimitry Andric // LICM would sink instructions into the default destination of
427fe6060f1SDimitry Andric // the coroutine switch. The default destination of the switch is to
428fe6060f1SDimitry Andric // handle the case where the coroutine is suspended, by which point the
429fe6060f1SDimitry Andric // coroutine frame may have been destroyed. No instruction can be sunk there.
430fe6060f1SDimitry Andric // FIXME: This would unfortunately hurt the performance of coroutines, however
431fe6060f1SDimitry Andric // there is currently no general solution for this. Similar issues could also
432fe6060f1SDimitry Andric // potentially happen in other passes where instructions are being moved
433fe6060f1SDimitry Andric // across that edge.
434fe6060f1SDimitry Andric bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
435fe6060f1SDimitry Andric return llvm::any_of(*BB, [](Instruction &I) {
436fe6060f1SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
437fe6060f1SDimitry Andric return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
438fe6060f1SDimitry Andric });
439fe6060f1SDimitry Andric });
440fe6060f1SDimitry Andric
441349cc55cSDimitry Andric MemorySSAUpdater MSSAU(MSSA);
442349cc55cSDimitry Andric SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
44306c3fb27SDimitry Andric /*IsSink=*/true, *L, *MSSA);
4440b57cec5SDimitry Andric
4450b57cec5SDimitry Andric // Get the preheader block to move instructions into...
4460b57cec5SDimitry Andric BasicBlock *Preheader = L->getLoopPreheader();
4470b57cec5SDimitry Andric
4480b57cec5SDimitry Andric // Compute loop safety information.
4495ffd83dbSDimitry Andric ICFLoopSafetyInfo SafetyInfo;
4500b57cec5SDimitry Andric SafetyInfo.computeLoopSafetyInfo(L);
4510b57cec5SDimitry Andric
4520b57cec5SDimitry Andric // We want to visit all of the instructions in this loop... that are not parts
4530b57cec5SDimitry Andric // of our subloops (they have already had their invariants hoisted out of
4540b57cec5SDimitry Andric // their loop, into this loop, so there is no need to process the BODIES of
4550b57cec5SDimitry Andric // the subloops).
4560b57cec5SDimitry Andric //
4570b57cec5SDimitry Andric // Traverse the body of the loop in depth first order on the dominator tree so
4580b57cec5SDimitry Andric // that we are guaranteed to see definitions before we see uses. This allows
4590b57cec5SDimitry Andric // us to sink instructions in one pass, without iteration. After sinking
4600b57cec5SDimitry Andric // instructions, we perform another pass to hoist them out of the loop.
4610b57cec5SDimitry Andric if (L->hasDedicatedExits())
462bdd1243dSDimitry Andric Changed |=
463bdd1243dSDimitry Andric LoopNestMode
464bdd1243dSDimitry Andric ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
465bdd1243dSDimitry Andric TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
466bdd1243dSDimitry Andric : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
467bdd1243dSDimitry Andric MSSAU, &SafetyInfo, Flags, ORE);
468349cc55cSDimitry Andric Flags.setIsSink(false);
469e8d8bef9SDimitry Andric if (Preheader)
470bdd1243dSDimitry Andric Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
47181ad6265SDimitry Andric MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
472fb03ea46SDimitry Andric LicmAllowSpeculation);
4730b57cec5SDimitry Andric
4740b57cec5SDimitry Andric // Now that all loop invariants have been removed from the loop, promote any
4750b57cec5SDimitry Andric // memory references to scalars that we can.
4760b57cec5SDimitry Andric // Don't sink stores from loops without dedicated block exits. Exits
4770b57cec5SDimitry Andric // containing indirect branches are not transformed by loop simplify,
4780b57cec5SDimitry Andric // make sure we catch that. An additional load may be generated in the
4790b57cec5SDimitry Andric // preheader for SSA updater, so also avoid sinking when no preheader
4800b57cec5SDimitry Andric // is available.
4810b57cec5SDimitry Andric if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
482349cc55cSDimitry Andric !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
4830b57cec5SDimitry Andric // Figure out the loop exits and their insertion points
4840b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> ExitBlocks;
4850b57cec5SDimitry Andric L->getUniqueExitBlocks(ExitBlocks);
4860b57cec5SDimitry Andric
4870b57cec5SDimitry Andric // We can't insert into a catchswitch.
4880b57cec5SDimitry Andric bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
4890b57cec5SDimitry Andric return isa<CatchSwitchInst>(Exit->getTerminator());
4900b57cec5SDimitry Andric });
4910b57cec5SDimitry Andric
4920b57cec5SDimitry Andric if (!HasCatchSwitch) {
4935f757f3fSDimitry Andric SmallVector<BasicBlock::iterator, 8> InsertPts;
4940b57cec5SDimitry Andric SmallVector<MemoryAccess *, 8> MSSAInsertPts;
4950b57cec5SDimitry Andric InsertPts.reserve(ExitBlocks.size());
4960b57cec5SDimitry Andric MSSAInsertPts.reserve(ExitBlocks.size());
4970b57cec5SDimitry Andric for (BasicBlock *ExitBlock : ExitBlocks) {
4985f757f3fSDimitry Andric InsertPts.push_back(ExitBlock->getFirstInsertionPt());
4990b57cec5SDimitry Andric MSSAInsertPts.push_back(nullptr);
5000b57cec5SDimitry Andric }
5010b57cec5SDimitry Andric
5020b57cec5SDimitry Andric PredIteratorCache PIC;
5030b57cec5SDimitry Andric
504fe6060f1SDimitry Andric // Promoting one set of accesses may make the pointers for another set
50581ad6265SDimitry Andric // loop invariant, so run this in a loop.
506349cc55cSDimitry Andric bool Promoted = false;
507fe6060f1SDimitry Andric bool LocalPromoted;
508fe6060f1SDimitry Andric do {
509fe6060f1SDimitry Andric LocalPromoted = false;
510bdd1243dSDimitry Andric for (auto [PointerMustAliases, HasReadsOutsideSet] :
511fe6060f1SDimitry Andric collectPromotionCandidates(MSSA, AA, L)) {
512fe6060f1SDimitry Andric LocalPromoted |= promoteLoopAccessesToScalars(
513fb03ea46SDimitry Andric PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
514bdd1243dSDimitry Andric DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
515bdd1243dSDimitry Andric LicmAllowSpeculation, HasReadsOutsideSet);
516fe6060f1SDimitry Andric }
517fe6060f1SDimitry Andric Promoted |= LocalPromoted;
518fe6060f1SDimitry Andric } while (LocalPromoted);
5190b57cec5SDimitry Andric
5200b57cec5SDimitry Andric // Once we have promoted values across the loop body we have to
5210b57cec5SDimitry Andric // recursively reform LCSSA as any nested loop may now have values defined
5220b57cec5SDimitry Andric // within the loop used in the outer loop.
5230b57cec5SDimitry Andric // FIXME: This is really heavy handed. It would be a bit better to use an
5240b57cec5SDimitry Andric // SSAUpdater strategy during promotion that was LCSSA aware and reformed
5250b57cec5SDimitry Andric // it as it went.
5260b57cec5SDimitry Andric if (Promoted)
5270b57cec5SDimitry Andric formLCSSARecursively(*L, *DT, LI, SE);
5280b57cec5SDimitry Andric
5290b57cec5SDimitry Andric Changed |= Promoted;
5300b57cec5SDimitry Andric }
5310b57cec5SDimitry Andric }
5320b57cec5SDimitry Andric
5330b57cec5SDimitry Andric // Check that neither this loop nor its parent have had LCSSA broken. LICM is
5340b57cec5SDimitry Andric // specifically moving instructions across the loop boundary and so it is
5354824e7fdSDimitry Andric // especially in need of basic functional correctness checking here.
5360b57cec5SDimitry Andric assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
537e8d8bef9SDimitry Andric assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
5380b57cec5SDimitry Andric "Parent loop not left in LCSSA form after LICM!");
5390b57cec5SDimitry Andric
540349cc55cSDimitry Andric if (VerifyMemorySSA)
541349cc55cSDimitry Andric MSSA->verifyMemorySSA();
5420b57cec5SDimitry Andric
5430b57cec5SDimitry Andric if (Changed && SE)
544bdd1243dSDimitry Andric SE->forgetLoopDispositions();
5450b57cec5SDimitry Andric return Changed;
5460b57cec5SDimitry Andric }
5470b57cec5SDimitry Andric
5480b57cec5SDimitry Andric /// Walk the specified region of the CFG (defined by all blocks dominated by
5490b57cec5SDimitry Andric /// the specified block, and that are in the current loop) in reverse depth
5500b57cec5SDimitry Andric /// first order w.r.t the DominatorTree. This allows us to visit uses before
5510b57cec5SDimitry Andric /// definitions, allowing us to sink a loop body in one pass without iteration.
5520b57cec5SDimitry Andric ///
sinkRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,Loop * OutermostLoop)5535ffd83dbSDimitry Andric bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
554bdd1243dSDimitry Andric                       DominatorTree *DT, TargetLibraryInfo *TLI,
555bdd1243dSDimitry Andric                       TargetTransformInfo *TTI, Loop *CurLoop,
556bdd1243dSDimitry Andric                       MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
5570b57cec5SDimitry Andric                       SinkAndHoistLICMFlags &Flags,
558349cc55cSDimitry Andric                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
5590b57cec5SDimitry Andric
5600b57cec5SDimitry Andric   // Verify inputs.
5610b57cec5SDimitry Andric   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
56281ad6265SDimitry Andric          CurLoop != nullptr && SafetyInfo != nullptr &&
5630b57cec5SDimitry Andric          "Unexpected input to sinkRegion.");
5640b57cec5SDimitry Andric
56581ad6265SDimitry Andric   // We want to visit children before parents. We will enqueue all the parents
5660b57cec5SDimitry Andric   // before their children in the worklist and process the worklist in reverse
5670b57cec5SDimitry Andric   // order.
5680b57cec5SDimitry Andric   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
5690b57cec5SDimitry Andric
5700b57cec5SDimitry Andric   bool Changed = false;
5710b57cec5SDimitry Andric   for (DomTreeNode *DTN : reverse(Worklist)) {
5720b57cec5SDimitry Andric     BasicBlock *BB = DTN->getBlock();
5730b57cec5SDimitry Andric     // Only need to process the contents of this block if it is not part of a
5740b57cec5SDimitry Andric     // subloop (which would already have been processed).
5750b57cec5SDimitry Andric     if (inSubLoop(BB, CurLoop, LI))
5760b57cec5SDimitry Andric       continue;
5770b57cec5SDimitry Andric
    // Walk the block bottom-up so that users are visited before the
    // instructions that define them; the iterator is stepped off the current
    // instruction before any erase so it remains valid.
5780b57cec5SDimitry Andric     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
5790b57cec5SDimitry Andric       Instruction &I = *--II;
5800b57cec5SDimitry Andric
581fe6060f1SDimitry Andric       // The instruction is not used in the loop if it is dead. In this case,
582fe6060f1SDimitry Andric       // we just delete it instead of sinking it.
5830b57cec5SDimitry Andric       if (isInstructionTriviallyDead(&I, TLI)) {
5840b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
5855ffd83dbSDimitry Andric         salvageKnowledge(&I);
5860b57cec5SDimitry Andric         salvageDebugInfo(I);
        // Advance past I first so the loop iterator survives the erase.
5870b57cec5SDimitry Andric         ++II;
588349cc55cSDimitry Andric         eraseInstruction(I, *SafetyInfo, MSSAU);
5890b57cec5SDimitry Andric         Changed = true;
5900b57cec5SDimitry Andric         continue;
5910b57cec5SDimitry Andric       }
5920b57cec5SDimitry Andric
5930b57cec5SDimitry Andric       // Check to see if we can sink this instruction to the exit blocks
5940b57cec5SDimitry Andric       // of the loop. We can do this if the all users of the instruction are
5950b57cec5SDimitry Andric       // outside of the loop. In this case, it doesn't even matter if the
5960b57cec5SDimitry Andric       // operands of the instruction are loop invariant.
5970b57cec5SDimitry Andric       //
59806c3fb27SDimitry Andric       bool FoldableInLoop = false;
      // OutermostLoop is non-null only when sinking an entire loop nest; in
      // that case "used in the loop" is judged against the outermost loop.
599349cc55cSDimitry Andric       bool LoopNestMode = OutermostLoop != nullptr;
6005ffd83dbSDimitry Andric       if (!I.mayHaveSideEffects() &&
60106c3fb27SDimitry Andric           isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
60206c3fb27SDimitry Andric                                     SafetyInfo, TTI, FoldableInLoop,
60306c3fb27SDimitry Andric                                     LoopNestMode) &&
60481ad6265SDimitry Andric           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
605bdd1243dSDimitry Andric         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
60606c3fb27SDimitry Andric           if (!FoldableInLoop) {
6070b57cec5SDimitry Andric             ++II;
6085ffd83dbSDimitry Andric             salvageDebugInfo(I);
609349cc55cSDimitry Andric             eraseInstruction(I, *SafetyInfo, MSSAU);
6100b57cec5SDimitry Andric           }
6110b57cec5SDimitry Andric           Changed = true;
6120b57cec5SDimitry Andric         }
6130b57cec5SDimitry Andric       }
6140b57cec5SDimitry Andric     }
6150b57cec5SDimitry Andric   }
  // Expensive self-check, gated by the VerifyMemorySSA flag.
616349cc55cSDimitry Andric   if (VerifyMemorySSA)
61781ad6265SDimitry Andric     MSSAU.getMemorySSA()->verifyMemorySSA();
6180b57cec5SDimitry Andric   return Changed;
6190b57cec5SDimitry Andric }
6200b57cec5SDimitry Andric
sinkRegionForLoopNest(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)621bdd1243dSDimitry Andric bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
622bdd1243dSDimitry Andric DominatorTree *DT, TargetLibraryInfo *TLI,
623bdd1243dSDimitry Andric TargetTransformInfo *TTI, Loop *CurLoop,
624bdd1243dSDimitry Andric MemorySSAUpdater &MSSAU,
625bdd1243dSDimitry Andric ICFLoopSafetyInfo *SafetyInfo,
626bdd1243dSDimitry Andric SinkAndHoistLICMFlags &Flags,
627bdd1243dSDimitry Andric OptimizationRemarkEmitter *ORE) {
628349cc55cSDimitry Andric
629349cc55cSDimitry Andric bool Changed = false;
630349cc55cSDimitry Andric SmallPriorityWorklist<Loop *, 4> Worklist;
631349cc55cSDimitry Andric Worklist.insert(CurLoop);
632349cc55cSDimitry Andric appendLoopsToWorklist(*CurLoop, Worklist);
633349cc55cSDimitry Andric while (!Worklist.empty()) {
634349cc55cSDimitry Andric Loop *L = Worklist.pop_back_val();
635bdd1243dSDimitry Andric Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
636bdd1243dSDimitry Andric MSSAU, SafetyInfo, Flags, ORE, CurLoop);
637349cc55cSDimitry Andric }
638349cc55cSDimitry Andric return Changed;
639349cc55cSDimitry Andric }
640349cc55cSDimitry Andric
6410b57cec5SDimitry Andric namespace {
6420b57cec5SDimitry Andric // This is a helper class for hoistRegion to make it able to hoist control flow
6430b57cec5SDimitry Andric // in order to be able to hoist phis. The way this works is that we initially
6440b57cec5SDimitry Andric // start hoisting to the loop preheader, and when we see a loop invariant branch
6450b57cec5SDimitry Andric // we make note of this. When we then come to hoist an instruction that's
6460b57cec5SDimitry Andric // conditional on such a branch we duplicate the branch and the relevant control
6470b57cec5SDimitry Andric // flow, then hoist the instruction into the block corresponding to its original
6480b57cec5SDimitry Andric // block in the duplicated control flow.
6490b57cec5SDimitry Andric class ControlFlowHoister {
6500b57cec5SDimitry Andric private:
6510b57cec5SDimitry Andric   // Information about the loop we are hoisting from
6520b57cec5SDimitry Andric   LoopInfo *LI;
6530b57cec5SDimitry Andric   DominatorTree *DT;
6540b57cec5SDimitry Andric   Loop *CurLoop;
  // Kept in sync as we create hoisted blocks and clone branches below.
65581ad6265SDimitry Andric   MemorySSAUpdater &MSSAU;
6560b57cec5SDimitry Andric
6570b57cec5SDimitry Andric   // A map of blocks in the loop to the block their instructions will be hoisted
6580b57cec5SDimitry Andric   // to.
6590b57cec5SDimitry Andric   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
6600b57cec5SDimitry Andric
6610b57cec5SDimitry Andric   // The branches that we can hoist, mapped to the block that marks a
6620b57cec5SDimitry Andric   // convergence point of their control flow.
6630b57cec5SDimitry Andric   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
6640b57cec5SDimitry Andric
6650b57cec5SDimitry Andric public:
ControlFlowHoister(LoopInfo * LI,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU)6660b57cec5SDimitry Andric   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
66781ad6265SDimitry Andric                      MemorySSAUpdater &MSSAU)
6680b57cec5SDimitry Andric       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
6690b57cec5SDimitry Andric
registerPossiblyHoistableBranch(BranchInst * BI)6700b57cec5SDimitry Andric   void registerPossiblyHoistableBranch(BranchInst *BI) {
6710b57cec5SDimitry Andric     // We can only hoist conditional branches with loop invariant operands.
6720b57cec5SDimitry Andric     if (!ControlFlowHoisting || !BI->isConditional() ||
6730b57cec5SDimitry Andric         !CurLoop->hasLoopInvariantOperands(BI))
6740b57cec5SDimitry Andric       return;
6750b57cec5SDimitry Andric
6760b57cec5SDimitry Andric     // The branch destinations need to be in the loop, and we don't gain
6770b57cec5SDimitry Andric     // anything by duplicating conditional branches with duplicate successors,
6780b57cec5SDimitry Andric     // as it's essentially the same as an unconditional branch.
6790b57cec5SDimitry Andric     BasicBlock *TrueDest = BI->getSuccessor(0);
6800b57cec5SDimitry Andric     BasicBlock *FalseDest = BI->getSuccessor(1);
6810b57cec5SDimitry Andric     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
6820b57cec5SDimitry Andric         TrueDest == FalseDest)
6830b57cec5SDimitry Andric       return;
6840b57cec5SDimitry Andric
6850b57cec5SDimitry Andric     // We can hoist BI if one branch destination is the successor of the other,
6860b57cec5SDimitry Andric     // or both have common successor which we check by seeing if the
6870b57cec5SDimitry Andric     // intersection of their successors is non-empty.
6880b57cec5SDimitry Andric     // TODO: This could be expanded to allowing branches where both ends
6890b57cec5SDimitry Andric     // eventually converge to a single block.
6900b57cec5SDimitry Andric     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
6910b57cec5SDimitry Andric     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
6920b57cec5SDimitry Andric     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
6930b57cec5SDimitry Andric     BasicBlock *CommonSucc = nullptr;
6940b57cec5SDimitry Andric     if (TrueDestSucc.count(FalseDest)) {
6950b57cec5SDimitry Andric       CommonSucc = FalseDest;
6960b57cec5SDimitry Andric     } else if (FalseDestSucc.count(TrueDest)) {
6970b57cec5SDimitry Andric       CommonSucc = TrueDest;
6980b57cec5SDimitry Andric     } else {
6990b57cec5SDimitry Andric       set_intersect(TrueDestSucc, FalseDestSucc);
7000b57cec5SDimitry Andric       // If there's one common successor use that.
7010b57cec5SDimitry Andric       if (TrueDestSucc.size() == 1)
7020b57cec5SDimitry Andric         CommonSucc = *TrueDestSucc.begin();
7030b57cec5SDimitry Andric       // If there's more than one pick whichever appears first in the block list
7040b57cec5SDimitry Andric       // (we can't use the value returned by TrueDestSucc.begin() as it's
7050b57cec5SDimitry Andric       // unpredicatable which element gets returned).
7060b57cec5SDimitry Andric       else if (!TrueDestSucc.empty()) {
7070b57cec5SDimitry Andric         Function *F = TrueDest->getParent();
7080b57cec5SDimitry Andric         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
709e8d8bef9SDimitry Andric         auto It = llvm::find_if(*F, IsSucc);
7100b57cec5SDimitry Andric         assert(It != F->end() && "Could not find successor in function");
7110b57cec5SDimitry Andric         CommonSucc = &*It;
7120b57cec5SDimitry Andric       }
7130b57cec5SDimitry Andric     }
7140b57cec5SDimitry Andric     // The common successor has to be dominated by the branch, as otherwise
7150b57cec5SDimitry Andric     // there will be some other path to the successor that will not be
7160b57cec5SDimitry Andric     // controlled by this branch so any phi we hoist would be controlled by the
7170b57cec5SDimitry Andric     // wrong condition. This also takes care of avoiding hoisting of loop back
7180b57cec5SDimitry Andric     // edges.
7190b57cec5SDimitry Andric     // TODO: In some cases this could be relaxed if the successor is dominated
7200b57cec5SDimitry Andric     // by another block that's been hoisted and we can guarantee that the
7210b57cec5SDimitry Andric     // control flow has been replicated exactly.
7220b57cec5SDimitry Andric     if (CommonSucc && DT->dominates(BI, CommonSucc))
7230b57cec5SDimitry Andric       HoistableBranches[BI] = CommonSucc;
7240b57cec5SDimitry Andric   }
7250b57cec5SDimitry Andric
canHoistPHI(PHINode * PN)7260b57cec5SDimitry Andric   bool canHoistPHI(PHINode *PN) {
7270b57cec5SDimitry Andric     // The phi must have loop invariant operands.
7280b57cec5SDimitry Andric     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
7290b57cec5SDimitry Andric       return false;
7300b57cec5SDimitry Andric     // We can hoist phis if the block they are in is the target of hoistable
7310b57cec5SDimitry Andric     // branches which cover all of the predecessors of the block.
7320b57cec5SDimitry Andric     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
7330b57cec5SDimitry Andric     BasicBlock *BB = PN->getParent();
7340b57cec5SDimitry Andric     for (BasicBlock *PredBB : predecessors(BB))
7350b57cec5SDimitry Andric       PredecessorBlocks.insert(PredBB);
7360b57cec5SDimitry Andric     // If we have less predecessor blocks than predecessors then the phi will
7370b57cec5SDimitry Andric     // have more than one incoming value for the same block which we can't
7380b57cec5SDimitry Andric     // handle.
7390b57cec5SDimitry Andric     // TODO: This could be handled be erasing some of the duplicate incoming
7400b57cec5SDimitry Andric     // values.
7410b57cec5SDimitry Andric     if (PredecessorBlocks.size() != pred_size(BB))
7420b57cec5SDimitry Andric       return false;
7430b57cec5SDimitry Andric     for (auto &Pair : HoistableBranches) {
7440b57cec5SDimitry Andric       if (Pair.second == BB) {
7450b57cec5SDimitry Andric         // Which blocks are predecessors via this branch depends on if the
7460b57cec5SDimitry Andric         // branch is triangle-like or diamond-like.
7470b57cec5SDimitry Andric         if (Pair.first->getSuccessor(0) == BB) {
7480b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getParent());
7490b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
7500b57cec5SDimitry Andric         } else if (Pair.first->getSuccessor(1) == BB) {
7510b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getParent());
7520b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
7530b57cec5SDimitry Andric         } else {
7540b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
7550b57cec5SDimitry Andric           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
7560b57cec5SDimitry Andric         }
7570b57cec5SDimitry Andric       }
7580b57cec5SDimitry Andric     }
7590b57cec5SDimitry Andric     // PredecessorBlocks will now be empty if for every predecessor of BB we
7600b57cec5SDimitry Andric     // found a hoistable branch source.
7610b57cec5SDimitry Andric     return PredecessorBlocks.empty();
7620b57cec5SDimitry Andric   }
7630b57cec5SDimitry Andric
getOrCreateHoistedBlock(BasicBlock * BB)7640b57cec5SDimitry Andric   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
7650b57cec5SDimitry Andric     if (!ControlFlowHoisting)
7660b57cec5SDimitry Andric       return CurLoop->getLoopPreheader();
7670b57cec5SDimitry Andric     // If BB has already been hoisted, return that
7680b57cec5SDimitry Andric     if (HoistDestinationMap.count(BB))
7690b57cec5SDimitry Andric       return HoistDestinationMap[BB];
7700b57cec5SDimitry Andric
7710b57cec5SDimitry Andric     // Check if this block is conditional based on a pending branch
7720b57cec5SDimitry Andric     auto HasBBAsSuccessor =
7730b57cec5SDimitry Andric         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
7740b57cec5SDimitry Andric           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
7750b57cec5SDimitry Andric                                        Pair.first->getSuccessor(1) == BB);
7760b57cec5SDimitry Andric         };
777e8d8bef9SDimitry Andric     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
7780b57cec5SDimitry Andric
7790b57cec5SDimitry Andric     // If not involved in a pending branch, hoist to preheader
7800b57cec5SDimitry Andric     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
7810b57cec5SDimitry Andric     if (It == HoistableBranches.end()) {
782e8d8bef9SDimitry Andric       LLVM_DEBUG(dbgs() << "LICM using "
783e8d8bef9SDimitry Andric                         << InitialPreheader->getNameOrAsOperand()
784e8d8bef9SDimitry Andric                         << " as hoist destination for "
785e8d8bef9SDimitry Andric                         << BB->getNameOrAsOperand() << "\n");
7860b57cec5SDimitry Andric       HoistDestinationMap[BB] = InitialPreheader;
7870b57cec5SDimitry Andric       return InitialPreheader;
7880b57cec5SDimitry Andric     }
7890b57cec5SDimitry Andric     BranchInst *BI = It->first;
7900b57cec5SDimitry Andric     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
7910b57cec5SDimitry Andric                HoistableBranches.end() &&
7920b57cec5SDimitry Andric            "BB is expected to be the target of at most one branch");
7930b57cec5SDimitry Andric
7940b57cec5SDimitry Andric     LLVMContext &C = BB->getContext();
7950b57cec5SDimitry Andric     BasicBlock *TrueDest = BI->getSuccessor(0);
7960b57cec5SDimitry Andric     BasicBlock *FalseDest = BI->getSuccessor(1);
7970b57cec5SDimitry Andric     BasicBlock *CommonSucc = HoistableBranches[BI];
    // Recurse to get (or create) the hoist destination of the branch's own
    // block; the cloned branch terminator is placed there at the end.
7980b57cec5SDimitry Andric     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
7990b57cec5SDimitry Andric
8000b57cec5SDimitry Andric     // Create hoisted versions of blocks that currently don't have them
8010b57cec5SDimitry Andric     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
8020b57cec5SDimitry Andric       if (HoistDestinationMap.count(Orig))
8030b57cec5SDimitry Andric         return HoistDestinationMap[Orig];
8040b57cec5SDimitry Andric       BasicBlock *New =
8050b57cec5SDimitry Andric           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
8060b57cec5SDimitry Andric       HoistDestinationMap[Orig] = New;
8070b57cec5SDimitry Andric       DT->addNewBlock(New, HoistTarget);
8080b57cec5SDimitry Andric       if (CurLoop->getParentLoop())
8090b57cec5SDimitry Andric         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
8100b57cec5SDimitry Andric       ++NumCreatedBlocks;
8110b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
8120b57cec5SDimitry Andric                         << " as hoist destination for " << Orig->getName()
8130b57cec5SDimitry Andric                         << "\n");
8140b57cec5SDimitry Andric       return New;
8150b57cec5SDimitry Andric     };
8160b57cec5SDimitry Andric     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
8170b57cec5SDimitry Andric     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
8180b57cec5SDimitry Andric     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
8190b57cec5SDimitry Andric
    // Link up these blocks with branches. A block freshly created above has no
    // terminator yet; give each one an unconditional branch mirroring the
    // original triangle/diamond shape.
8200b57cec5SDimitry Andric     // Link up these blocks with branches.
8210b57cec5SDimitry Andric     if (!HoistCommonSucc->getTerminator()) {
8220b57cec5SDimitry Andric       // The new common successor we've generated will branch to whatever that
8230b57cec5SDimitry Andric       // hoist target branched to.
8240b57cec5SDimitry Andric       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
8250b57cec5SDimitry Andric       assert(TargetSucc && "Expected hoist target to have a single successor");
8260b57cec5SDimitry Andric       HoistCommonSucc->moveBefore(TargetSucc);
8270b57cec5SDimitry Andric       BranchInst::Create(TargetSucc, HoistCommonSucc);
8280b57cec5SDimitry Andric     }
8290b57cec5SDimitry Andric     if (!HoistTrueDest->getTerminator()) {
8300b57cec5SDimitry Andric       HoistTrueDest->moveBefore(HoistCommonSucc);
8310b57cec5SDimitry Andric       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
8320b57cec5SDimitry Andric     }
8330b57cec5SDimitry Andric     if (!HoistFalseDest->getTerminator()) {
8340b57cec5SDimitry Andric       HoistFalseDest->moveBefore(HoistCommonSucc);
8350b57cec5SDimitry Andric       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
8360b57cec5SDimitry Andric     }
8370b57cec5SDimitry Andric
8380b57cec5SDimitry Andric     // If BI is being cloned to what was originally the preheader then
8390b57cec5SDimitry Andric     // HoistCommonSucc will now be the new preheader.
8400b57cec5SDimitry Andric     if (HoistTarget == InitialPreheader) {
8410b57cec5SDimitry Andric       // Phis in the loop header now need to use the new preheader.
8420b57cec5SDimitry Andric       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
84381ad6265SDimitry Andric       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
8440b57cec5SDimitry Andric           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
8450b57cec5SDimitry Andric       // The new preheader dominates the loop header.
8460b57cec5SDimitry Andric       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
8470b57cec5SDimitry Andric       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
8480b57cec5SDimitry Andric       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
8490b57cec5SDimitry Andric       // The preheader hoist destination is now the new preheader, with the
8500b57cec5SDimitry Andric       // exception of the hoist destination of this branch.
8510b57cec5SDimitry Andric       for (auto &Pair : HoistDestinationMap)
8520b57cec5SDimitry Andric         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
8530b57cec5SDimitry Andric           Pair.second = HoistCommonSucc;
8540b57cec5SDimitry Andric     }
8550b57cec5SDimitry Andric
8560b57cec5SDimitry Andric     // Now finally clone BI.
8570b57cec5SDimitry Andric     ReplaceInstWithInst(
8580b57cec5SDimitry Andric         HoistTarget->getTerminator(),
8590b57cec5SDimitry Andric         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
8600b57cec5SDimitry Andric     ++NumClonedBranches;
8610b57cec5SDimitry Andric
8620b57cec5SDimitry Andric     assert(CurLoop->getLoopPreheader() &&
8630b57cec5SDimitry Andric            "Hoisting blocks should not have destroyed preheader");
8640b57cec5SDimitry Andric     return HoistDestinationMap[BB];
8650b57cec5SDimitry Andric   }
8660b57cec5SDimitry Andric };
8670b57cec5SDimitry Andric } // namespace
8680b57cec5SDimitry Andric
8690b57cec5SDimitry Andric /// Walk the specified region of the CFG (defined by all blocks dominated by
8700b57cec5SDimitry Andric /// the specified block, and that are in the current loop) in depth first
8710b57cec5SDimitry Andric /// order w.r.t the DominatorTree. This allows us to visit definitions before
8720b57cec5SDimitry Andric /// uses, allowing us to hoist a loop body in one pass without iteration.
8730b57cec5SDimitry Andric ///
hoistRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ScalarEvolution * SE,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,bool LoopNestMode,bool AllowSpeculation)8745ffd83dbSDimitry Andric bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
875bdd1243dSDimitry Andric DominatorTree *DT, AssumptionCache *AC,
876e8d8bef9SDimitry Andric TargetLibraryInfo *TLI, Loop *CurLoop,
87781ad6265SDimitry Andric MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
878349cc55cSDimitry Andric ICFLoopSafetyInfo *SafetyInfo,
8790b57cec5SDimitry Andric SinkAndHoistLICMFlags &Flags,
880fb03ea46SDimitry Andric OptimizationRemarkEmitter *ORE, bool LoopNestMode,
881fb03ea46SDimitry Andric bool AllowSpeculation) {
8820b57cec5SDimitry Andric // Verify inputs.
8830b57cec5SDimitry Andric assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
88481ad6265SDimitry Andric CurLoop != nullptr && SafetyInfo != nullptr &&
8850b57cec5SDimitry Andric "Unexpected input to hoistRegion.");
8860b57cec5SDimitry Andric
8870b57cec5SDimitry Andric ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
8880b57cec5SDimitry Andric
8890b57cec5SDimitry Andric // Keep track of instructions that have been hoisted, as they may need to be
8900b57cec5SDimitry Andric // re-hoisted if they end up not dominating all of their uses.
8910b57cec5SDimitry Andric SmallVector<Instruction *, 16> HoistedInstructions;
8920b57cec5SDimitry Andric
8930b57cec5SDimitry Andric // For PHI hoisting to work we need to hoist blocks before their successors.
8940b57cec5SDimitry Andric // We can do this by iterating through the blocks in the loop in reverse
8950b57cec5SDimitry Andric // post-order.
8960b57cec5SDimitry Andric LoopBlocksRPO Worklist(CurLoop);
8970b57cec5SDimitry Andric Worklist.perform(LI);
8980b57cec5SDimitry Andric bool Changed = false;
89906c3fb27SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader();
9000b57cec5SDimitry Andric for (BasicBlock *BB : Worklist) {
9010b57cec5SDimitry Andric // Only need to process the contents of this block if it is not part of a
9020b57cec5SDimitry Andric // subloop (which would already have been processed).
903fe6060f1SDimitry Andric if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
9040b57cec5SDimitry Andric continue;
9050b57cec5SDimitry Andric
906349cc55cSDimitry Andric for (Instruction &I : llvm::make_early_inc_range(*BB)) {
9070b57cec5SDimitry Andric // Try hoisting the instruction out to the preheader. We can only do
9080b57cec5SDimitry Andric // this if all of the operands of the instruction are loop invariant and
909e8d8bef9SDimitry Andric // if it is safe to hoist the instruction. We also check block frequency
910e8d8bef9SDimitry Andric // to make sure instruction only gets hoisted into colder blocks.
9110b57cec5SDimitry Andric // TODO: It may be safe to hoist if we are hoisting to a conditional block
9120b57cec5SDimitry Andric // and we have accurately duplicated the control flow from the loop header
9130b57cec5SDimitry Andric // to that block.
9140b57cec5SDimitry Andric if (CurLoop->hasLoopInvariantOperands(&I) &&
91581ad6265SDimitry Andric canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
9160b57cec5SDimitry Andric isSafeToExecuteUnconditionally(
917fe6060f1SDimitry Andric I, DT, TLI, CurLoop, SafetyInfo, ORE,
91806c3fb27SDimitry Andric Preheader->getTerminator(), AC, AllowSpeculation)) {
9190b57cec5SDimitry Andric hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
920480093f4SDimitry Andric MSSAU, SE, ORE);
9210b57cec5SDimitry Andric HoistedInstructions.push_back(&I);
9220b57cec5SDimitry Andric Changed = true;
9230b57cec5SDimitry Andric continue;
9240b57cec5SDimitry Andric }
9250b57cec5SDimitry Andric
9260b57cec5SDimitry Andric // Attempt to remove floating point division out of the loop by
9270b57cec5SDimitry Andric // converting it to a reciprocal multiplication.
9285ffd83dbSDimitry Andric if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
9295ffd83dbSDimitry Andric CurLoop->isLoopInvariant(I.getOperand(1))) {
9300b57cec5SDimitry Andric auto Divisor = I.getOperand(1);
9310b57cec5SDimitry Andric auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
9320b57cec5SDimitry Andric auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
9330b57cec5SDimitry Andric ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
9340b57cec5SDimitry Andric SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
9350b57cec5SDimitry Andric ReciprocalDivisor->insertBefore(&I);
9360fca6ea1SDimitry Andric ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
9370b57cec5SDimitry Andric
9380b57cec5SDimitry Andric auto Product =
9390b57cec5SDimitry Andric BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
9400b57cec5SDimitry Andric Product->setFastMathFlags(I.getFastMathFlags());
9410b57cec5SDimitry Andric SafetyInfo->insertInstructionTo(Product, I.getParent());
9420b57cec5SDimitry Andric Product->insertAfter(&I);
9430fca6ea1SDimitry Andric Product->setDebugLoc(I.getDebugLoc());
9440b57cec5SDimitry Andric I.replaceAllUsesWith(Product);
945349cc55cSDimitry Andric eraseInstruction(I, *SafetyInfo, MSSAU);
9460b57cec5SDimitry Andric
9470b57cec5SDimitry Andric hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
948480093f4SDimitry Andric SafetyInfo, MSSAU, SE, ORE);
9490b57cec5SDimitry Andric HoistedInstructions.push_back(ReciprocalDivisor);
9500b57cec5SDimitry Andric Changed = true;
9510b57cec5SDimitry Andric continue;
9520b57cec5SDimitry Andric }
9530b57cec5SDimitry Andric
9540b57cec5SDimitry Andric auto IsInvariantStart = [&](Instruction &I) {
9550b57cec5SDimitry Andric using namespace PatternMatch;
9560b57cec5SDimitry Andric return I.use_empty() &&
9570b57cec5SDimitry Andric match(&I, m_Intrinsic<Intrinsic::invariant_start>());
9580b57cec5SDimitry Andric };
9590b57cec5SDimitry Andric auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
9600b57cec5SDimitry Andric return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
9610b57cec5SDimitry Andric SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
9620b57cec5SDimitry Andric };
9630b57cec5SDimitry Andric if ((IsInvariantStart(I) || isGuard(&I)) &&
9640b57cec5SDimitry Andric CurLoop->hasLoopInvariantOperands(&I) &&
9650b57cec5SDimitry Andric MustExecuteWithoutWritesBefore(I)) {
9660b57cec5SDimitry Andric hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
967480093f4SDimitry Andric MSSAU, SE, ORE);
968480093f4SDimitry Andric HoistedInstructions.push_back(&I);
969480093f4SDimitry Andric Changed = true;
970480093f4SDimitry Andric continue;
971480093f4SDimitry Andric }
972480093f4SDimitry Andric
9730b57cec5SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(&I)) {
9740b57cec5SDimitry Andric if (CFH.canHoistPHI(PN)) {
9750b57cec5SDimitry Andric // Redirect incoming blocks first to ensure that we create hoisted
9760b57cec5SDimitry Andric // versions of those blocks before we hoist the phi.
9770b57cec5SDimitry Andric for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
9780b57cec5SDimitry Andric PN->setIncomingBlock(
9790b57cec5SDimitry Andric i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
9800b57cec5SDimitry Andric hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
981480093f4SDimitry Andric MSSAU, SE, ORE);
9820b57cec5SDimitry Andric assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
9830b57cec5SDimitry Andric Changed = true;
9840b57cec5SDimitry Andric continue;
9850b57cec5SDimitry Andric }
9860b57cec5SDimitry Andric }
9870b57cec5SDimitry Andric
98806c3fb27SDimitry Andric // Try to reassociate instructions so that part of computations can be
98906c3fb27SDimitry Andric // done out of loop.
99006c3fb27SDimitry Andric if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
99106c3fb27SDimitry Andric Changed = true;
99206c3fb27SDimitry Andric continue;
99306c3fb27SDimitry Andric }
99406c3fb27SDimitry Andric
9950b57cec5SDimitry Andric // Remember possibly hoistable branches so we can actually hoist them
9960b57cec5SDimitry Andric // later if needed.
9970b57cec5SDimitry Andric if (BranchInst *BI = dyn_cast<BranchInst>(&I))
9980b57cec5SDimitry Andric CFH.registerPossiblyHoistableBranch(BI);
9990b57cec5SDimitry Andric }
10000b57cec5SDimitry Andric }
10010b57cec5SDimitry Andric
10020b57cec5SDimitry Andric // If we hoisted instructions to a conditional block they may not dominate
10030b57cec5SDimitry Andric // their uses that weren't hoisted (such as phis where some operands are not
10040b57cec5SDimitry Andric // loop invariant). If so make them unconditional by moving them to their
10050b57cec5SDimitry Andric // immediate dominator. We iterate through the instructions in reverse order
10060b57cec5SDimitry Andric // which ensures that when we rehoist an instruction we rehoist its operands,
10075f757f3fSDimitry Andric // and also keep track of where in the block we are rehoisting to make sure
10080b57cec5SDimitry Andric // that we rehoist instructions before the instructions that use them.
10090b57cec5SDimitry Andric Instruction *HoistPoint = nullptr;
10100b57cec5SDimitry Andric if (ControlFlowHoisting) {
10110b57cec5SDimitry Andric for (Instruction *I : reverse(HoistedInstructions)) {
10120b57cec5SDimitry Andric if (!llvm::all_of(I->uses(),
10130b57cec5SDimitry Andric [&](Use &U) { return DT->dominates(I, U); })) {
10140b57cec5SDimitry Andric BasicBlock *Dominator =
10150b57cec5SDimitry Andric DT->getNode(I->getParent())->getIDom()->getBlock();
10160b57cec5SDimitry Andric if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
10170b57cec5SDimitry Andric if (HoistPoint)
10180b57cec5SDimitry Andric assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
10190b57cec5SDimitry Andric "New hoist point expected to dominate old hoist point");
10200b57cec5SDimitry Andric HoistPoint = Dominator->getTerminator();
10210b57cec5SDimitry Andric }
10220b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1023e8d8bef9SDimitry Andric << HoistPoint->getParent()->getNameOrAsOperand()
10240b57cec5SDimitry Andric << ": " << *I << "\n");
10255f757f3fSDimitry Andric moveInstructionBefore(*I, HoistPoint->getIterator(), *SafetyInfo, MSSAU,
10265f757f3fSDimitry Andric SE);
10270b57cec5SDimitry Andric HoistPoint = I;
10280b57cec5SDimitry Andric Changed = true;
10290b57cec5SDimitry Andric }
10300b57cec5SDimitry Andric }
10310b57cec5SDimitry Andric }
1032349cc55cSDimitry Andric if (VerifyMemorySSA)
103381ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA();
10340b57cec5SDimitry Andric
10350b57cec5SDimitry Andric // Now that we've finished hoisting make sure that LI and DT are still
10360b57cec5SDimitry Andric // valid.
10378bcb0991SDimitry Andric #ifdef EXPENSIVE_CHECKS
10380b57cec5SDimitry Andric if (Changed) {
10390b57cec5SDimitry Andric assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
10400b57cec5SDimitry Andric "Dominator tree verification failed");
10410b57cec5SDimitry Andric LI->verify(*DT);
10420b57cec5SDimitry Andric }
10430b57cec5SDimitry Andric #endif
10440b57cec5SDimitry Andric
10450b57cec5SDimitry Andric return Changed;
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric
10480b57cec5SDimitry Andric // Return true if LI is invariant within scope of the loop. LI is invariant if
10490b57cec5SDimitry Andric // CurLoop is dominated by an invariant.start representing the same memory
10500b57cec5SDimitry Andric // location and size as the memory location LI loads from, and also the
10510b57cec5SDimitry Andric // invariant.start has no uses.
// Note: this is a best-effort scan -- only the first MaxNumUsesTraversed users
// of the load's address are inspected, so a dominating invariant.start may be
// missed and the function conservatively returns false.
isLoadInvariantInLoop(LoadInst * LI,DominatorTree * DT,Loop * CurLoop)10520b57cec5SDimitry Andric static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
10530b57cec5SDimitry Andric Loop *CurLoop) {
10545f757f3fSDimitry Andric Value *Addr = LI->getPointerOperand();
10550fca6ea1SDimitry Andric const DataLayout &DL = LI->getDataLayout();
1056e8d8bef9SDimitry Andric const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1057e8d8bef9SDimitry Andric
1058e8d8bef9SDimitry Andric // It is not currently possible for clang to generate an invariant.start
1059e8d8bef9SDimitry Andric // intrinsic with scalable vector types because we don't support thread local
1060e8d8bef9SDimitry Andric // sizeless types and we don't permit sizeless types in structs or classes.
1061e8d8bef9SDimitry Andric // Furthermore, even if support is added for this in future the intrinsic
1062e8d8bef9SDimitry Andric // itself is defined to have a size of -1 for variable sized objects. This
1063e8d8bef9SDimitry Andric // makes it impossible to verify if the intrinsic envelops our region of
1064e8d8bef9SDimitry Andric // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1065e8d8bef9SDimitry Andric // types would have a -1 parameter, but the former is clearly double the size
1066e8d8bef9SDimitry Andric // of the latter.
1067e8d8bef9SDimitry Andric if (LocSizeInBits.isScalable())
1068e8d8bef9SDimitry Andric return false;
10690b57cec5SDimitry Andric
1070349cc55cSDimitry Andric // If we've ended up at a global/constant, bail. We shouldn't be looking at
1071349cc55cSDimitry Andric // uselists for non-local Values in a loop pass.
1072349cc55cSDimitry Andric if (isa<Constant>(Addr))
1073349cc55cSDimitry Andric return false;
10740b57cec5SDimitry Andric
10750b57cec5SDimitry Andric unsigned UsesVisited = 0;
10760b57cec5SDimitry Andric // Traverse all uses of the load operand value, to see if invariant.start is
10770b57cec5SDimitry Andric // one of the uses, and whether it dominates the load instruction.
10780b57cec5SDimitry Andric for (auto *U : Addr->users()) {
10790b57cec5SDimitry Andric // Avoid traversing for Load operand with high number of users.
10800b57cec5SDimitry Andric if (++UsesVisited > MaxNumUsesTraversed)
10810b57cec5SDimitry Andric return false;
10820b57cec5SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
10830b57cec5SDimitry Andric // If there are escaping uses of invariant.start instruction, the load maybe
10840b57cec5SDimitry Andric // non-invariant.
10850b57cec5SDimitry Andric if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
10860b57cec5SDimitry Andric !II->use_empty())
10870b57cec5SDimitry Andric continue;
1088e8d8bef9SDimitry Andric ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1089e8d8bef9SDimitry Andric // The intrinsic supports having a -1 argument for variable sized objects
1090e8d8bef9SDimitry Andric // so we should check for that here.
1091e8d8bef9SDimitry Andric if (InvariantSize->isNegative())
1092e8d8bef9SDimitry Andric continue;
// The intrinsic argument is a byte count; convert to bits for the comparison
// against the load's type size below.
1093e8d8bef9SDimitry Andric uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
10940b57cec5SDimitry Andric // Confirm the invariant.start location size contains the load operand size
10950b57cec5SDimitry Andric // in bits. Also, the invariant.start should dominate the load, and we
10960b57cec5SDimitry Andric // should not hoist the load out of a loop that contains this dominating
10970b57cec5SDimitry Andric // invariant.start.
1098bdd1243dSDimitry Andric if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
10990b57cec5SDimitry Andric DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
11000b57cec5SDimitry Andric return true;
11010b57cec5SDimitry Andric }
11020b57cec5SDimitry Andric
11030b57cec5SDimitry Andric return false;
11040b57cec5SDimitry Andric }
11050b57cec5SDimitry Andric
11060b57cec5SDimitry Andric namespace {
11070b57cec5SDimitry Andric /// Return true if-and-only-if we know how to (mechanically) both hoist and
11080b57cec5SDimitry Andric /// sink a given instruction out of a loop. Does not address legality
11090b57cec5SDimitry Andric /// concerns such as aliasing or speculation safety.
isHoistableAndSinkableInst(Instruction & I)11100b57cec5SDimitry Andric bool isHoistableAndSinkableInst(Instruction &I) {
11110b57cec5SDimitry Andric // Only these instructions are hoistable/sinkable.
11120b57cec5SDimitry Andric return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
11135ffd83dbSDimitry Andric isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
11145ffd83dbSDimitry Andric isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
11155ffd83dbSDimitry Andric isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
11160b57cec5SDimitry Andric isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
11170b57cec5SDimitry Andric isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
11185ffd83dbSDimitry Andric isa<InsertValueInst>(I) || isa<FreezeInst>(I));
11190b57cec5SDimitry Andric }
112081ad6265SDimitry Andric /// Return true if MSSA knows there are no MemoryDefs in the loop.
isReadOnly(const MemorySSAUpdater & MSSAU,const Loop * L)112181ad6265SDimitry Andric bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
11220b57cec5SDimitry Andric for (auto *BB : L->getBlocks())
112381ad6265SDimitry Andric if (MSSAU.getMemorySSA()->getBlockDefs(BB))
11240b57cec5SDimitry Andric return false;
11250b57cec5SDimitry Andric return true;
11260b57cec5SDimitry Andric }
11270b57cec5SDimitry Andric
11280b57cec5SDimitry Andric /// Return true if I is the only Instruction with a MemoryAccess in L.
isOnlyMemoryAccess(const Instruction * I,const Loop * L,const MemorySSAUpdater & MSSAU)11290b57cec5SDimitry Andric bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
113081ad6265SDimitry Andric const MemorySSAUpdater &MSSAU) {
11310b57cec5SDimitry Andric for (auto *BB : L->getBlocks())
113281ad6265SDimitry Andric if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
11330b57cec5SDimitry Andric int NotAPhi = 0;
11340b57cec5SDimitry Andric for (const auto &Acc : *Accs) {
11350b57cec5SDimitry Andric if (isa<MemoryPhi>(&Acc))
11360b57cec5SDimitry Andric continue;
11370b57cec5SDimitry Andric const auto *MUD = cast<MemoryUseOrDef>(&Acc);
11380b57cec5SDimitry Andric if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
11390b57cec5SDimitry Andric return false;
11400b57cec5SDimitry Andric }
11410b57cec5SDimitry Andric }
11420b57cec5SDimitry Andric return true;
11430b57cec5SDimitry Andric }
11440b57cec5SDimitry Andric }
11450b57cec5SDimitry Andric
getClobberingMemoryAccess(MemorySSA & MSSA,BatchAAResults & BAA,SinkAndHoistLICMFlags & Flags,MemoryUseOrDef * MA)114606c3fb27SDimitry Andric static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
114706c3fb27SDimitry Andric BatchAAResults &BAA,
114806c3fb27SDimitry Andric SinkAndHoistLICMFlags &Flags,
114906c3fb27SDimitry Andric MemoryUseOrDef *MA) {
115006c3fb27SDimitry Andric // See declaration of SetLicmMssaOptCap for usage details.
115106c3fb27SDimitry Andric if (Flags.tooManyClobberingCalls())
115206c3fb27SDimitry Andric return MA->getDefiningAccess();
115306c3fb27SDimitry Andric
115406c3fb27SDimitry Andric MemoryAccess *Source =
115506c3fb27SDimitry Andric MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
115606c3fb27SDimitry Andric Flags.incrementClobberingCalls();
115706c3fb27SDimitry Andric return Source;
115806c3fb27SDimitry Andric }
115906c3fb27SDimitry Andric
// Decide whether LICM is mechanically and alias-wise allowed to move I out of
// CurLoop (hoist or sink). Speculation/fault safety is NOT checked here --
// callers must separately prove it is safe to execute I at the target point.
// Flags caps how much MemorySSA walking is done and records whether we are
// sinking; ORE, if non-null, receives missed-optimization remarks.
canSinkOrHoistInst(Instruction & I,AAResults * AA,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU,bool TargetExecutesOncePerLoop,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)11600b57cec5SDimitry Andric bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
116181ad6265SDimitry Andric Loop *CurLoop, MemorySSAUpdater &MSSAU,
11620b57cec5SDimitry Andric bool TargetExecutesOncePerLoop,
116381ad6265SDimitry Andric SinkAndHoistLICMFlags &Flags,
11640b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE) {
11650b57cec5SDimitry Andric // If we don't understand the instruction, bail early.
11660b57cec5SDimitry Andric if (!isHoistableAndSinkableInst(I))
11670b57cec5SDimitry Andric return false;
11680b57cec5SDimitry Andric
116981ad6265SDimitry Andric MemorySSA *MSSA = MSSAU.getMemorySSA();
11700b57cec5SDimitry Andric // Loads have extra constraints we have to verify before we can hoist them.
11710b57cec5SDimitry Andric if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
11720b57cec5SDimitry Andric if (!LI->isUnordered())
11730b57cec5SDimitry Andric return false; // Don't sink/hoist volatile or ordered atomic loads!
11740b57cec5SDimitry Andric
11750b57cec5SDimitry Andric // Loads from constant memory are always safe to move, even if they end up
11760b57cec5SDimitry Andric // in the same alias set as something that ends up being modified.
1177bdd1243dSDimitry Andric if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
11780b57cec5SDimitry Andric return true;
11798bcb0991SDimitry Andric if (LI->hasMetadata(LLVMContext::MD_invariant_load))
11800b57cec5SDimitry Andric return true;
11810b57cec5SDimitry Andric
11820b57cec5SDimitry Andric if (LI->isAtomic() && !TargetExecutesOncePerLoop)
11830b57cec5SDimitry Andric return false; // Don't risk duplicating unordered loads
11840b57cec5SDimitry Andric
11850b57cec5SDimitry Andric // This checks for an invariant.start dominating the load.
11860b57cec5SDimitry Andric if (isLoadInvariantInLoop(LI, DT, CurLoop))
11870b57cec5SDimitry Andric return true;
11880b57cec5SDimitry Andric
118906c3fb27SDimitry Andric auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
119006c3fb27SDimitry Andric
// NOTE(review): the invariant.group flag is forwarded to the MSSA query;
// presumably it lets pointerInvalidatedByLoop look through invariant.group
// barriers -- confirm against that helper's implementation.
119106c3fb27SDimitry Andric bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
119206c3fb27SDimitry Andric
119381ad6265SDimitry Andric bool Invalidated = pointerInvalidatedByLoop(
119406c3fb27SDimitry Andric MSSA, MU, CurLoop, I, Flags, InvariantGroup);
11950b57cec5SDimitry Andric // Check loop-invariant address because this may also be a sinkable load
11960b57cec5SDimitry Andric // whose address is not necessarily loop-invariant.
11970b57cec5SDimitry Andric if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
11980b57cec5SDimitry Andric ORE->emit([&]() {
11990b57cec5SDimitry Andric return OptimizationRemarkMissed(
12000b57cec5SDimitry Andric DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
12010b57cec5SDimitry Andric << "failed to move load with loop-invariant address "
12020b57cec5SDimitry Andric "because the loop may invalidate its value";
12030b57cec5SDimitry Andric });
12040b57cec5SDimitry Andric
12050b57cec5SDimitry Andric return !Invalidated;
12060b57cec5SDimitry Andric } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
12070b57cec5SDimitry Andric // Don't sink or hoist dbg info; it's legal, but not useful.
12080b57cec5SDimitry Andric if (isa<DbgInfoIntrinsic>(I))
12090b57cec5SDimitry Andric return false;
12100b57cec5SDimitry Andric
12110b57cec5SDimitry Andric // Don't sink calls which can throw.
12120b57cec5SDimitry Andric if (CI->mayThrow())
12130b57cec5SDimitry Andric return false;
12140b57cec5SDimitry Andric
1215e8d8bef9SDimitry Andric // Convergent attribute has been used on operations that involve
1216e8d8bef9SDimitry Andric // inter-thread communication which results are implicitly affected by the
1217e8d8bef9SDimitry Andric // enclosing control flows. It is not safe to hoist or sink such operations
1218e8d8bef9SDimitry Andric // across control flow.
1219e8d8bef9SDimitry Andric if (CI->isConvergent())
1220e8d8bef9SDimitry Andric return false;
1221e8d8bef9SDimitry Andric
12220fca6ea1SDimitry Andric // FIXME: Current LLVM IR semantics don't work well with coroutines and
12230fca6ea1SDimitry Andric // thread local globals. We currently treat getting the address of a thread
12240fca6ea1SDimitry Andric // local global as not accessing memory, even though it may not be a
12250fca6ea1SDimitry Andric // constant throughout a function with coroutines. Remove this check after
12260fca6ea1SDimitry Andric // we better model semantics of thread local globals.
12270fca6ea1SDimitry Andric if (CI->getFunction()->isPresplitCoroutine())
12280fca6ea1SDimitry Andric return false;
12290fca6ea1SDimitry Andric
12300b57cec5SDimitry Andric using namespace PatternMatch;
12310b57cec5SDimitry Andric if (match(CI, m_Intrinsic<Intrinsic::assume>()))
12320b57cec5SDimitry Andric // Assumes don't actually alias anything or throw
12330b57cec5SDimitry Andric return true;
12340b57cec5SDimitry Andric
12350b57cec5SDimitry Andric // Handle simple cases by querying alias analysis.
1236bdd1243dSDimitry Andric MemoryEffects Behavior = AA->getMemoryEffects(CI);
123706c3fb27SDimitry Andric
1238bdd1243dSDimitry Andric if (Behavior.doesNotAccessMemory())
12390b57cec5SDimitry Andric return true;
1240bdd1243dSDimitry Andric if (Behavior.onlyReadsMemory()) {
12410b57cec5SDimitry Andric // A readonly argmemonly function only reads from memory pointed to by
12420b57cec5SDimitry Andric // it's arguments with arbitrary offsets. If we can prove there are no
12430b57cec5SDimitry Andric // writes to this memory in the loop, we can hoist or sink.
1244bdd1243dSDimitry Andric if (Behavior.onlyAccessesArgPointees()) {
12450b57cec5SDimitry Andric // TODO: expand to writeable arguments
1246349cc55cSDimitry Andric for (Value *Op : CI->args())
124781ad6265SDimitry Andric if (Op->getType()->isPointerTy() &&
124881ad6265SDimitry Andric pointerInvalidatedByLoop(
1249e8d8bef9SDimitry Andric MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
125006c3fb27SDimitry Andric Flags, /*InvariantGroup=*/false))
12510b57cec5SDimitry Andric return false;
12520b57cec5SDimitry Andric return true;
12530b57cec5SDimitry Andric }
12540b57cec5SDimitry Andric
12550b57cec5SDimitry Andric // If this call only reads from memory and there are no writes to memory
12560b57cec5SDimitry Andric // in the loop, we can hoist or sink the call as appropriate.
125781ad6265SDimitry Andric if (isReadOnly(MSSAU, CurLoop))
12580b57cec5SDimitry Andric return true;
12590b57cec5SDimitry Andric }
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric // FIXME: This should use mod/ref information to see if we can hoist or
12620b57cec5SDimitry Andric // sink the call.
12630b57cec5SDimitry Andric
12640b57cec5SDimitry Andric return false;
12650b57cec5SDimitry Andric } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
12660b57cec5SDimitry Andric // Fences alias (most) everything to provide ordering. For the moment,
12670b57cec5SDimitry Andric // just give up if there are any other memory operations in the loop.
12680b57cec5SDimitry Andric return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
12690b57cec5SDimitry Andric } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
12700b57cec5SDimitry Andric if (!SI->isUnordered())
12710b57cec5SDimitry Andric return false; // Don't sink/hoist volatile or ordered atomic store!
12720b57cec5SDimitry Andric
12730b57cec5SDimitry Andric // We can only hoist a store that we can prove writes a value which is not
12740b57cec5SDimitry Andric // read or overwritten within the loop. For those cases, we fallback to
12750b57cec5SDimitry Andric // load store promotion instead. TODO: We can extend this to cases where
12760b57cec5SDimitry Andric // there is exactly one write to the location and that write dominates an
12770b57cec5SDimitry Andric // arbitrary number of reads in the loop.
12780b57cec5SDimitry Andric if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
12790b57cec5SDimitry Andric return true;
128006c3fb27SDimitry Andric // If there are more accesses than the Promotion cap, then give up as we're
128106c3fb27SDimitry Andric // not walking a list that long.
128206c3fb27SDimitry Andric if (Flags.tooManyMemoryAccesses())
12830b57cec5SDimitry Andric return false;
128406c3fb27SDimitry Andric
// Walk MemorySSA to find what clobbers this store; BatchAA caches the alias
// queries issued during the walks below.
128506c3fb27SDimitry Andric auto *SIMD = MSSA->getMemoryAccess(SI);
128606c3fb27SDimitry Andric BatchAAResults BAA(*AA);
128706c3fb27SDimitry Andric auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
128806c3fb27SDimitry Andric // Make sure there are no clobbers inside the loop.
128906c3fb27SDimitry Andric if (!MSSA->isLiveOnEntryDef(Source) &&
129006c3fb27SDimitry Andric CurLoop->contains(Source->getBlock()))
129106c3fb27SDimitry Andric return false;
129206c3fb27SDimitry Andric
12930b57cec5SDimitry Andric // If there are interfering Uses (i.e. their defining access is in the
12940b57cec5SDimitry Andric // loop), or ordered loads (stored as Defs!), don't move this store.
12950b57cec5SDimitry Andric // Could do better here, but this is conservatively correct.
12960b57cec5SDimitry Andric // TODO: Cache set of Uses on the first walk in runOnLoop, update when
12970b57cec5SDimitry Andric // moving accesses. Can also extend to dominating uses.
12980b57cec5SDimitry Andric for (auto *BB : CurLoop->getBlocks())
12990b57cec5SDimitry Andric if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
13000b57cec5SDimitry Andric for (const auto &MA : *Accesses)
13010b57cec5SDimitry Andric if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
130206c3fb27SDimitry Andric auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
130306c3fb27SDimitry Andric const_cast<MemoryUse *>(MU));
13040b57cec5SDimitry Andric if (!MSSA->isLiveOnEntryDef(MD) &&
13050b57cec5SDimitry Andric CurLoop->contains(MD->getBlock()))
13060b57cec5SDimitry Andric return false;
13070b57cec5SDimitry Andric // Disable hoisting past potentially interfering loads. Optimized
13080b57cec5SDimitry Andric // Uses may point to an access outside the loop, as getClobbering
13090b57cec5SDimitry Andric // checks the previous iteration when walking the backedge.
13100b57cec5SDimitry Andric // FIXME: More precise: no Uses that alias SI.
131181ad6265SDimitry Andric if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
13120b57cec5SDimitry Andric return false;
13138bcb0991SDimitry Andric } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
13140b57cec5SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
13150b57cec5SDimitry Andric (void)LI; // Silence warning.
13160b57cec5SDimitry Andric assert(!LI->isUnordered() && "Expected unordered load");
13170b57cec5SDimitry Andric return false;
13180b57cec5SDimitry Andric }
13198bcb0991SDimitry Andric // Any call, while it may not be clobbering SI, it may be a use.
13208bcb0991SDimitry Andric if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1321fe6060f1SDimitry Andric // Check if the call may read from the memory location written
13228bcb0991SDimitry Andric // to by SI. Check CI's attributes and arguments; the number of
13238bcb0991SDimitry Andric // such checks performed is limited above by NoOfMemAccTooLarge.
132406c3fb27SDimitry Andric ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
13258bcb0991SDimitry Andric if (isModOrRefSet(MRI))
13268bcb0991SDimitry Andric return false;
13278bcb0991SDimitry Andric }
13288bcb0991SDimitry Andric }
13290b57cec5SDimitry Andric }
133006c3fb27SDimitry Andric return true;
13310b57cec5SDimitry Andric }
13320b57cec5SDimitry Andric
13330b57cec5SDimitry Andric assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
13340b57cec5SDimitry Andric
13350b57cec5SDimitry Andric // We've established mechanical ability and aliasing, it's up to the caller
13360b57cec5SDimitry Andric // to check fault safety
13370b57cec5SDimitry Andric return true;
13380b57cec5SDimitry Andric }
13390b57cec5SDimitry Andric
13400b57cec5SDimitry Andric /// Returns true if a PHINode is a trivially replaceable with an
13410b57cec5SDimitry Andric /// Instruction.
13420b57cec5SDimitry Andric /// This is true when all incoming values are that instruction.
13430b57cec5SDimitry Andric /// This pattern occurs most often with LCSSA PHI nodes.
13440b57cec5SDimitry Andric ///
isTriviallyReplaceablePHI(const PHINode & PN,const Instruction & I)13450b57cec5SDimitry Andric static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
13460b57cec5SDimitry Andric for (const Value *IncValue : PN.incoming_values())
13470b57cec5SDimitry Andric if (IncValue != &I)
13480b57cec5SDimitry Andric return false;
13490b57cec5SDimitry Andric
13500b57cec5SDimitry Andric return true;
13510b57cec5SDimitry Andric }
13520b57cec5SDimitry Andric
135306c3fb27SDimitry Andric /// Return true if the instruction is foldable in the loop.
isFoldableInLoop(const Instruction & I,const Loop * CurLoop,const TargetTransformInfo * TTI)135406c3fb27SDimitry Andric static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
13550b57cec5SDimitry Andric const TargetTransformInfo *TTI) {
135606c3fb27SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1357bdd1243dSDimitry Andric InstructionCost CostI =
1358bdd1243dSDimitry Andric TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1359bdd1243dSDimitry Andric if (CostI != TargetTransformInfo::TCC_Free)
13600b57cec5SDimitry Andric return false;
1361bdd1243dSDimitry Andric // For a GEP, we cannot simply use getInstructionCost because currently
1362bdd1243dSDimitry Andric // it optimistically assumes that a GEP will fold into addressing mode
13630b57cec5SDimitry Andric // regardless of its users.
13640b57cec5SDimitry Andric const BasicBlock *BB = GEP->getParent();
13650b57cec5SDimitry Andric for (const User *U : GEP->users()) {
13660b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U);
13670b57cec5SDimitry Andric if (CurLoop->contains(UI) &&
13680b57cec5SDimitry Andric (BB != UI->getParent() ||
13690b57cec5SDimitry Andric (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
13700b57cec5SDimitry Andric return false;
13710b57cec5SDimitry Andric }
13720b57cec5SDimitry Andric return true;
1373bdd1243dSDimitry Andric }
1374bdd1243dSDimitry Andric
137506c3fb27SDimitry Andric return false;
13760b57cec5SDimitry Andric }
13770b57cec5SDimitry Andric
13780b57cec5SDimitry Andric /// Return true if the only users of this instruction are outside of
13790b57cec5SDimitry Andric /// the loop. If this is true, we can sink the instruction to the exit
13800b57cec5SDimitry Andric /// blocks of the loop.
13810b57cec5SDimitry Andric ///
13820b57cec5SDimitry Andric /// We also return true if the instruction could be folded away in lowering.
13830b57cec5SDimitry Andric /// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
/// Out-param \p FoldableInLoop is set to true when an in-loop user was
/// tolerated only because I is foldable (per isFoldableInLoop); it is never
/// reset to false here. \p LoopNestMode enables looking through single-operand
/// PHI chains (see below).
isNotUsedOrFoldableInLoop(const Instruction & I,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,TargetTransformInfo * TTI,bool & FoldableInLoop,bool LoopNestMode)138406c3fb27SDimitry Andric static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
13850b57cec5SDimitry Andric const LoopSafetyInfo *SafetyInfo,
138606c3fb27SDimitry Andric TargetTransformInfo *TTI,
138706c3fb27SDimitry Andric bool &FoldableInLoop, bool LoopNestMode) {
13880b57cec5SDimitry Andric const auto &BlockColors = SafetyInfo->getBlockColors();
138906c3fb27SDimitry Andric bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
13900b57cec5SDimitry Andric for (const User *U : I.users()) {
13910b57cec5SDimitry Andric const Instruction *UI = cast<Instruction>(U);
13920b57cec5SDimitry Andric if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
13930b57cec5SDimitry Andric const BasicBlock *BB = PN->getParent();
13940b57cec5SDimitry Andric // We cannot sink uses in catchswitches.
13950b57cec5SDimitry Andric if (isa<CatchSwitchInst>(BB->getTerminator()))
13960b57cec5SDimitry Andric return false;
13970b57cec5SDimitry Andric
13980b57cec5SDimitry Andric // We need to sink a callsite to a unique funclet. Avoid sinking if the
13990b57cec5SDimitry Andric // phi use is too muddled.
14000b57cec5SDimitry Andric if (isa<CallInst>(I))
14010b57cec5SDimitry Andric if (!BlockColors.empty() &&
14020b57cec5SDimitry Andric BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
14030b57cec5SDimitry Andric return false;
1404349cc55cSDimitry Andric
// In loop-nest mode, chase through chains of single-operand, single-user
// phis (typically inner-loop LCSSA phis) that are still inside CurLoop, so
// the containment test below sees the ultimate user rather than the phi.
1405349cc55cSDimitry Andric if (LoopNestMode) {
1406349cc55cSDimitry Andric while (isa<PHINode>(UI) && UI->hasOneUser() &&
1407349cc55cSDimitry Andric UI->getNumOperands() == 1) {
1408349cc55cSDimitry Andric if (!CurLoop->contains(UI))
1409349cc55cSDimitry Andric break;
1410349cc55cSDimitry Andric UI = cast<Instruction>(UI->user_back());
1411349cc55cSDimitry Andric }
1412349cc55cSDimitry Andric }
14130b57cec5SDimitry Andric }
14140b57cec5SDimitry Andric
14150b57cec5SDimitry Andric if (CurLoop->contains(UI)) {
// An in-loop user is acceptable only when I folds away in lowering;
// record that fact for the caller and keep scanning the other users.
141606c3fb27SDimitry Andric if (IsFoldable) {
141706c3fb27SDimitry Andric FoldableInLoop = true;
14180b57cec5SDimitry Andric continue;
14190b57cec5SDimitry Andric }
14200b57cec5SDimitry Andric return false;
14210b57cec5SDimitry Andric }
14220b57cec5SDimitry Andric }
14230b57cec5SDimitry Andric return true;
14240b57cec5SDimitry Andric }
14250b57cec5SDimitry Andric
/// Clone \p I into the beginning of exit block \p ExitBlock so that the LCSSA
/// phi \p PN can later be replaced by the clone. Call sites get their funclet
/// operand bundle recomputed for the new location; the clone's MemorySSA
/// access and LCSSA form for its in-loop operands are patched up as needed.
static Instruction *cloneInstructionInExitBlock(
    Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
    const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
  Instruction *New;
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    const auto &BlockColors = SafetyInfo->getBlockColors();

    // Sinking call-sites need to be handled differently from other
    // instructions.  The cloned call-site needs a funclet bundle operand
    // appropriate for its location in the CFG.
    SmallVector<OperandBundleDef, 1> OpBundles;
    for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
         BundleIdx != BundleEnd; ++BundleIdx) {
      OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
      // Drop the old funclet bundle; the correct one for the exit block is
      // recomputed from BlockColors below.
      if (Bundle.getTagID() == LLVMContext::OB_funclet)
        continue;

      OpBundles.emplace_back(Bundle);
    }

    if (!BlockColors.empty()) {
      const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
      assert(CV.size() == 1 && "non-unique color for exit block!");
      BasicBlock *BBColor = CV.front();
      Instruction *EHPad = BBColor->getFirstNonPHI();
      if (EHPad->isEHPad())
        OpBundles.emplace_back("funclet", EHPad);
    }

    New = CallInst::Create(CI, OpBundles);
    New->copyMetadata(*CI);
  } else {
    New = I.clone();
  }

  New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
  if (!I.getName().empty())
    New->setName(I.getName() + ".le");

  if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
    // Create a new MemoryAccess and let MemorySSA set its defining access.
    // After running some passes, MemorySSA might be outdated, and the
    // instruction `I` may have become a non-memory touching instruction.
    MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
        New, nullptr, New->getParent(), MemorySSA::Beginning,
        /*CreationMustSucceed=*/false);
    if (NewMemAcc) {
      if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
        MSSAU.insertDef(MemDef, /*RenameUses=*/true);
      else {
        auto *MemUse = cast<MemoryUse>(NewMemAcc);
        MSSAU.insertUse(MemUse, /*RenameUses=*/true);
      }
    }
  }

  // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
  // this is particularly cheap because we can rip off the PHI node that we're
  // replacing for the number and blocks of the predecessors.
  // OPT: If this shows up in a profile, we can instead finish sinking all
  // invariant instructions, and then walk their operands to re-establish
  // LCSSA. That will eliminate creating PHI nodes just to nuke them when
  // sinking bottom-up.
  for (Use &Op : New->operands())
    if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
      auto *OInst = cast<Instruction>(Op.get());
      PHINode *OpPN =
          PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
                          OInst->getName() + ".lcssa");
      OpPN->insertBefore(ExitBlock.begin());
      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
        OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
      Op = OpPN;
    }
  return New;
}
15020b57cec5SDimitry Andric
/// Erase \p I from its parent block, first detaching it from MemorySSA and
/// from the implicit-control-flow bookkeeping in \p SafetyInfo so that
/// neither keeps a dangling pointer to the erased instruction.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
                             MemorySSAUpdater &MSSAU) {
  MSSAU.removeMemoryAccess(&I);
  SafetyInfo.removeInstruction(&I);
  I.eraseFromParent();
}
15090b57cec5SDimitry Andric
/// Move \p I immediately before iterator \p Dest, keeping the
/// implicit-control-flow safety info, MemorySSA, and (when provided)
/// ScalarEvolution consistent with the instruction's new position.
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
                                  ICFLoopSafetyInfo &SafetyInfo,
                                  MemorySSAUpdater &MSSAU,
                                  ScalarEvolution *SE) {
  SafetyInfo.removeInstruction(&I);
  SafetyInfo.insertInstructionTo(&I, Dest->getParent());
  I.moveBefore(*Dest->getParent(), Dest);
  if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
          MSSAU.getMemorySSA()->getMemoryAccess(&I)))
    MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
                      MemorySSA::BeforeTerminator);
  if (SE)
    // Moving I can change its block and loop dispositions; invalidate any
    // cached answers.
    SE->forgetBlockAndLoopDispositions(&I);
}
15240b57cec5SDimitry Andric
sinkThroughTriviallyReplaceablePHI(PHINode * TPN,Instruction * I,LoopInfo * LI,SmallDenseMap<BasicBlock *,Instruction *,32> & SunkCopies,const LoopSafetyInfo * SafetyInfo,const Loop * CurLoop,MemorySSAUpdater & MSSAU)15250b57cec5SDimitry Andric static Instruction *sinkThroughTriviallyReplaceablePHI(
15260b57cec5SDimitry Andric PHINode *TPN, Instruction *I, LoopInfo *LI,
15270b57cec5SDimitry Andric SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
15280b57cec5SDimitry Andric const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
152981ad6265SDimitry Andric MemorySSAUpdater &MSSAU) {
15300b57cec5SDimitry Andric assert(isTriviallyReplaceablePHI(*TPN, *I) &&
15310b57cec5SDimitry Andric "Expect only trivially replaceable PHI");
15320b57cec5SDimitry Andric BasicBlock *ExitBlock = TPN->getParent();
15330b57cec5SDimitry Andric Instruction *New;
15340b57cec5SDimitry Andric auto It = SunkCopies.find(ExitBlock);
15350b57cec5SDimitry Andric if (It != SunkCopies.end())
15360b57cec5SDimitry Andric New = It->second;
15370b57cec5SDimitry Andric else
15385ffd83dbSDimitry Andric New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
15390b57cec5SDimitry Andric *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
15400b57cec5SDimitry Andric return New;
15410b57cec5SDimitry Andric }
15420b57cec5SDimitry Andric
canSplitPredecessors(PHINode * PN,LoopSafetyInfo * SafetyInfo)15430b57cec5SDimitry Andric static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
15440b57cec5SDimitry Andric BasicBlock *BB = PN->getParent();
15450b57cec5SDimitry Andric if (!BB->canSplitPredecessors())
15460b57cec5SDimitry Andric return false;
15470b57cec5SDimitry Andric // It's not impossible to split EHPad blocks, but if BlockColors already exist
15480b57cec5SDimitry Andric // it require updating BlockColors for all offspring blocks accordingly. By
15490b57cec5SDimitry Andric // skipping such corner case, we can make updating BlockColors after splitting
15500b57cec5SDimitry Andric // predecessor fairly simple.
15510b57cec5SDimitry Andric if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
15520b57cec5SDimitry Andric return false;
1553349cc55cSDimitry Andric for (BasicBlock *BBPred : predecessors(BB)) {
1554753f127fSDimitry Andric if (isa<IndirectBrInst>(BBPred->getTerminator()))
15550b57cec5SDimitry Andric return false;
15560b57cec5SDimitry Andric }
15570b57cec5SDimitry Andric return true;
15580b57cec5SDimitry Andric }
15590b57cec5SDimitry Andric
/// Split the in-loop predecessors of the exit block containing \p PN so that
/// each incoming value of \p PN arrives through its own single-predecessor
/// block, making the resulting phis trivially replaceable for sinking.
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
                                        LoopInfo *LI, const Loop *CurLoop,
                                        LoopSafetyInfo *SafetyInfo,
                                        MemorySSAUpdater *MSSAU) {
#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif
  BasicBlock *ExitBB = PN->getParent();
  assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");

  // Split predecessors of the loop exit to make instructions in the loop are
  // exposed to exit blocks through trivially replaceable PHIs while keeping the
  // loop in the canonical form where each predecessor of each exit block should
  // be contained within the loop. For example, this will convert the loop below
  // from
  //
  // LB1:
  //   %v1 =
  //   br %LE, %LB2
  // LB2:
  //   %v2 =
  //   br %LE, %LB1
  // LE:
  //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
  //
  // to
  //
  // LB1:
  //   %v1 =
  //   br %LE.split, %LB2
  // LB2:
  //   %v2 =
  //   br %LE.split2, %LB1
  // LE.split:
  //   %p1 = phi [%v1, %LB1] <-- trivially replaceable
  //   br %LE
  // LE.split2:
  //   %p2 = phi [%v2, %LB2] <-- trivially replaceable
  //   br %LE
  // LE:
  //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
  const auto &BlockColors = SafetyInfo->getBlockColors();
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
    // Only split edges that actually feed an incoming value of PN.
    if (PN->getBasicBlockIndex(PredBB) >= 0) {
      BasicBlock *NewPred = SplitBlockPredecessors(
          ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
      // Since we do not allow splitting EH-block with BlockColors in
      // canSplitPredecessors(), we can simply assign predecessor's color to
      // the new block.
      if (!BlockColors.empty())
        // Grab a reference to the ColorVector to be inserted before getting the
        // reference to the vector we are copying because inserting the new
        // element in BlockColors might cause the map to be reallocated.
        SafetyInfo->copyColors(NewPred, PredBB);
    }
    PredBBs.remove(PredBB);
  }
}
16260b57cec5SDimitry Andric
16270b57cec5SDimitry Andric /// When an instruction is found to only be used outside of the loop, this
16280b57cec5SDimitry Andric /// function moves it to the exit blocks and patches up SSA form as needed.
16290b57cec5SDimitry Andric /// This method is guaranteed to remove the original instruction from its
16300b57cec5SDimitry Andric /// position, and may either delete it or move it to outside of the loop.
16310b57cec5SDimitry Andric ///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                 const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
                 MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
  bool Changed = false;
  LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");

  // Iterate over users to be ready for actual sinking. Replace users via
  // unreachable blocks with poison and make all user PHIs trivially
  // replaceable.
  SmallPtrSet<Instruction *, 8> VisitedUsers;
  for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
    auto *User = cast<Instruction>(*UI);
    Use &U = UI.getUse();
    // Advance before any mutation below can touch the use list entry.
    ++UI;

    if (VisitedUsers.count(User) || CurLoop->contains(User))
      continue;

    // Users in unreachable blocks can simply be replaced with poison.
    if (!DT->isReachableFromEntry(User->getParent())) {
      U = PoisonValue::get(I.getType());
      Changed = true;
      continue;
    }

    // The user must be a PHI node (LCSSA form guarantees this for
    // out-of-loop users in reachable blocks).
    PHINode *PN = cast<PHINode>(User);

    // Surprisingly, instructions can be used outside of loops without any
    // exits.  This can only happen in PHI nodes if the incoming block is
    // unreachable.
    BasicBlock *BB = PN->getIncomingBlock(U);
    if (!DT->isReachableFromEntry(BB)) {
      U = PoisonValue::get(I.getType());
      Changed = true;
      continue;
    }

    VisitedUsers.insert(PN);
    if (isTriviallyReplaceablePHI(*PN, I))
      continue;

    if (!canSplitPredecessors(PN, SafetyInfo))
      return Changed;

    // Split predecessors of the PHI so that we can make users trivially
    // replaceable.
    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);

    // Should rebuild the iterators, as they may be invalidated by
    // splitPredecessorsOfLoopExit().
    UI = I.user_begin();
    UE = I.user_end();
  }

  // No out-of-loop users in reachable blocks: nothing to sink.
  if (VisitedUsers.empty())
    return Changed;

  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
           << "sinking " << ore::NV("Inst", &I);
  });
  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumSunk;

#ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
                                             ExitBlocks.end());
#endif

  // Clones of this instruction. Don't create more than one per exit block!
  SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;

  // If this instruction is only used outside of the loop, then all users are
  // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
  // the instruction.
  // First check if I is worth sinking for all uses. Sink only when it is worth
  // across all uses.
  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
  for (auto *UI : Users) {
    auto *User = cast<Instruction>(UI);

    if (CurLoop->contains(User))
      continue;

    PHINode *PN = cast<PHINode>(User);
    assert(ExitBlockSet.count(PN->getParent()) &&
           "The LCSSA PHI is not in an exit block!");

    // The PHI must be trivially replaceable.
    Instruction *New = sinkThroughTriviallyReplaceablePHI(
        PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
    // As we sink the instruction out of the BB, drop its debug location.
    New->dropLocation();
    PN->replaceAllUsesWith(New);
    eraseInstruction(*PN, *SafetyInfo, MSSAU);
    Changed = true;
  }
  return Changed;
}
17350b57cec5SDimitry Andric
17360b57cec5SDimitry Andric /// When an instruction is found to only use loop invariant operands that
17370b57cec5SDimitry Andric /// is safe to hoist, this instruction is called to do the dirty work.
17380b57cec5SDimitry Andric ///
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                  MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
                  OptimizationRemarkEmitter *ORE) {
  LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
                    << I << "\n");
  ORE->emit([&]() {
    return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
                                                         << ore::NV("Inst", &I);
  });

  // Metadata can be dependent on conditions we are hoisting above.
  // Conservatively strip all metadata on the instruction unless we were
  // guaranteed to execute I if we entered the loop, in which case the metadata
  // is valid in the loop preheader.
  // Similarly, If I is a call and it is not guaranteed to execute in the loop,
  // then moving to the preheader means we should strip attributes on the call
  // that can cause UB since we may be hoisting above conditions that allowed
  // inferring those attributes. They may not be valid at the preheader.
  if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
      // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
      // time in isGuaranteedToExecute if we don't actually have anything to
      // drop.  It is a compile time optimization, not required for correctness.
      !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
    I.dropUBImplyingAttrsAndMetadata();

  if (isa<PHINode>(I))
    // Move the new node to the end of the phi list in the destination block.
    moveInstructionBefore(I, Dest->getFirstNonPHIIt(), *SafetyInfo, MSSAU, SE);
  else
    // Move the new node to the destination block, before its terminator.
    moveInstructionBefore(I, Dest->getTerminator()->getIterator(), *SafetyInfo,
                          MSSAU, SE);

  I.updateLocationAfterHoist();

  if (isa<LoadInst>(I))
    ++NumMovedLoads;
  else if (isa<CallInst>(I))
    ++NumMovedCalls;
  ++NumHoisted;
}
17810b57cec5SDimitry Andric
17820b57cec5SDimitry Andric /// Only sink or hoist an instruction if it is not a trapping instruction,
17830b57cec5SDimitry Andric /// or if the instruction is known not to trap when moved to the preheader.
17840b57cec5SDimitry Andric /// or if it is a trapping instruction and is guaranteed to execute.
isSafeToExecuteUnconditionally(Instruction & Inst,const DominatorTree * DT,const TargetLibraryInfo * TLI,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,const Instruction * CtxI,AssumptionCache * AC,bool AllowSpeculation)1785fb03ea46SDimitry Andric static bool isSafeToExecuteUnconditionally(
1786fb03ea46SDimitry Andric Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1787fb03ea46SDimitry Andric const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1788fb03ea46SDimitry Andric OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1789bdd1243dSDimitry Andric AssumptionCache *AC, bool AllowSpeculation) {
1790bdd1243dSDimitry Andric if (AllowSpeculation &&
1791bdd1243dSDimitry Andric isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
17920b57cec5SDimitry Andric return true;
17930b57cec5SDimitry Andric
17940b57cec5SDimitry Andric bool GuaranteedToExecute =
17950b57cec5SDimitry Andric SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
17960b57cec5SDimitry Andric
17970b57cec5SDimitry Andric if (!GuaranteedToExecute) {
17980b57cec5SDimitry Andric auto *LI = dyn_cast<LoadInst>(&Inst);
17990b57cec5SDimitry Andric if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
18000b57cec5SDimitry Andric ORE->emit([&]() {
18010b57cec5SDimitry Andric return OptimizationRemarkMissed(
18020b57cec5SDimitry Andric DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
18030b57cec5SDimitry Andric << "failed to hoist load with loop-invariant address "
18040b57cec5SDimitry Andric "because load is conditionally executed";
18050b57cec5SDimitry Andric });
18060b57cec5SDimitry Andric }
18070b57cec5SDimitry Andric
18080b57cec5SDimitry Andric return GuaranteedToExecute;
18090b57cec5SDimitry Andric }
18100b57cec5SDimitry Andric
18110b57cec5SDimitry Andric namespace {
18120b57cec5SDimitry Andric class LoopPromoter : public LoadAndStorePromoter {
18130b57cec5SDimitry Andric Value *SomePtr; // Designated pointer to store to.
18140b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
18155f757f3fSDimitry Andric SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
18160b57cec5SDimitry Andric SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
18170b57cec5SDimitry Andric PredIteratorCache &PredCache;
181881ad6265SDimitry Andric MemorySSAUpdater &MSSAU;
18190b57cec5SDimitry Andric LoopInfo &LI;
18200b57cec5SDimitry Andric DebugLoc DL;
1821349cc55cSDimitry Andric Align Alignment;
18220b57cec5SDimitry Andric bool UnorderedAtomic;
18230b57cec5SDimitry Andric AAMDNodes AATags;
18240b57cec5SDimitry Andric ICFLoopSafetyInfo &SafetyInfo;
18254824e7fdSDimitry Andric bool CanInsertStoresInExitBlocks;
1826bdd1243dSDimitry Andric ArrayRef<const Instruction *> Uses;
18270b57cec5SDimitry Andric
1828fe6060f1SDimitry Andric // We're about to add a use of V in a loop exit block. Insert an LCSSA phi
1829fe6060f1SDimitry Andric // (if legal) if doing so would add an out-of-loop use to an instruction
1830fe6060f1SDimitry Andric // defined in-loop.
maybeInsertLCSSAPHI(Value * V,BasicBlock * BB) const18310b57cec5SDimitry Andric Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1832fe6060f1SDimitry Andric if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1833fe6060f1SDimitry Andric return V;
1834fe6060f1SDimitry Andric
1835fe6060f1SDimitry Andric Instruction *I = cast<Instruction>(V);
18360b57cec5SDimitry Andric // We need to create an LCSSA PHI node for the incoming value and
18370b57cec5SDimitry Andric // store that.
18380b57cec5SDimitry Andric PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
18395f757f3fSDimitry Andric I->getName() + ".lcssa");
18405f757f3fSDimitry Andric PN->insertBefore(BB->begin());
18410b57cec5SDimitry Andric for (BasicBlock *Pred : PredCache.get(BB))
18420b57cec5SDimitry Andric PN->addIncoming(I, Pred);
18430b57cec5SDimitry Andric return PN;
18440b57cec5SDimitry Andric }
18450b57cec5SDimitry Andric
18460b57cec5SDimitry Andric public:
LoopPromoter(Value * SP,ArrayRef<const Instruction * > Insts,SSAUpdater & S,SmallVectorImpl<BasicBlock * > & LEB,SmallVectorImpl<BasicBlock::iterator> & LIP,SmallVectorImpl<MemoryAccess * > & MSSAIP,PredIteratorCache & PIC,MemorySSAUpdater & MSSAU,LoopInfo & li,DebugLoc dl,Align Alignment,bool UnorderedAtomic,const AAMDNodes & AATags,ICFLoopSafetyInfo & SafetyInfo,bool CanInsertStoresInExitBlocks)18470b57cec5SDimitry Andric LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
18480b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &LEB,
18495f757f3fSDimitry Andric SmallVectorImpl<BasicBlock::iterator> &LIP,
18500b57cec5SDimitry Andric SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
185181ad6265SDimitry Andric MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1852349cc55cSDimitry Andric Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
18534824e7fdSDimitry Andric ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1854bdd1243dSDimitry Andric : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1855bdd1243dSDimitry Andric LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1856bdd1243dSDimitry Andric LI(li), DL(std::move(dl)), Alignment(Alignment),
1857bdd1243dSDimitry Andric UnorderedAtomic(UnorderedAtomic), AATags(AATags),
18584824e7fdSDimitry Andric SafetyInfo(SafetyInfo),
1859bdd1243dSDimitry Andric CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
18600b57cec5SDimitry Andric
insertStoresInLoopExitBlocks()18614824e7fdSDimitry Andric void insertStoresInLoopExitBlocks() {
18620b57cec5SDimitry Andric // Insert stores after in the loop exit blocks. Each exit block gets a
18630b57cec5SDimitry Andric // store of the live-out values that feed them. Since we've already told
18640b57cec5SDimitry Andric // the SSA updater about the defs in the loop and the preheader
18650b57cec5SDimitry Andric // definition, it is all set and we can start using it.
1866bdd1243dSDimitry Andric DIAssignID *NewID = nullptr;
18670b57cec5SDimitry Andric for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
18680b57cec5SDimitry Andric BasicBlock *ExitBlock = LoopExitBlocks[i];
18690b57cec5SDimitry Andric Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
18700b57cec5SDimitry Andric LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
18710b57cec5SDimitry Andric Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
18725f757f3fSDimitry Andric BasicBlock::iterator InsertPos = LoopInsertPts[i];
18730b57cec5SDimitry Andric StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
18740b57cec5SDimitry Andric if (UnorderedAtomic)
18750b57cec5SDimitry Andric NewSI->setOrdering(AtomicOrdering::Unordered);
1876349cc55cSDimitry Andric NewSI->setAlignment(Alignment);
18770b57cec5SDimitry Andric NewSI->setDebugLoc(DL);
1878bdd1243dSDimitry Andric // Attach DIAssignID metadata to the new store, generating it on the
1879bdd1243dSDimitry Andric // first loop iteration.
1880bdd1243dSDimitry Andric if (i == 0) {
1881bdd1243dSDimitry Andric // NewSI will have its DIAssignID set here if there are any stores in
1882bdd1243dSDimitry Andric // Uses with a DIAssignID attachment. This merged ID will then be
1883bdd1243dSDimitry Andric // attached to the other inserted stores (in the branch below).
1884bdd1243dSDimitry Andric NewSI->mergeDIAssignID(Uses);
1885bdd1243dSDimitry Andric NewID = cast_or_null<DIAssignID>(
1886bdd1243dSDimitry Andric NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1887bdd1243dSDimitry Andric } else {
1888bdd1243dSDimitry Andric // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1889bdd1243dSDimitry Andric // above.
1890bdd1243dSDimitry Andric NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1891bdd1243dSDimitry Andric }
1892bdd1243dSDimitry Andric
18930b57cec5SDimitry Andric if (AATags)
18940b57cec5SDimitry Andric NewSI->setAAMetadata(AATags);
18950b57cec5SDimitry Andric
18960b57cec5SDimitry Andric MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
18970b57cec5SDimitry Andric MemoryAccess *NewMemAcc;
18980b57cec5SDimitry Andric if (!MSSAInsertPoint) {
189981ad6265SDimitry Andric NewMemAcc = MSSAU.createMemoryAccessInBB(
19000b57cec5SDimitry Andric NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
19010b57cec5SDimitry Andric } else {
19020b57cec5SDimitry Andric NewMemAcc =
190381ad6265SDimitry Andric MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
19040b57cec5SDimitry Andric }
19050b57cec5SDimitry Andric MSSAInsertPts[i] = NewMemAcc;
190681ad6265SDimitry Andric MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
19070b57cec5SDimitry Andric // FIXME: true for safety, false may still be correct.
19080b57cec5SDimitry Andric }
19090b57cec5SDimitry Andric }
19100b57cec5SDimitry Andric
doExtraRewritesBeforeFinalDeletion()19114824e7fdSDimitry Andric void doExtraRewritesBeforeFinalDeletion() override {
19124824e7fdSDimitry Andric if (CanInsertStoresInExitBlocks)
19134824e7fdSDimitry Andric insertStoresInLoopExitBlocks();
19144824e7fdSDimitry Andric }
19154824e7fdSDimitry Andric
  // Hook run when the promoter erases an instruction: keep the loop safety
  // info and MemorySSA in sync so neither retains a dangling reference to I.
  void instructionDeleted(Instruction *I) const override {
    SafetyInfo.removeInstruction(I);
    MSSAU.removeMemoryAccess(I);
  }
19204824e7fdSDimitry Andric
shouldDelete(Instruction * I) const19214824e7fdSDimitry Andric bool shouldDelete(Instruction *I) const override {
19224824e7fdSDimitry Andric if (isa<StoreInst>(I))
19234824e7fdSDimitry Andric return CanInsertStoresInExitBlocks;
19244824e7fdSDimitry Andric return true;
19254824e7fdSDimitry Andric }
19260b57cec5SDimitry Andric };
19270b57cec5SDimitry Andric
isNotCapturedBeforeOrInLoop(const Value * V,const Loop * L,DominatorTree * DT)1928fe6060f1SDimitry Andric bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1929fe6060f1SDimitry Andric DominatorTree *DT) {
1930fe6060f1SDimitry Andric // We can perform the captured-before check against any instruction in the
1931fe6060f1SDimitry Andric // loop header, as the loop header is reachable from any instruction inside
1932fe6060f1SDimitry Andric // the loop.
1933fe6060f1SDimitry Andric // TODO: ReturnCaptures=true shouldn't be necessary here.
1934fe6060f1SDimitry Andric return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1935fe6060f1SDimitry Andric /* StoreCaptures */ true,
1936fe6060f1SDimitry Andric L->getHeader()->getTerminator(), DT);
1937fe6060f1SDimitry Andric }
19380b57cec5SDimitry Andric
193904eeddc0SDimitry Andric /// Return true if we can prove that a caller cannot inspect the object if an
194004eeddc0SDimitry Andric /// unwind occurs inside the loop.
isNotVisibleOnUnwindInLoop(const Value * Object,const Loop * L,DominatorTree * DT)194104eeddc0SDimitry Andric bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
194204eeddc0SDimitry Andric DominatorTree *DT) {
194304eeddc0SDimitry Andric bool RequiresNoCaptureBeforeUnwind;
194404eeddc0SDimitry Andric if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
194504eeddc0SDimitry Andric return false;
19460b57cec5SDimitry Andric
194704eeddc0SDimitry Andric return !RequiresNoCaptureBeforeUnwind ||
1948fe6060f1SDimitry Andric isNotCapturedBeforeOrInLoop(Object, L, DT);
19490b57cec5SDimitry Andric }
19500b57cec5SDimitry Andric
isThreadLocalObject(const Value * Object,const Loop * L,DominatorTree * DT,TargetTransformInfo * TTI)1951bdd1243dSDimitry Andric bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1952bdd1243dSDimitry Andric TargetTransformInfo *TTI) {
1953bdd1243dSDimitry Andric // The object must be function-local to start with, and then not captured
1954bdd1243dSDimitry Andric // before/in the loop.
1955bdd1243dSDimitry Andric return (isIdentifiedFunctionLocal(Object) &&
1956bdd1243dSDimitry Andric isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1957bdd1243dSDimitry Andric (TTI->isSingleThreaded() || SingleThread);
1958bdd1243dSDimitry Andric }
1959bdd1243dSDimitry Andric
19600b57cec5SDimitry Andric } // namespace
19610b57cec5SDimitry Andric
19620b57cec5SDimitry Andric /// Try to promote memory values to scalars by sinking stores out of the
19630b57cec5SDimitry Andric /// loop and moving loads to before the loop. We do this by looping over
19640b57cec5SDimitry Andric /// the stores in the loop, looking for stores to Must pointers which are
19650b57cec5SDimitry Andric /// loop invariant.
19660b57cec5SDimitry Andric ///
promoteLoopAccessesToScalars(const SmallSetVector<Value *,8> & PointerMustAliases,SmallVectorImpl<BasicBlock * > & ExitBlocks,SmallVectorImpl<BasicBlock::iterator> & InsertPts,SmallVectorImpl<MemoryAccess * > & MSSAInsertPts,PredIteratorCache & PIC,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,const TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,bool AllowSpeculation,bool HasReadsOutsideSet)19670b57cec5SDimitry Andric bool llvm::promoteLoopAccessesToScalars(
19680b57cec5SDimitry Andric const SmallSetVector<Value *, 8> &PointerMustAliases,
19690b57cec5SDimitry Andric SmallVectorImpl<BasicBlock *> &ExitBlocks,
19705f757f3fSDimitry Andric SmallVectorImpl<BasicBlock::iterator> &InsertPts,
19710b57cec5SDimitry Andric SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1972bdd1243dSDimitry Andric LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1973bdd1243dSDimitry Andric const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1974bdd1243dSDimitry Andric MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1975bdd1243dSDimitry Andric OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1976bdd1243dSDimitry Andric bool HasReadsOutsideSet) {
19770b57cec5SDimitry Andric // Verify inputs.
19780b57cec5SDimitry Andric assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1979e8d8bef9SDimitry Andric SafetyInfo != nullptr &&
19800b57cec5SDimitry Andric "Unexpected Input to promoteLoopAccessesToScalars");
19810b57cec5SDimitry Andric
1982bdd1243dSDimitry Andric LLVM_DEBUG({
1983bdd1243dSDimitry Andric dbgs() << "Trying to promote set of must-aliased pointers:\n";
1984bdd1243dSDimitry Andric for (Value *Ptr : PointerMustAliases)
1985bdd1243dSDimitry Andric dbgs() << " " << *Ptr << "\n";
1986bdd1243dSDimitry Andric });
1987bdd1243dSDimitry Andric ++NumPromotionCandidates;
1988bdd1243dSDimitry Andric
19890b57cec5SDimitry Andric Value *SomePtr = *PointerMustAliases.begin();
19900b57cec5SDimitry Andric BasicBlock *Preheader = CurLoop->getLoopPreheader();
19910b57cec5SDimitry Andric
19920b57cec5SDimitry Andric // It is not safe to promote a load/store from the loop if the load/store is
19930b57cec5SDimitry Andric // conditional. For example, turning:
19940b57cec5SDimitry Andric //
19950b57cec5SDimitry Andric // for () { if (c) *P += 1; }
19960b57cec5SDimitry Andric //
19970b57cec5SDimitry Andric // into:
19980b57cec5SDimitry Andric //
19990b57cec5SDimitry Andric // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
20000b57cec5SDimitry Andric //
20010b57cec5SDimitry Andric // is not safe, because *P may only be valid to access if 'c' is true.
20020b57cec5SDimitry Andric //
20030b57cec5SDimitry Andric // The safety property divides into two parts:
20040b57cec5SDimitry Andric // p1) The memory may not be dereferenceable on entry to the loop. In this
20050b57cec5SDimitry Andric // case, we can't insert the required load in the preheader.
20060b57cec5SDimitry Andric // p2) The memory model does not allow us to insert a store along any dynamic
20070b57cec5SDimitry Andric // path which did not originally have one.
20080b57cec5SDimitry Andric //
20090b57cec5SDimitry Andric // If at least one store is guaranteed to execute, both properties are
20100b57cec5SDimitry Andric // satisfied, and promotion is legal.
20110b57cec5SDimitry Andric //
20120b57cec5SDimitry Andric // This, however, is not a necessary condition. Even if no store/load is
20130b57cec5SDimitry Andric // guaranteed to execute, we can still establish these properties.
20140b57cec5SDimitry Andric // We can establish (p1) by proving that hoisting the load into the preheader
20150b57cec5SDimitry Andric // is safe (i.e. proving dereferenceability on all paths through the loop). We
20160b57cec5SDimitry Andric // can use any access within the alias set to prove dereferenceability,
20170b57cec5SDimitry Andric // since they're all must alias.
20180b57cec5SDimitry Andric //
20190b57cec5SDimitry Andric // There are two ways establish (p2):
20200b57cec5SDimitry Andric // a) Prove the location is thread-local. In this case the memory model
20210b57cec5SDimitry Andric // requirement does not apply, and stores are safe to insert.
20220b57cec5SDimitry Andric // b) Prove a store dominates every exit block. In this case, if an exit
20230b57cec5SDimitry Andric // blocks is reached, the original dynamic path would have taken us through
20240b57cec5SDimitry Andric // the store, so inserting a store into the exit block is safe. Note that this
20250b57cec5SDimitry Andric // is different from the store being guaranteed to execute. For instance,
20260b57cec5SDimitry Andric // if an exception is thrown on the first iteration of the loop, the original
20270b57cec5SDimitry Andric // store is never executed, but the exit blocks are not executed either.
20280b57cec5SDimitry Andric
20290b57cec5SDimitry Andric bool DereferenceableInPH = false;
203081ad6265SDimitry Andric bool StoreIsGuanteedToExecute = false;
20314824e7fdSDimitry Andric bool FoundLoadToPromote = false;
2032bdd1243dSDimitry Andric // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
2033bdd1243dSDimitry Andric enum {
2034bdd1243dSDimitry Andric StoreSafe,
2035bdd1243dSDimitry Andric StoreUnsafe,
2036bdd1243dSDimitry Andric StoreSafetyUnknown,
2037bdd1243dSDimitry Andric } StoreSafety = StoreSafetyUnknown;
20380b57cec5SDimitry Andric
20390b57cec5SDimitry Andric SmallVector<Instruction *, 64> LoopUses;
20400b57cec5SDimitry Andric
20410b57cec5SDimitry Andric // We start with an alignment of one and try to find instructions that allow
20420b57cec5SDimitry Andric // us to prove better alignment.
20435ffd83dbSDimitry Andric Align Alignment;
20440b57cec5SDimitry Andric // Keep track of which types of access we see
20450b57cec5SDimitry Andric bool SawUnorderedAtomic = false;
20460b57cec5SDimitry Andric bool SawNotAtomic = false;
20470b57cec5SDimitry Andric AAMDNodes AATags;
20480b57cec5SDimitry Andric
20490fca6ea1SDimitry Andric const DataLayout &MDL = Preheader->getDataLayout();
20500b57cec5SDimitry Andric
2051bdd1243dSDimitry Andric // If there are reads outside the promoted set, then promoting stores is
2052bdd1243dSDimitry Andric // definitely not safe.
2053bdd1243dSDimitry Andric if (HasReadsOutsideSet)
2054bdd1243dSDimitry Andric StoreSafety = StoreUnsafe;
2055bdd1243dSDimitry Andric
2056bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
20570b57cec5SDimitry Andric // If a loop can throw, we have to insert a store along each unwind edge.
20580b57cec5SDimitry Andric // That said, we can't actually make the unwind edge explicit. Therefore,
20590b57cec5SDimitry Andric // we have to prove that the store is dead along the unwind edge. We do
20600b57cec5SDimitry Andric // this by proving that the caller can't have a reference to the object
20610b57cec5SDimitry Andric // after return and thus can't possibly load from the object.
2062e8d8bef9SDimitry Andric Value *Object = getUnderlyingObject(SomePtr);
206304eeddc0SDimitry Andric if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2064bdd1243dSDimitry Andric StoreSafety = StoreUnsafe;
20650b57cec5SDimitry Andric }
20660b57cec5SDimitry Andric
2067bdd1243dSDimitry Andric // Check that all accesses to pointers in the alias set use the same type.
20684824e7fdSDimitry Andric // We cannot (yet) promote a memory location that is loaded and stored in
20690b57cec5SDimitry Andric // different sizes. While we are at it, collect alignment and AA info.
20704824e7fdSDimitry Andric Type *AccessTy = nullptr;
20710b57cec5SDimitry Andric for (Value *ASIV : PointerMustAliases) {
207281ad6265SDimitry Andric for (Use &U : ASIV->uses()) {
20730b57cec5SDimitry Andric // Ignore instructions that are outside the loop.
207481ad6265SDimitry Andric Instruction *UI = dyn_cast<Instruction>(U.getUser());
20750b57cec5SDimitry Andric if (!UI || !CurLoop->contains(UI))
20760b57cec5SDimitry Andric continue;
20770b57cec5SDimitry Andric
20780b57cec5SDimitry Andric // If there is an non-load/store instruction in the loop, we can't promote
20790b57cec5SDimitry Andric // it.
20800b57cec5SDimitry Andric if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
20810b57cec5SDimitry Andric if (!Load->isUnordered())
20820b57cec5SDimitry Andric return false;
20830b57cec5SDimitry Andric
20840b57cec5SDimitry Andric SawUnorderedAtomic |= Load->isAtomic();
20850b57cec5SDimitry Andric SawNotAtomic |= !Load->isAtomic();
20864824e7fdSDimitry Andric FoundLoadToPromote = true;
20870b57cec5SDimitry Andric
20885ffd83dbSDimitry Andric Align InstAlignment = Load->getAlign();
20890b57cec5SDimitry Andric
20900b57cec5SDimitry Andric // Note that proving a load safe to speculate requires proving
20910b57cec5SDimitry Andric // sufficient alignment at the target location. Proving it guaranteed
20920b57cec5SDimitry Andric // to execute does as well. Thus we can increase our guaranteed
20930b57cec5SDimitry Andric // alignment as well.
20940b57cec5SDimitry Andric if (!DereferenceableInPH || (InstAlignment > Alignment))
2095fb03ea46SDimitry Andric if (isSafeToExecuteUnconditionally(
2096fb03ea46SDimitry Andric *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2097bdd1243dSDimitry Andric Preheader->getTerminator(), AC, AllowSpeculation)) {
20980b57cec5SDimitry Andric DereferenceableInPH = true;
20990b57cec5SDimitry Andric Alignment = std::max(Alignment, InstAlignment);
21000b57cec5SDimitry Andric }
21010b57cec5SDimitry Andric } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
21020b57cec5SDimitry Andric // Stores *of* the pointer are not interesting, only stores *to* the
21030b57cec5SDimitry Andric // pointer.
210481ad6265SDimitry Andric if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
21050b57cec5SDimitry Andric continue;
21060b57cec5SDimitry Andric if (!Store->isUnordered())
21070b57cec5SDimitry Andric return false;
21080b57cec5SDimitry Andric
21090b57cec5SDimitry Andric SawUnorderedAtomic |= Store->isAtomic();
21100b57cec5SDimitry Andric SawNotAtomic |= !Store->isAtomic();
21110b57cec5SDimitry Andric
21120b57cec5SDimitry Andric // If the store is guaranteed to execute, both properties are satisfied.
21130b57cec5SDimitry Andric // We may want to check if a store is guaranteed to execute even if we
21140b57cec5SDimitry Andric // already know that promotion is safe, since it may have higher
21150b57cec5SDimitry Andric // alignment than any other guaranteed stores, in which case we can
21160b57cec5SDimitry Andric // raise the alignment on the promoted store.
21175ffd83dbSDimitry Andric Align InstAlignment = Store->getAlign();
211881ad6265SDimitry Andric bool GuaranteedToExecute =
211981ad6265SDimitry Andric SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
212081ad6265SDimitry Andric StoreIsGuanteedToExecute |= GuaranteedToExecute;
212181ad6265SDimitry Andric if (GuaranteedToExecute) {
21220b57cec5SDimitry Andric DereferenceableInPH = true;
2123bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown)
2124bdd1243dSDimitry Andric StoreSafety = StoreSafe;
21250b57cec5SDimitry Andric Alignment = std::max(Alignment, InstAlignment);
21260b57cec5SDimitry Andric }
21270b57cec5SDimitry Andric
21280b57cec5SDimitry Andric // If a store dominates all exit blocks, it is safe to sink.
21290b57cec5SDimitry Andric // As explained above, if an exit block was executed, a dominating
21300b57cec5SDimitry Andric // store must have been executed at least once, so we are not
21310b57cec5SDimitry Andric // introducing stores on paths that did not have them.
21320b57cec5SDimitry Andric // Note that this only looks at explicit exit blocks. If we ever
21330b57cec5SDimitry Andric // start sinking stores into unwind edges (see above), this will break.
2134bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown &&
2135bdd1243dSDimitry Andric llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
21360b57cec5SDimitry Andric return DT->dominates(Store->getParent(), Exit);
2137bdd1243dSDimitry Andric }))
2138bdd1243dSDimitry Andric StoreSafety = StoreSafe;
21390b57cec5SDimitry Andric
21400b57cec5SDimitry Andric // If the store is not guaranteed to execute, we may still get
21410b57cec5SDimitry Andric // deref info through it.
21420b57cec5SDimitry Andric if (!DereferenceableInPH) {
21430b57cec5SDimitry Andric DereferenceableInPH = isDereferenceableAndAlignedPointer(
21440b57cec5SDimitry Andric Store->getPointerOperand(), Store->getValueOperand()->getType(),
2145bdd1243dSDimitry Andric Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
21460b57cec5SDimitry Andric }
21470b57cec5SDimitry Andric } else
2148bdd1243dSDimitry Andric continue; // Not a load or store.
21490b57cec5SDimitry Andric
21504824e7fdSDimitry Andric if (!AccessTy)
21514824e7fdSDimitry Andric AccessTy = getLoadStoreType(UI);
21524824e7fdSDimitry Andric else if (AccessTy != getLoadStoreType(UI))
21534824e7fdSDimitry Andric return false;
21544824e7fdSDimitry Andric
21550b57cec5SDimitry Andric // Merge the AA tags.
21560b57cec5SDimitry Andric if (LoopUses.empty()) {
21570b57cec5SDimitry Andric // On the first load/store, just take its AA tags.
2158349cc55cSDimitry Andric AATags = UI->getAAMetadata();
21590b57cec5SDimitry Andric } else if (AATags) {
2160349cc55cSDimitry Andric AATags = AATags.merge(UI->getAAMetadata());
21610b57cec5SDimitry Andric }
21620b57cec5SDimitry Andric
21630b57cec5SDimitry Andric LoopUses.push_back(UI);
21640b57cec5SDimitry Andric }
21650b57cec5SDimitry Andric }
21660b57cec5SDimitry Andric
21670b57cec5SDimitry Andric // If we found both an unordered atomic instruction and a non-atomic memory
21680b57cec5SDimitry Andric // access, bail. We can't blindly promote non-atomic to atomic since we
21690b57cec5SDimitry Andric // might not be able to lower the result. We can't downgrade since that
21700b57cec5SDimitry Andric // would violate memory model. Also, align 0 is an error for atomics.
21710b57cec5SDimitry Andric if (SawUnorderedAtomic && SawNotAtomic)
21720b57cec5SDimitry Andric return false;
21730b57cec5SDimitry Andric
21740b57cec5SDimitry Andric // If we're inserting an atomic load in the preheader, we must be able to
21750b57cec5SDimitry Andric // lower it. We're only guaranteed to be able to lower naturally aligned
21760b57cec5SDimitry Andric // atomics.
21774824e7fdSDimitry Andric if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
21780b57cec5SDimitry Andric return false;
21790b57cec5SDimitry Andric
21800b57cec5SDimitry Andric // If we couldn't prove we can hoist the load, bail.
2181bdd1243dSDimitry Andric if (!DereferenceableInPH) {
2182bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
21830b57cec5SDimitry Andric return false;
2184bdd1243dSDimitry Andric }
21850b57cec5SDimitry Andric
21860b57cec5SDimitry Andric // We know we can hoist the load, but don't have a guaranteed store.
2187bdd1243dSDimitry Andric // Check whether the location is writable and thread-local. If it is, then we
2188bdd1243dSDimitry Andric // can insert stores along paths which originally didn't have them without
2189bdd1243dSDimitry Andric // violating the memory model.
2190bdd1243dSDimitry Andric if (StoreSafety == StoreSafetyUnknown) {
2191e8d8bef9SDimitry Andric Value *Object = getUnderlyingObject(SomePtr);
21925f757f3fSDimitry Andric bool ExplicitlyDereferenceableOnly;
21935f757f3fSDimitry Andric if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
21945f757f3fSDimitry Andric (!ExplicitlyDereferenceableOnly ||
21955f757f3fSDimitry Andric isDereferenceablePointer(SomePtr, AccessTy, MDL)) &&
2196bdd1243dSDimitry Andric isThreadLocalObject(Object, CurLoop, DT, TTI))
2197bdd1243dSDimitry Andric StoreSafety = StoreSafe;
21980b57cec5SDimitry Andric }
21990b57cec5SDimitry Andric
22004824e7fdSDimitry Andric // If we've still failed to prove we can sink the store, hoist the load
22014824e7fdSDimitry Andric // only, if possible.
2202bdd1243dSDimitry Andric if (StoreSafety != StoreSafe && !FoundLoadToPromote)
22034824e7fdSDimitry Andric // If we cannot hoist the load either, give up.
22040b57cec5SDimitry Andric return false;
22050b57cec5SDimitry Andric
22064824e7fdSDimitry Andric // Lets do the promotion!
2207bdd1243dSDimitry Andric if (StoreSafety == StoreSafe) {
22084824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
22090b57cec5SDimitry Andric << '\n');
2210bdd1243dSDimitry Andric ++NumLoadStorePromoted;
2211bdd1243dSDimitry Andric } else {
22124824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
22134824e7fdSDimitry Andric << '\n');
2214bdd1243dSDimitry Andric ++NumLoadPromoted;
2215bdd1243dSDimitry Andric }
22164824e7fdSDimitry Andric
22170b57cec5SDimitry Andric ORE->emit([&]() {
22180b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
22190b57cec5SDimitry Andric LoopUses[0])
22200b57cec5SDimitry Andric << "Moving accesses to memory location out of the loop";
22210b57cec5SDimitry Andric });
22220b57cec5SDimitry Andric
22235ffd83dbSDimitry Andric // Look at all the loop uses, and try to merge their locations.
222406c3fb27SDimitry Andric std::vector<DILocation *> LoopUsesLocs;
2225bdd1243dSDimitry Andric for (auto *U : LoopUses)
22265ffd83dbSDimitry Andric LoopUsesLocs.push_back(U->getDebugLoc().get());
22275ffd83dbSDimitry Andric auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
22280b57cec5SDimitry Andric
22290b57cec5SDimitry Andric // We use the SSAUpdater interface to insert phi nodes as required.
22300b57cec5SDimitry Andric SmallVector<PHINode *, 16> NewPHIs;
22310b57cec5SDimitry Andric SSAUpdater SSA(&NewPHIs);
2232bdd1243dSDimitry Andric LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2233bdd1243dSDimitry Andric MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2234bdd1243dSDimitry Andric SawUnorderedAtomic, AATags, *SafetyInfo,
2235bdd1243dSDimitry Andric StoreSafety == StoreSafe);
22360b57cec5SDimitry Andric
22370b57cec5SDimitry Andric // Set up the preheader to have a definition of the value. It is the live-out
22380b57cec5SDimitry Andric // value from the preheader that uses in the loop will use.
223981ad6265SDimitry Andric LoadInst *PreheaderLoad = nullptr;
224081ad6265SDimitry Andric if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
224181ad6265SDimitry Andric PreheaderLoad =
224281ad6265SDimitry Andric new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
22430fca6ea1SDimitry Andric Preheader->getTerminator()->getIterator());
22440b57cec5SDimitry Andric if (SawUnorderedAtomic)
22450b57cec5SDimitry Andric PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
22465ffd83dbSDimitry Andric PreheaderLoad->setAlignment(Alignment);
22475ffd83dbSDimitry Andric PreheaderLoad->setDebugLoc(DebugLoc());
22480b57cec5SDimitry Andric if (AATags)
22490b57cec5SDimitry Andric PreheaderLoad->setAAMetadata(AATags);
22500b57cec5SDimitry Andric
225181ad6265SDimitry Andric MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
22520b57cec5SDimitry Andric PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
22530b57cec5SDimitry Andric MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
225481ad6265SDimitry Andric MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
225581ad6265SDimitry Andric SSA.AddAvailableValue(Preheader, PreheaderLoad);
225681ad6265SDimitry Andric } else {
225781ad6265SDimitry Andric SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
225881ad6265SDimitry Andric }
22590b57cec5SDimitry Andric
2260349cc55cSDimitry Andric if (VerifyMemorySSA)
226181ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA();
22620b57cec5SDimitry Andric // Rewrite all the loads in the loop and remember all the definitions from
22630b57cec5SDimitry Andric // stores in the loop.
22640b57cec5SDimitry Andric Promoter.run(LoopUses);
22650b57cec5SDimitry Andric
2266349cc55cSDimitry Andric if (VerifyMemorySSA)
226781ad6265SDimitry Andric MSSAU.getMemorySSA()->verifyMemorySSA();
22680b57cec5SDimitry Andric // If the SSAUpdater didn't use the load in the preheader, just zap it now.
226981ad6265SDimitry Andric if (PreheaderLoad && PreheaderLoad->use_empty())
2270349cc55cSDimitry Andric eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
22710b57cec5SDimitry Andric
22720b57cec5SDimitry Andric return true;
22730b57cec5SDimitry Andric }
22740b57cec5SDimitry Andric
foreachMemoryAccess(MemorySSA * MSSA,Loop * L,function_ref<void (Instruction *)> Fn)2275fe6060f1SDimitry Andric static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2276fe6060f1SDimitry Andric function_ref<void(Instruction *)> Fn) {
2277fe6060f1SDimitry Andric for (const BasicBlock *BB : L->blocks())
2278fe6060f1SDimitry Andric if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2279fe6060f1SDimitry Andric for (const auto &Access : *Accesses)
2280fe6060f1SDimitry Andric if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2281fe6060f1SDimitry Andric Fn(MUD->getMemoryInst());
2282fe6060f1SDimitry Andric }
2283fe6060f1SDimitry Andric
2284bdd1243dSDimitry Andric // The bool indicates whether there might be reads outside the set, in which
2285bdd1243dSDimitry Andric // case only loads may be promoted.
2286bdd1243dSDimitry Andric static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA * MSSA,AliasAnalysis * AA,Loop * L)2287fe6060f1SDimitry Andric collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2288bdd1243dSDimitry Andric BatchAAResults BatchAA(*AA);
2289bdd1243dSDimitry Andric AliasSetTracker AST(BatchAA);
2290fe6060f1SDimitry Andric
2291fe6060f1SDimitry Andric auto IsPotentiallyPromotable = [L](const Instruction *I) {
2292fe6060f1SDimitry Andric if (const auto *SI = dyn_cast<StoreInst>(I))
2293fe6060f1SDimitry Andric return L->isLoopInvariant(SI->getPointerOperand());
2294fe6060f1SDimitry Andric if (const auto *LI = dyn_cast<LoadInst>(I))
2295fe6060f1SDimitry Andric return L->isLoopInvariant(LI->getPointerOperand());
2296fe6060f1SDimitry Andric return false;
2297fe6060f1SDimitry Andric };
2298fe6060f1SDimitry Andric
229981ad6265SDimitry Andric // Populate AST with potentially promotable accesses.
2300fe6060f1SDimitry Andric SmallPtrSet<Value *, 16> AttemptingPromotion;
2301fe6060f1SDimitry Andric foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2302fe6060f1SDimitry Andric if (IsPotentiallyPromotable(I)) {
2303fe6060f1SDimitry Andric AttemptingPromotion.insert(I);
2304fe6060f1SDimitry Andric AST.add(I);
2305fe6060f1SDimitry Andric }
2306fe6060f1SDimitry Andric });
2307fe6060f1SDimitry Andric
2308fe6060f1SDimitry Andric // We're only interested in must-alias sets that contain a mod.
2309bdd1243dSDimitry Andric SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2310fe6060f1SDimitry Andric for (AliasSet &AS : AST)
2311fe6060f1SDimitry Andric if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2312bdd1243dSDimitry Andric Sets.push_back({&AS, false});
2313fe6060f1SDimitry Andric
2314fe6060f1SDimitry Andric if (Sets.empty())
2315fe6060f1SDimitry Andric return {}; // Nothing to promote...
2316fe6060f1SDimitry Andric
2317fe6060f1SDimitry Andric // Discard any sets for which there is an aliasing non-promotable access.
2318fe6060f1SDimitry Andric foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2319fe6060f1SDimitry Andric if (AttemptingPromotion.contains(I))
2320fe6060f1SDimitry Andric return;
2321fe6060f1SDimitry Andric
2322bdd1243dSDimitry Andric llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2323bdd1243dSDimitry Andric ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2324bdd1243dSDimitry Andric // Cannot promote if there are writes outside the set.
2325bdd1243dSDimitry Andric if (isModSet(MR))
2326bdd1243dSDimitry Andric return true;
2327bdd1243dSDimitry Andric if (isRefSet(MR)) {
2328bdd1243dSDimitry Andric // Remember reads outside the set.
2329bdd1243dSDimitry Andric Pair.setInt(true);
2330bdd1243dSDimitry Andric // If this is a mod-only set and there are reads outside the set,
2331bdd1243dSDimitry Andric // we will not be able to promote, so bail out early.
2332bdd1243dSDimitry Andric return !Pair.getPointer()->isRef();
2333bdd1243dSDimitry Andric }
2334bdd1243dSDimitry Andric return false;
2335fe6060f1SDimitry Andric });
2336fe6060f1SDimitry Andric });
2337fe6060f1SDimitry Andric
2338bdd1243dSDimitry Andric SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2339bdd1243dSDimitry Andric for (auto [Set, HasReadsOutsideSet] : Sets) {
2340fe6060f1SDimitry Andric SmallSetVector<Value *, 8> PointerMustAliases;
23417a6dacacSDimitry Andric for (const auto &MemLoc : *Set)
23427a6dacacSDimitry Andric PointerMustAliases.insert(const_cast<Value *>(MemLoc.Ptr));
2343bdd1243dSDimitry Andric Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2344fe6060f1SDimitry Andric }
2345fe6060f1SDimitry Andric
2346fe6060f1SDimitry Andric return Result;
2347fe6060f1SDimitry Andric }
2348fe6060f1SDimitry Andric
// Return true if the location read by MemoryUse \p MU (belonging to
// instruction \p I) may be invalidated inside \p CurLoop, i.e. it is NOT safe
// to hoist (Flags says hoist) or sink (Flags says sink) the access.
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
                                     Loop *CurLoop, Instruction &I,
                                     SinkAndHoistLICMFlags &Flags,
                                     bool InvariantGroup) {
  // For hoisting, use the walker to determine safety
  if (!Flags.getIsSink()) {
    // If hoisting an invariant group, we only need to check that there
    // is no store to the loaded pointer between the start of the loop,
    // and the load (since all values must be the same).

    // This can be checked in two conditions:
    // 1) if the memoryaccess is outside the loop
    // 2) the earliest access is at the loop header,
    // if the memory loaded is the phi node

    BatchAAResults BAA(MSSA->getAA());
    MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
    // Hoistable iff the clobber is live-on-entry, outside the loop, or (for
    // invariant groups) just the header MemoryPhi.
    return !MSSA->isLiveOnEntryDef(Source) &&
           CurLoop->contains(Source->getBlock()) &&
           !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
  }

  // For sinking, we'd need to check all Defs below this use. The getClobbering
  // call will look on the backedge of the loop, but will check aliasing with
  // the instructions on the previous iteration.
  // For example:
  // for (i ... )
  //   load a[i] ( Use (LoE)
  //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
  //   i++;
  // The load sees no clobbering inside the loop, as the backedge alias check
  // does phi translation, and will check aliasing against store a[i-1].
  // However sinking the load outside the loop, below the store is incorrect.

  // For now, only sink if there are no Defs in the loop, and the existing ones
  // precede the use and are in the same block.
  // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
  // needs PostDominatorTreeAnalysis.
  // FIXME: More precise: no Defs that alias this Use.
  if (Flags.tooManyMemoryAccesses())
    return true; // Conservatively give up when the loop is too big to scan.
  for (auto *BB : CurLoop->getBlocks())
    if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
      return true;
  // When sinking, the source block may not be part of the loop so check it.
  if (!CurLoop->contains(&I))
    return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);

  return false;
}
2399e8d8bef9SDimitry Andric
pointerInvalidatedByBlock(BasicBlock & BB,MemorySSA & MSSA,MemoryUse & MU)240081ad6265SDimitry Andric bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2401e8d8bef9SDimitry Andric if (const auto *Accesses = MSSA.getBlockDefs(&BB))
24020b57cec5SDimitry Andric for (const auto &MA : *Accesses)
24030b57cec5SDimitry Andric if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2404e8d8bef9SDimitry Andric if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
24050b57cec5SDimitry Andric return true;
24060b57cec5SDimitry Andric return false;
24070b57cec5SDimitry Andric }
24080b57cec5SDimitry Andric
/// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
/// minimum can be computed outside of the loop, and A is not a loop-invariant.
static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
                        MemorySSAUpdater &MSSAU) {
  bool Inverse = false;
  using namespace PatternMatch;
  Value *Cond1, *Cond2;
  // A logical-or is handled by inverting the predicates (De Morgan):
  // (a < x || a < y) == !(a >= x && a >= y), so the min/max machinery below
  // can be shared with the logical-and case.
  if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
    Inverse = true;
  } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
    // Do nothing
  } else
    return false;

  // Match C as a one-use relational integer icmp against a loop invariant,
  // canonicalized so the loop-variant value ends up in LHS and the invariant
  // bound in RHS. The one-use restriction lets us delete the icmp afterwards.
  // For the logical-or case P is inverted here (see above).
  auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
                                       Value *&LHS, Value *&RHS) {
    if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
      return false;
    if (!LHS->getType()->isIntegerTy())
      return false;
    if (!ICmpInst::isRelational(P))
      return false;
    // Canonicalize: variant operand on the left.
    if (L.isLoopInvariant(LHS)) {
      std::swap(LHS, RHS);
      P = ICmpInst::getSwappedPredicate(P);
    }
    if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
      return false;
    if (Inverse)
      P = ICmpInst::getInversePredicate(P);
    return true;
  };
  ICmpInst::Predicate P1, P2;
  Value *LHS1, *LHS2, *RHS1, *RHS2;
  if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
      !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
    return false;
  // Both compares must test the same variant value with the same predicate.
  if (P1 != P2 || LHS1 != LHS2)
    return false;

  // Everything is fine, we can do the transform.
  bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
  assert(
      (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
      "Relational predicate is either less (or equal) or greater (or equal)!");
  Intrinsic::ID id = ICmpInst::isSigned(P1)
                         ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
                         : (UseMin ? Intrinsic::umin : Intrinsic::umax);
  auto *Preheader = L.getLoopPreheader();
  assert(Preheader && "Loop is not in simplify form?");
  IRBuilder<> Builder(Preheader->getTerminator());
  // We are about to create a new guaranteed use for RHS2 which might not exist
  // before (if it was a non-taken input of logical and/or instruction). If it
  // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
  // introduced, so they don't need this.
  if (isa<SelectInst>(I))
    RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
  // Compute the min/max of the two invariant bounds in the preheader.
  Value *NewRHS = Builder.CreateBinaryIntrinsic(
      id, RHS1, RHS2, nullptr, StringRef("invariant.") +
                                   (ICmpInst::isSigned(P1) ? "s" : "u") +
                                   (UseMin ? "min" : "max"));
  Builder.SetInsertPoint(&I);
  // Undo the inversion applied while matching (logical-or case) so the
  // replacement compare keeps the original meaning.
  ICmpInst::Predicate P = P1;
  if (Inverse)
    P = ICmpInst::getInversePredicate(P);
  Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
  NewCond->takeName(&I);
  I.replaceAllUsesWith(NewCond);
  // The and/or and both one-use compares are now dead.
  eraseInstruction(I, SafetyInfo, MSSAU);
  eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
  eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
  return true;
}
248306c3fb27SDimitry Andric
248406c3fb27SDimitry Andric /// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
248506c3fb27SDimitry Andric /// this allows hoisting the inner GEP.
hoistGEP(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)248606c3fb27SDimitry Andric static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
248706c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC,
248806c3fb27SDimitry Andric DominatorTree *DT) {
248906c3fb27SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(&I);
249006c3fb27SDimitry Andric if (!GEP)
249106c3fb27SDimitry Andric return false;
249206c3fb27SDimitry Andric
249306c3fb27SDimitry Andric auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
249406c3fb27SDimitry Andric if (!Src || !Src->hasOneUse() || !L.contains(Src))
249506c3fb27SDimitry Andric return false;
249606c3fb27SDimitry Andric
249706c3fb27SDimitry Andric Value *SrcPtr = Src->getPointerOperand();
249806c3fb27SDimitry Andric auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
249906c3fb27SDimitry Andric if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
250006c3fb27SDimitry Andric return false;
250106c3fb27SDimitry Andric
250206c3fb27SDimitry Andric // This can only happen if !AllowSpeculation, otherwise this would already be
250306c3fb27SDimitry Andric // handled.
250406c3fb27SDimitry Andric // FIXME: Should we respect AllowSpeculation in these reassociation folds?
250506c3fb27SDimitry Andric // The flag exists to prevent metadata dropping, which is not relevant here.
250606c3fb27SDimitry Andric if (all_of(Src->indices(), LoopInvariant))
250706c3fb27SDimitry Andric return false;
250806c3fb27SDimitry Andric
250906c3fb27SDimitry Andric // The swapped GEPs are inbounds if both original GEPs are inbounds
251006c3fb27SDimitry Andric // and the sign of the offsets is the same. For simplicity, only
251106c3fb27SDimitry Andric // handle both offsets being non-negative.
25120fca6ea1SDimitry Andric const DataLayout &DL = GEP->getDataLayout();
251306c3fb27SDimitry Andric auto NonNegative = [&](Value *V) {
25145f757f3fSDimitry Andric return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP));
251506c3fb27SDimitry Andric };
251606c3fb27SDimitry Andric bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
251706c3fb27SDimitry Andric all_of(Src->indices(), NonNegative) &&
251806c3fb27SDimitry Andric all_of(GEP->indices(), NonNegative);
251906c3fb27SDimitry Andric
252006c3fb27SDimitry Andric BasicBlock *Preheader = L.getLoopPreheader();
252106c3fb27SDimitry Andric IRBuilder<> Builder(Preheader->getTerminator());
252206c3fb27SDimitry Andric Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
252306c3fb27SDimitry Andric SmallVector<Value *>(GEP->indices()),
252406c3fb27SDimitry Andric "invariant.gep", IsInBounds);
252506c3fb27SDimitry Andric Builder.SetInsertPoint(GEP);
252606c3fb27SDimitry Andric Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
252706c3fb27SDimitry Andric SmallVector<Value *>(Src->indices()), "gep",
252806c3fb27SDimitry Andric IsInBounds);
252906c3fb27SDimitry Andric GEP->replaceAllUsesWith(NewGEP);
253006c3fb27SDimitry Andric eraseInstruction(*GEP, SafetyInfo, MSSAU);
253106c3fb27SDimitry Andric eraseInstruction(*Src, SafetyInfo, MSSAU);
253206c3fb27SDimitry Andric return true;
253306c3fb27SDimitry Andric }
253406c3fb27SDimitry Andric
/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
/// C1 and C2 are loop invariants and LV is a loop-variant.
static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
                     Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
                     ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
                     AssumptionCache *AC, DominatorTree *DT) {
  assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
  assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
  assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");

  // Try to represent VariantLHS as sum of invariant and variant operands.
  using namespace PatternMatch;
  Value *VariantOp, *InvariantOp;
  if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
    return false;

  // LHS itself is a loop-variant, try to represent it in the form:
  // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
  if (L.isLoopInvariant(VariantOp))
    std::swap(VariantOp, InvariantOp);
  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
    return false;

  // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
  // freely move values from left side of inequality to right side (just as in
  // normal linear arithmetics). Overflows make things much more complicated, so
  // we want to avoid this. The add was matched as NSW above; here we also
  // require that "C2 - C1" provably cannot overflow in signed arithmetic.
  auto &DL = L.getHeader()->getDataLayout();
  bool ProvedNoOverflowAfterReassociate =
      computeOverflowForSignedSub(InvariantRHS, InvariantOp,
                                  SimplifyQuery(DL, DT, AC, &ICmp)) ==
      llvm::OverflowResult::NeverOverflows;
  if (!ProvedNoOverflowAfterReassociate)
    return false;
  auto *Preheader = L.getLoopPreheader();
  assert(Preheader && "Loop is not in simplify form?");
  // Materialize the invariant "C2 - C1" in the preheader (outside the loop).
  IRBuilder<> Builder(Preheader->getTerminator());
  Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
                                      /*HasNUW*/ false, /*HasNSW*/ true);
  // Rewrite the compare in place: "VariantOp < C2 - C1".
  ICmp.setPredicate(Pred);
  ICmp.setOperand(0, VariantOp);
  ICmp.setOperand(1, NewCmpOp);
  // The original add is now dead (the caller checked it had a single use).
  eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
  return true;
}
258006c3fb27SDimitry Andric
/// Try to reassociate and hoist the following two patterns:
/// LV - C1 < C2 --> LV < C1 + C2,
/// C1 - LV < C2 --> LV > C1 - C2.
static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
                     Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
                     ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
                     AssumptionCache *AC, DominatorTree *DT) {
  assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
  assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
  assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");

  // Try to represent VariantLHS as sum of invariant and variant operands.
  using namespace PatternMatch;
  Value *VariantOp, *InvariantOp;
  if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
    return false;

  bool VariantSubtracted = false;
  // LHS itself is a loop-variant, try to represent it in the form:
  // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
  // the variant operand goes with minus, we use a slightly different scheme.
  if (L.isLoopInvariant(VariantOp)) {
    std::swap(VariantOp, InvariantOp);
    VariantSubtracted = true;
    // Moving LV across the inequality in "C1 - LV < C2" flips the direction
    // of the comparison, hence the swapped predicate.
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }
  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
    return false;

  // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
  // freely move values from left side of inequality to right side (just as in
  // normal linear arithmetics). Overflows make things much more complicated, so
  // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
  // "C1 - C2" does not overflow.
  auto &DL = L.getHeader()->getDataLayout();
  SimplifyQuery SQ(DL, DT, AC, &ICmp);
  if (VariantSubtracted) {
    // C1 - LV < C2 --> LV > C1 - C2
    if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) !=
        llvm::OverflowResult::NeverOverflows)
      return false;
  } else {
    // LV - C1 < C2 --> LV < C1 + C2
    if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) !=
        llvm::OverflowResult::NeverOverflows)
      return false;
  }
  auto *Preheader = L.getLoopPreheader();
  assert(Preheader && "Loop is not in simplify form?");
  // Materialize the invariant side of the rewritten compare in the preheader.
  IRBuilder<> Builder(Preheader->getTerminator());
  Value *NewCmpOp =
      VariantSubtracted
          ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
                              /*HasNUW*/ false, /*HasNSW*/ true)
          : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
                              /*HasNUW*/ false, /*HasNSW*/ true);
  // Rewrite the compare in place against the hoisted operand.
  ICmp.setPredicate(Pred);
  ICmp.setOperand(0, VariantOp);
  ICmp.setOperand(1, NewCmpOp);
  // The original sub is now dead (the caller checked it had a single use).
  eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
  return true;
}
264306c3fb27SDimitry Andric
264406c3fb27SDimitry Andric /// Reassociate and hoist add/sub expressions.
hoistAddSub(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)264506c3fb27SDimitry Andric static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
264606c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC,
264706c3fb27SDimitry Andric DominatorTree *DT) {
264806c3fb27SDimitry Andric using namespace PatternMatch;
264906c3fb27SDimitry Andric ICmpInst::Predicate Pred;
265006c3fb27SDimitry Andric Value *LHS, *RHS;
265106c3fb27SDimitry Andric if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
265206c3fb27SDimitry Andric return false;
265306c3fb27SDimitry Andric
265406c3fb27SDimitry Andric // TODO: Support unsigned predicates?
265506c3fb27SDimitry Andric if (!ICmpInst::isSigned(Pred))
265606c3fb27SDimitry Andric return false;
265706c3fb27SDimitry Andric
265806c3fb27SDimitry Andric // Put variant operand to LHS position.
265906c3fb27SDimitry Andric if (L.isLoopInvariant(LHS)) {
266006c3fb27SDimitry Andric std::swap(LHS, RHS);
266106c3fb27SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred);
266206c3fb27SDimitry Andric }
266306c3fb27SDimitry Andric // We want to delete the initial operation after reassociation, so only do it
266406c3fb27SDimitry Andric // if it has no other uses.
266506c3fb27SDimitry Andric if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
266606c3fb27SDimitry Andric return false;
266706c3fb27SDimitry Andric
266806c3fb27SDimitry Andric // TODO: We could go with smarter context, taking common dominator of all I's
266906c3fb27SDimitry Andric // users instead of I itself.
267006c3fb27SDimitry Andric if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
267106c3fb27SDimitry Andric return true;
267206c3fb27SDimitry Andric
267306c3fb27SDimitry Andric if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
267406c3fb27SDimitry Andric return true;
267506c3fb27SDimitry Andric
267606c3fb27SDimitry Andric return false;
267706c3fb27SDimitry Andric }
267806c3fb27SDimitry Andric
isReassociableOp(Instruction * I,unsigned IntOpcode,unsigned FPOpcode)26790fca6ea1SDimitry Andric static bool isReassociableOp(Instruction *I, unsigned IntOpcode,
26800fca6ea1SDimitry Andric unsigned FPOpcode) {
26810fca6ea1SDimitry Andric if (I->getOpcode() == IntOpcode)
26820fca6ea1SDimitry Andric return true;
26830fca6ea1SDimitry Andric if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
26840fca6ea1SDimitry Andric I->hasNoSignedZeros())
26850fca6ea1SDimitry Andric return true;
26860fca6ea1SDimitry Andric return false;
26870fca6ea1SDimitry Andric }
26880fca6ea1SDimitry Andric
/// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
/// A1, A2, ... and C are loop invariants into expressions like
/// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
/// invariant expressions. This function returns true only if any hoisting has
/// actually occurred.
static bool hoistMulAddAssociation(Instruction &I, Loop &L,
                                   ICFLoopSafetyInfo &SafetyInfo,
                                   MemorySSAUpdater &MSSAU, AssumptionCache *AC,
                                   DominatorTree *DT) {
  // I must be a (reassociable) multiply with exactly one invariant operand,
  // which becomes the factor distributed over the add tree below.
  if (!isReassociableOp(&I, Instruction::Mul, Instruction::FMul))
    return false;
  Value *VariantOp = I.getOperand(0);
  Value *InvariantOp = I.getOperand(1);
  if (L.isLoopInvariant(VariantOp))
    std::swap(VariantOp, InvariantOp);
  if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
    return false;
  Value *Factor = InvariantOp;

  // First, we need to make sure we should do the transformation.
  // Walk the add/mul tree under VariantOp: Changes collects the operand uses
  // holding the invariant mul factors to combine with Factor, and Adds the
  // adds we looked through.
  SmallVector<Use *> Changes;
  SmallVector<BinaryOperator *> Adds;
  SmallVector<BinaryOperator *> Worklist;
  if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
    Worklist.push_back(VariantBinOp);
  while (!Worklist.empty()) {
    BinaryOperator *BO = Worklist.pop_back_val();
    // Every node of the tree gets rewritten or erased, so nothing outside the
    // tree may use it.
    if (!BO->hasOneUse())
      return false;
    if (isReassociableOp(BO, Instruction::Add, Instruction::FAdd) &&
        isa<BinaryOperator>(BO->getOperand(0)) &&
        isa<BinaryOperator>(BO->getOperand(1))) {
      Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
      Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
      Adds.push_back(BO);
      continue;
    }
    // Leaves must be loop-variant muls with exactly one invariant operand.
    if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
        L.isLoopInvariant(BO))
      return false;
    Use &U0 = BO->getOperandUse(0);
    Use &U1 = BO->getOperandUse(1);
    if (L.isLoopInvariant(U0))
      Changes.push_back(&U0);
    else if (L.isLoopInvariant(U1))
      Changes.push_back(&U1);
    else
      return false;
    // Cap the number of leaf rewrites to bound code-size growth.
    unsigned Limit = I.getType()->isIntOrIntVectorTy()
                         ? IntAssociationUpperLimit
                         : FPAssociationUpperLimit;
    if (Changes.size() > Limit)
      return false;
  }
  if (Changes.empty())
    return false;

  // Drop the poison flags for any adds we looked through.
  if (I.getType()->isIntOrIntVectorTy()) {
    for (auto *Add : Adds)
      Add->dropPoisonGeneratingFlags();
  }

  // We know we should do it so let's do the transformation.
  auto *Preheader = L.getLoopPreheader();
  assert(Preheader && "Loop is not in simplify form?");
  IRBuilder<> Builder(Preheader->getTerminator());
  for (auto *U : Changes) {
    assert(L.isLoopInvariant(U->get()));
    auto *Ins = cast<BinaryOperator>(U->getUser());
    // Multiply the leaf's invariant operand by Factor in the preheader.
    Value *Mul;
    if (I.getType()->isIntOrIntVectorTy()) {
      Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
      // Drop the poison flags on the original multiply.
      Ins->dropPoisonGeneratingFlags();
    } else
      Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");

    // Rewrite the reassociable instruction.
    unsigned OpIdx = U->getOperandNo();
    auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0);
    auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(1);
    auto *NewBO = BinaryOperator::Create(Ins->getOpcode(), LHS, RHS,
                                         Ins->getName() + ".reass", Ins);
    NewBO->copyIRFlags(Ins);
    // If the tree root itself got replaced, the final RAUW must use the new
    // instruction.
    if (VariantOp == Ins)
      VariantOp = NewBO;
    Ins->replaceAllUsesWith(NewBO);
    eraseInstruction(*Ins, SafetyInfo, MSSAU);
  }

  // The factor is now folded into the leaves, so I reduces to its variant
  // operand.
  I.replaceAllUsesWith(VariantOp);
  eraseInstruction(I, SafetyInfo, MSSAU);
  return true;
}
27845f757f3fSDimitry Andric
hoistArithmetics(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)278506c3fb27SDimitry Andric static bool hoistArithmetics(Instruction &I, Loop &L,
278606c3fb27SDimitry Andric ICFLoopSafetyInfo &SafetyInfo,
278706c3fb27SDimitry Andric MemorySSAUpdater &MSSAU, AssumptionCache *AC,
278806c3fb27SDimitry Andric DominatorTree *DT) {
278906c3fb27SDimitry Andric // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
279006c3fb27SDimitry Andric // into (x < min(INV1, INV2)), and hoisting the invariant part of this
279106c3fb27SDimitry Andric // expression out of the loop.
279206c3fb27SDimitry Andric if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
279306c3fb27SDimitry Andric ++NumHoisted;
279406c3fb27SDimitry Andric ++NumMinMaxHoisted;
279506c3fb27SDimitry Andric return true;
279606c3fb27SDimitry Andric }
279706c3fb27SDimitry Andric
279806c3fb27SDimitry Andric // Try to hoist GEPs by reassociation.
279906c3fb27SDimitry Andric if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
280006c3fb27SDimitry Andric ++NumHoisted;
280106c3fb27SDimitry Andric ++NumGEPsHoisted;
280206c3fb27SDimitry Andric return true;
280306c3fb27SDimitry Andric }
280406c3fb27SDimitry Andric
280506c3fb27SDimitry Andric // Try to hoist add/sub's by reassociation.
280606c3fb27SDimitry Andric if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
280706c3fb27SDimitry Andric ++NumHoisted;
280806c3fb27SDimitry Andric ++NumAddSubHoisted;
280906c3fb27SDimitry Andric return true;
281006c3fb27SDimitry Andric }
281106c3fb27SDimitry Andric
28120fca6ea1SDimitry Andric bool IsInt = I.getType()->isIntOrIntVectorTy();
28130fca6ea1SDimitry Andric if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
28145f757f3fSDimitry Andric ++NumHoisted;
28150fca6ea1SDimitry Andric if (IsInt)
28160fca6ea1SDimitry Andric ++NumIntAssociationsHoisted;
28170fca6ea1SDimitry Andric else
28185f757f3fSDimitry Andric ++NumFPAssociationsHoisted;
28195f757f3fSDimitry Andric return true;
28205f757f3fSDimitry Andric }
28215f757f3fSDimitry Andric
282206c3fb27SDimitry Andric return false;
282306c3fb27SDimitry Andric }
282406c3fb27SDimitry Andric
28250b57cec5SDimitry Andric /// Little predicate that returns true if the specified basic block is in
28260b57cec5SDimitry Andric /// a subloop of the current one, not the current one itself.
28270b57cec5SDimitry Andric ///
inSubLoop(BasicBlock * BB,Loop * CurLoop,LoopInfo * LI)28280b57cec5SDimitry Andric static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
28290b57cec5SDimitry Andric assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
28300b57cec5SDimitry Andric return LI->getLoopFor(BB) != CurLoop;
28310b57cec5SDimitry Andric }
2832