xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/CaptureTracking.h"
47 #include "llvm/Analysis/DomTreeUpdater.h"
48 #include "llvm/Analysis/GuardUtils.h"
49 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
50 #include "llvm/Analysis/Loads.h"
51 #include "llvm/Analysis/LoopInfo.h"
52 #include "llvm/Analysis/LoopIterator.h"
53 #include "llvm/Analysis/LoopNestAnalysis.h"
54 #include "llvm/Analysis/LoopPass.h"
55 #include "llvm/Analysis/MemorySSA.h"
56 #include "llvm/Analysis/MemorySSAUpdater.h"
57 #include "llvm/Analysis/MustExecute.h"
58 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
59 #include "llvm/Analysis/ScalarEvolution.h"
60 #include "llvm/Analysis/TargetLibraryInfo.h"
61 #include "llvm/Analysis/TargetTransformInfo.h"
62 #include "llvm/Analysis/ValueTracking.h"
63 #include "llvm/IR/CFG.h"
64 #include "llvm/IR/Constants.h"
65 #include "llvm/IR/DataLayout.h"
66 #include "llvm/IR/DebugInfoMetadata.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/IRBuilder.h"
70 #include "llvm/IR/Instructions.h"
71 #include "llvm/IR/IntrinsicInst.h"
72 #include "llvm/IR/LLVMContext.h"
73 #include "llvm/IR/Metadata.h"
74 #include "llvm/IR/PatternMatch.h"
75 #include "llvm/IR/PredIteratorCache.h"
76 #include "llvm/InitializePasses.h"
77 #include "llvm/Support/CommandLine.h"
78 #include "llvm/Support/Debug.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/SSAUpdater.h"
86 #include <algorithm>
87 #include <utility>
88 using namespace llvm;
89 
90 namespace llvm {
91 class LPMUpdater;
92 } // namespace llvm
93 
94 #define DEBUG_TYPE "licm"
95 
96 STATISTIC(NumCreatedBlocks, "Number of blocks created");
97 STATISTIC(NumClonedBranches, "Number of branches cloned");
98 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102 STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103 STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104 STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105 STATISTIC(NumMinMaxHoisted,
106           "Number of min/max expressions hoisted out of the loop");
107 STATISTIC(NumGEPsHoisted,
108           "Number of geps reassociated and hoisted out of the loop");
109 STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110                             "and hoisted out of the loop");
111 STATISTIC(NumFPAssociationsHoisted, "Number of invariant FP expressions "
112                                     "reassociated and hoisted out of the loop");
113 STATISTIC(NumIntAssociationsHoisted,
114           "Number of invariant int expressions "
115           "reassociated and hoisted out of the loop");
116 STATISTIC(NumBOAssociationsHoisted, "Number of invariant BinaryOp expressions "
117                                     "reassociated and hoisted out of the loop");
118 
119 /// Memory promotion is enabled by default.
120 static cl::opt<bool>
121     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
122                      cl::desc("Disable memory promotion in LICM pass"));
123 
124 static cl::opt<bool> ControlFlowHoisting(
125     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
126     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
127 
128 static cl::opt<bool>
129     SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
130                  cl::desc("Force thread model single in LICM pass"));
131 
132 static cl::opt<uint32_t> MaxNumUsesTraversed(
133     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
134     cl::desc("Max num uses visited for identifying load "
135              "invariance in loop using invariant start (default = 8)"));
136 
137 static cl::opt<unsigned> FPAssociationUpperLimit(
138     "licm-max-num-fp-reassociations", cl::init(5U), cl::Hidden,
139     cl::desc(
140         "Set upper limit for the number of transformations performed "
141         "during a single round of hoisting the reassociated expressions."));
142 
143 static cl::opt<unsigned> IntAssociationUpperLimit(
144     "licm-max-num-int-reassociations", cl::init(5U), cl::Hidden,
145     cl::desc(
146         "Set upper limit for the number of transformations performed "
147         "during a single round of hoisting the reassociated expressions."));
148 
149 // Experimental option to allow imprecision in LICM in pathological cases, in
150 // exchange for faster compile. This is to be removed if MemorySSA starts to
151 // address the same issue. LICM calls MemorySSAWalker's
152 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
153 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
154 // which may not be precise, since optimizeUses is capped. The result is
155 // correct, but we may not get as "far up" as possible to get which access is
156 // clobbering the one queried.
157 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
158     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
159     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
160              "for faster compile. Caps the MemorySSA clobbering calls."));
161 
162 // Experimentally, memory promotion carries less importance than sinking and
163 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
164 // compile time.
165 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
166     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
167     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
168              "effect. When MSSA in LICM is enabled, then this is the maximum "
169              "number of accesses allowed to be present in a loop in order to "
170              "enable memory promotion."));
171 
172 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
173 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
174                                       const LoopSafetyInfo *SafetyInfo,
175                                       TargetTransformInfo *TTI,
176                                       bool &FoldableInLoop, bool LoopNestMode);
177 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
178                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
179                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
180                   OptimizationRemarkEmitter *ORE);
181 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
182                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
183                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
184 static bool isSafeToExecuteUnconditionally(
185     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
186     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
187     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
188     AssumptionCache *AC, bool AllowSpeculation);
189 static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
190                                     AAResults *AA, Loop *CurLoop,
191                                     SinkAndHoistLICMFlags &Flags);
192 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
193                                      Loop *CurLoop, Instruction &I,
194                                      SinkAndHoistLICMFlags &Flags,
195                                      bool InvariantGroup);
196 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
197                                       MemoryUse &MU);
198 /// Aggregates various functions for hoisting computations out of loop.
199 static bool hoistArithmetics(Instruction &I, Loop &L,
200                              ICFLoopSafetyInfo &SafetyInfo,
201                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
202                              DominatorTree *DT);
203 static Instruction *cloneInstructionInExitBlock(
204     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
205     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
206 
207 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
208                              MemorySSAUpdater &MSSAU);
209 
210 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
211                                   ICFLoopSafetyInfo &SafetyInfo,
212                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
213 
214 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
215                                 function_ref<void(Instruction *)> Fn);
216 using PointersAndHasReadsOutsideSet =
217     std::pair<SmallSetVector<Value *, 8>, bool>;
218 static SmallVector<PointersAndHasReadsOutsideSet, 0>
219 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
220 
221 namespace {
222 struct LoopInvariantCodeMotion {
223   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
224                  AssumptionCache *AC, TargetLibraryInfo *TLI,
225                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
226                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
227 
LoopInvariantCodeMotion__anon9998ac180111::LoopInvariantCodeMotion228   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
229                           unsigned LicmMssaNoAccForPromotionCap,
230                           bool LicmAllowSpeculation)
231       : LicmMssaOptCap(LicmMssaOptCap),
232         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
233         LicmAllowSpeculation(LicmAllowSpeculation) {}
234 
235 private:
236   unsigned LicmMssaOptCap;
237   unsigned LicmMssaNoAccForPromotionCap;
238   bool LicmAllowSpeculation;
239 };
240 
241 struct LegacyLICMPass : public LoopPass {
242   static char ID; // Pass identification, replacement for typeid
LegacyLICMPass__anon9998ac180111::LegacyLICMPass243   LegacyLICMPass(
244       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
245       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
246       bool LicmAllowSpeculation = true)
247       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
248                            LicmAllowSpeculation) {
249     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
250   }
251 
runOnLoop__anon9998ac180111::LegacyLICMPass252   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
253     if (skipLoop(L))
254       return false;
255 
256     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
257                       << L->getHeader()->getNameOrAsOperand() << "\n");
258 
259     Function *F = L->getHeader()->getParent();
260 
261     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
262     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
263     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
264     // pass. Function analyses need to be preserved across loop transformations
265     // but ORE cannot be preserved (see comment before the pass definition).
266     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
267     return LICM.runOnLoop(
268         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
269         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
270         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
271         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
272         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
273         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
274         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
275   }
276 
277   /// This transformation requires natural loop information & requires that
278   /// loop preheaders be inserted into the CFG...
279   ///
getAnalysisUsage__anon9998ac180111::LegacyLICMPass280   void getAnalysisUsage(AnalysisUsage &AU) const override {
281     AU.addPreserved<DominatorTreeWrapperPass>();
282     AU.addPreserved<LoopInfoWrapperPass>();
283     AU.addRequired<TargetLibraryInfoWrapperPass>();
284     AU.addRequired<MemorySSAWrapperPass>();
285     AU.addPreserved<MemorySSAWrapperPass>();
286     AU.addRequired<TargetTransformInfoWrapperPass>();
287     AU.addRequired<AssumptionCacheTracker>();
288     getLoopAnalysisUsage(AU);
289     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
290     AU.addPreserved<LazyBlockFrequencyInfoPass>();
291     AU.addPreserved<LazyBranchProbabilityInfoPass>();
292   }
293 
294 private:
295   LoopInvariantCodeMotion LICM;
296 };
297 } // namespace
298 
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)299 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
300                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
301   if (!AR.MSSA)
302     reportFatalUsageError("LICM requires MemorySSA (loop-mssa)");
303 
304   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
305   // pass.  Function analyses need to be preserved across loop transformations
306   // but ORE cannot be preserved (see comment before the pass definition).
307   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
308 
309   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
310                                Opts.AllowSpeculation);
311   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
312                       &AR.SE, AR.MSSA, &ORE))
313     return PreservedAnalyses::all();
314 
315   auto PA = getLoopPassPreservedAnalyses();
316   PA.preserve<MemorySSAAnalysis>();
317 
318   return PA;
319 }
320 
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)321 void LICMPass::printPipeline(
322     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
323   static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
324       OS, MapClassName2PassName);
325 
326   OS << '<';
327   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
328   OS << '>';
329 }
330 
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)331 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
332                                  LoopStandardAnalysisResults &AR,
333                                  LPMUpdater &) {
334   if (!AR.MSSA)
335     reportFatalUsageError("LNICM requires MemorySSA (loop-mssa)");
336 
337   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
338   // pass.  Function analyses need to be preserved across loop transformations
339   // but ORE cannot be preserved (see comment before the pass definition).
340   OptimizationRemarkEmitter ORE(LN.getParent());
341 
342   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
343                                Opts.AllowSpeculation);
344 
345   Loop &OutermostLoop = LN.getOutermostLoop();
346   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
347                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
348 
349   if (!Changed)
350     return PreservedAnalyses::all();
351 
352   auto PA = getLoopPassPreservedAnalyses();
353 
354   PA.preserve<DominatorTreeAnalysis>();
355   PA.preserve<LoopAnalysis>();
356   PA.preserve<MemorySSAAnalysis>();
357 
358   return PA;
359 }
360 
printPipeline(raw_ostream & OS,function_ref<StringRef (StringRef)> MapClassName2PassName)361 void LNICMPass::printPipeline(
362     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
363   static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
364       OS, MapClassName2PassName);
365 
366   OS << '<';
367   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
368   OS << '>';
369 }
370 
371 char LegacyLICMPass::ID = 0;
372 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
373                       false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)374 INITIALIZE_PASS_DEPENDENCY(LoopPass)
375 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
376 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
377 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
378 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
379 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
380                     false)
381 
382 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
383 
SinkAndHoistLICMFlags(bool IsSink,Loop & L,MemorySSA & MSSA)384 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
385                                                    MemorySSA &MSSA)
386     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
387                             IsSink, L, MSSA) {}
388 
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,unsigned LicmMssaNoAccForPromotionCap,bool IsSink,Loop & L,MemorySSA & MSSA)389 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
390     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
391     Loop &L, MemorySSA &MSSA)
392     : LicmMssaOptCap(LicmMssaOptCap),
393       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
394       IsSink(IsSink) {
395   unsigned AccessCapCount = 0;
396   for (auto *BB : L.getBlocks())
397     if (const auto *Accesses = MSSA.getBlockAccesses(BB))
398       for (const auto &MA : *Accesses) {
399         (void)MA;
400         ++AccessCapCount;
401         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
402           NoOfMemAccTooLarge = true;
403           return;
404         }
405       }
406 }
407 
408 /// Hoist expressions out of the specified loop. Note, alias info for inner
409 /// loop is not preserved so it is not a good idea to run LICM multiple
410 /// times on one loop.
runOnLoop(Loop * L,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,ScalarEvolution * SE,MemorySSA * MSSA,OptimizationRemarkEmitter * ORE,bool LoopNestMode)411 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
412                                         DominatorTree *DT, AssumptionCache *AC,
413                                         TargetLibraryInfo *TLI,
414                                         TargetTransformInfo *TTI,
415                                         ScalarEvolution *SE, MemorySSA *MSSA,
416                                         OptimizationRemarkEmitter *ORE,
417                                         bool LoopNestMode) {
418   bool Changed = false;
419 
420   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
421 
422   // If this loop has metadata indicating that LICM is not to be performed then
423   // just exit.
424   if (hasDisableLICMTransformsHint(L)) {
425     return false;
426   }
427 
428   // Don't sink stores from loops with coroutine suspend instructions.
429   // LICM would sink instructions into the default destination of
430   // the coroutine switch. The default destination of the switch is to
431   // handle the case where the coroutine is suspended, by which point the
432   // coroutine frame may have been destroyed. No instruction can be sunk there.
433   // FIXME: This would unfortunately hurt the performance of coroutines, however
434   // there is currently no general solution for this. Similar issues could also
435   // potentially happen in other passes where instructions are being moved
436   // across that edge.
437   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
438     return llvm::any_of(*BB, [](Instruction &I) {
439       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
440       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
441     });
442   });
443 
444   MemorySSAUpdater MSSAU(MSSA);
445   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
446                               /*IsSink=*/true, *L, *MSSA);
447 
448   // Get the preheader block to move instructions into...
449   BasicBlock *Preheader = L->getLoopPreheader();
450 
451   // Compute loop safety information.
452   ICFLoopSafetyInfo SafetyInfo;
453   SafetyInfo.computeLoopSafetyInfo(L);
454 
455   // We want to visit all of the instructions in this loop... that are not parts
456   // of our subloops (they have already had their invariants hoisted out of
457   // their loop, into this loop, so there is no need to process the BODIES of
458   // the subloops).
459   //
460   // Traverse the body of the loop in depth first order on the dominator tree so
461   // that we are guaranteed to see definitions before we see uses.  This allows
462   // us to sink instructions in one pass, without iteration.  After sinking
463   // instructions, we perform another pass to hoist them out of the loop.
464   if (L->hasDedicatedExits())
465     Changed |=
466         LoopNestMode
467             ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
468                                     TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
469             : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
470                          MSSAU, &SafetyInfo, Flags, ORE);
471   Flags.setIsSink(false);
472   if (Preheader)
473     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
474                            MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
475                            LicmAllowSpeculation);
476 
477   // Now that all loop invariants have been removed from the loop, promote any
478   // memory references to scalars that we can.
479   // Don't sink stores from loops without dedicated block exits. Exits
480   // containing indirect branches are not transformed by loop simplify,
481   // make sure we catch that. An additional load may be generated in the
482   // preheader for SSA updater, so also avoid sinking when no preheader
483   // is available.
484   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
485       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
486     // Figure out the loop exits and their insertion points
487     SmallVector<BasicBlock *, 8> ExitBlocks;
488     L->getUniqueExitBlocks(ExitBlocks);
489 
490     // We can't insert into a catchswitch.
491     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
492       return isa<CatchSwitchInst>(Exit->getTerminator());
493     });
494 
495     if (!HasCatchSwitch) {
496       SmallVector<BasicBlock::iterator, 8> InsertPts;
497       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
498       InsertPts.reserve(ExitBlocks.size());
499       MSSAInsertPts.reserve(ExitBlocks.size());
500       for (BasicBlock *ExitBlock : ExitBlocks) {
501         InsertPts.push_back(ExitBlock->getFirstInsertionPt());
502         MSSAInsertPts.push_back(nullptr);
503       }
504 
505       PredIteratorCache PIC;
506 
507       // Promoting one set of accesses may make the pointers for another set
508       // loop invariant, so run this in a loop.
509       bool Promoted = false;
510       bool LocalPromoted;
511       do {
512         LocalPromoted = false;
513         for (auto [PointerMustAliases, HasReadsOutsideSet] :
514              collectPromotionCandidates(MSSA, AA, L)) {
515           LocalPromoted |= promoteLoopAccessesToScalars(
516               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
517               DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
518               LicmAllowSpeculation, HasReadsOutsideSet);
519         }
520         Promoted |= LocalPromoted;
521       } while (LocalPromoted);
522 
523       // Once we have promoted values across the loop body we have to
524       // recursively reform LCSSA as any nested loop may now have values defined
525       // within the loop used in the outer loop.
526       // FIXME: This is really heavy handed. It would be a bit better to use an
527       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
528       // it as it went.
529       if (Promoted)
530         formLCSSARecursively(*L, *DT, LI, SE);
531 
532       Changed |= Promoted;
533     }
534   }
535 
536   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
537   // specifically moving instructions across the loop boundary and so it is
538   // especially in need of basic functional correctness checking here.
539   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
540   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
541          "Parent loop not left in LCSSA form after LICM!");
542 
543   if (VerifyMemorySSA)
544     MSSA->verifyMemorySSA();
545 
546   if (Changed && SE)
547     SE->forgetLoopDispositions();
548   return Changed;
549 }
550 
551 /// Walk the specified region of the CFG (defined by all blocks dominated by
552 /// the specified block, and that are in the current loop) in reverse depth
553 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
554 /// definitions, allowing us to sink a loop body in one pass without iteration.
555 ///
sinkRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,Loop * OutermostLoop)556 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
557                       DominatorTree *DT, TargetLibraryInfo *TLI,
558                       TargetTransformInfo *TTI, Loop *CurLoop,
559                       MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
560                       SinkAndHoistLICMFlags &Flags,
561                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
562 
563   // Verify inputs.
564   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
565          CurLoop != nullptr && SafetyInfo != nullptr &&
566          "Unexpected input to sinkRegion.");
567 
568   // We want to visit children before parents. We will enqueue all the parents
569   // before their children in the worklist and process the worklist in reverse
570   // order.
571   SmallVector<BasicBlock *, 16> Worklist =
572       collectChildrenInLoop(DT, N, CurLoop);
573 
574   bool Changed = false;
575   for (BasicBlock *BB : reverse(Worklist)) {
576     // subloop (which would already have been processed).
577     if (inSubLoop(BB, CurLoop, LI))
578       continue;
579 
580     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
581       Instruction &I = *--II;
582 
583       // The instruction is not used in the loop if it is dead.  In this case,
584       // we just delete it instead of sinking it.
585       if (isInstructionTriviallyDead(&I, TLI)) {
586         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
587         salvageKnowledge(&I);
588         salvageDebugInfo(I);
589         ++II;
590         eraseInstruction(I, *SafetyInfo, MSSAU);
591         Changed = true;
592         continue;
593       }
594 
595       // Check to see if we can sink this instruction to the exit blocks
596       // of the loop.  We can do this if the all users of the instruction are
597       // outside of the loop.  In this case, it doesn't even matter if the
598       // operands of the instruction are loop invariant.
599       //
600       bool FoldableInLoop = false;
601       bool LoopNestMode = OutermostLoop != nullptr;
602       if (!I.mayHaveSideEffects() &&
603           isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
604                                     SafetyInfo, TTI, FoldableInLoop,
605                                     LoopNestMode) &&
606           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
607         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
608           if (!FoldableInLoop) {
609             ++II;
610             salvageDebugInfo(I);
611             eraseInstruction(I, *SafetyInfo, MSSAU);
612           }
613           Changed = true;
614         }
615       }
616     }
617   }
618   if (VerifyMemorySSA)
619     MSSAU.getMemorySSA()->verifyMemorySSA();
620   return Changed;
621 }
622 
sinkRegionForLoopNest(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)623 bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
624                                  DominatorTree *DT, TargetLibraryInfo *TLI,
625                                  TargetTransformInfo *TTI, Loop *CurLoop,
626                                  MemorySSAUpdater &MSSAU,
627                                  ICFLoopSafetyInfo *SafetyInfo,
628                                  SinkAndHoistLICMFlags &Flags,
629                                  OptimizationRemarkEmitter *ORE) {
630 
631   bool Changed = false;
632   SmallPriorityWorklist<Loop *, 4> Worklist;
633   Worklist.insert(CurLoop);
634   appendLoopsToWorklist(*CurLoop, Worklist);
635   while (!Worklist.empty()) {
636     Loop *L = Worklist.pop_back_val();
637     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
638                           MSSAU, SafetyInfo, Flags, ORE, CurLoop);
639   }
640   return Changed;
641 }
642 
643 namespace {
644 // This is a helper class for hoistRegion to make it able to hoist control flow
645 // in order to be able to hoist phis. The way this works is that we initially
646 // start hoisting to the loop preheader, and when we see a loop invariant branch
647 // we make note of this. When we then come to hoist an instruction that's
648 // conditional on such a branch we duplicate the branch and the relevant control
649 // flow, then hoist the instruction into the block corresponding to its original
650 // block in the duplicated control flow.
651 class ControlFlowHoister {
652 private:
653   // Information about the loop we are hoisting from
654   LoopInfo *LI;
655   DominatorTree *DT;
656   Loop *CurLoop;
657   MemorySSAUpdater &MSSAU;
658 
659   // A map of blocks in the loop to the block their instructions will be hoisted
660   // to.
661   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
662 
663   // The branches that we can hoist, mapped to the block that marks a
664   // convergence point of their control flow.
665   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
666 
667 public:
ControlFlowHoister(LoopInfo * LI,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU)668   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
669                      MemorySSAUpdater &MSSAU)
670       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
671 
registerPossiblyHoistableBranch(BranchInst * BI)672   void registerPossiblyHoistableBranch(BranchInst *BI) {
673     // We can only hoist conditional branches with loop invariant operands.
674     if (!ControlFlowHoisting || !BI->isConditional() ||
675         !CurLoop->hasLoopInvariantOperands(BI))
676       return;
677 
678     // The branch destinations need to be in the loop, and we don't gain
679     // anything by duplicating conditional branches with duplicate successors,
680     // as it's essentially the same as an unconditional branch.
681     BasicBlock *TrueDest = BI->getSuccessor(0);
682     BasicBlock *FalseDest = BI->getSuccessor(1);
683     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
684         TrueDest == FalseDest)
685       return;
686 
687     // We can hoist BI if one branch destination is the successor of the other,
688     // or both have common successor which we check by seeing if the
689     // intersection of their successors is non-empty.
690     // TODO: This could be expanded to allowing branches where both ends
691     // eventually converge to a single block.
692     SmallPtrSet<BasicBlock *, 4> TrueDestSucc(llvm::from_range,
693                                               successors(TrueDest));
694     SmallPtrSet<BasicBlock *, 4> FalseDestSucc(llvm::from_range,
695                                                successors(FalseDest));
696     BasicBlock *CommonSucc = nullptr;
697     if (TrueDestSucc.count(FalseDest)) {
698       CommonSucc = FalseDest;
699     } else if (FalseDestSucc.count(TrueDest)) {
700       CommonSucc = TrueDest;
701     } else {
702       set_intersect(TrueDestSucc, FalseDestSucc);
703       // If there's one common successor use that.
704       if (TrueDestSucc.size() == 1)
705         CommonSucc = *TrueDestSucc.begin();
706       // If there's more than one pick whichever appears first in the block list
707       // (we can't use the value returned by TrueDestSucc.begin() as it's
708       // unpredicatable which element gets returned).
709       else if (!TrueDestSucc.empty()) {
710         Function *F = TrueDest->getParent();
711         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
712         auto It = llvm::find_if(*F, IsSucc);
713         assert(It != F->end() && "Could not find successor in function");
714         CommonSucc = &*It;
715       }
716     }
717     // The common successor has to be dominated by the branch, as otherwise
718     // there will be some other path to the successor that will not be
719     // controlled by this branch so any phi we hoist would be controlled by the
720     // wrong condition. This also takes care of avoiding hoisting of loop back
721     // edges.
722     // TODO: In some cases this could be relaxed if the successor is dominated
723     // by another block that's been hoisted and we can guarantee that the
724     // control flow has been replicated exactly.
725     if (CommonSucc && DT->dominates(BI, CommonSucc))
726       HoistableBranches[BI] = CommonSucc;
727   }
728 
canHoistPHI(PHINode * PN)729   bool canHoistPHI(PHINode *PN) {
730     // The phi must have loop invariant operands.
731     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
732       return false;
733     // We can hoist phis if the block they are in is the target of hoistable
734     // branches which cover all of the predecessors of the block.
735     BasicBlock *BB = PN->getParent();
736     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks(llvm::from_range,
737                                                    predecessors(BB));
738     // If we have less predecessor blocks than predecessors then the phi will
739     // have more than one incoming value for the same block which we can't
740     // handle.
741     // TODO: This could be handled be erasing some of the duplicate incoming
742     // values.
743     if (PredecessorBlocks.size() != pred_size(BB))
744       return false;
745     for (auto &Pair : HoistableBranches) {
746       if (Pair.second == BB) {
747         // Which blocks are predecessors via this branch depends on if the
748         // branch is triangle-like or diamond-like.
749         if (Pair.first->getSuccessor(0) == BB) {
750           PredecessorBlocks.erase(Pair.first->getParent());
751           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
752         } else if (Pair.first->getSuccessor(1) == BB) {
753           PredecessorBlocks.erase(Pair.first->getParent());
754           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
755         } else {
756           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
757           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
758         }
759       }
760     }
761     // PredecessorBlocks will now be empty if for every predecessor of BB we
762     // found a hoistable branch source.
763     return PredecessorBlocks.empty();
764   }
765 
getOrCreateHoistedBlock(BasicBlock * BB)766   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
767     if (!ControlFlowHoisting)
768       return CurLoop->getLoopPreheader();
769     // If BB has already been hoisted, return that
770     if (auto It = HoistDestinationMap.find(BB); It != HoistDestinationMap.end())
771       return It->second;
772 
773     // Check if this block is conditional based on a pending branch
774     auto HasBBAsSuccessor =
775         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
776           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
777                                        Pair.first->getSuccessor(1) == BB);
778         };
779     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
780 
781     // If not involved in a pending branch, hoist to preheader
782     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
783     if (It == HoistableBranches.end()) {
784       LLVM_DEBUG(dbgs() << "LICM using "
785                         << InitialPreheader->getNameOrAsOperand()
786                         << " as hoist destination for "
787                         << BB->getNameOrAsOperand() << "\n");
788       HoistDestinationMap[BB] = InitialPreheader;
789       return InitialPreheader;
790     }
791     BranchInst *BI = It->first;
792     assert(std::none_of(std::next(It), HoistableBranches.end(),
793                         HasBBAsSuccessor) &&
794            "BB is expected to be the target of at most one branch");
795 
796     LLVMContext &C = BB->getContext();
797     BasicBlock *TrueDest = BI->getSuccessor(0);
798     BasicBlock *FalseDest = BI->getSuccessor(1);
799     BasicBlock *CommonSucc = HoistableBranches[BI];
800     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
801 
802     // Create hoisted versions of blocks that currently don't have them
803     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
804       auto [It, Inserted] = HoistDestinationMap.try_emplace(Orig);
805       if (!Inserted)
806         return It->second;
807       BasicBlock *New =
808           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
809       It->second = New;
810       DT->addNewBlock(New, HoistTarget);
811       if (CurLoop->getParentLoop())
812         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
813       ++NumCreatedBlocks;
814       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
815                         << " as hoist destination for " << Orig->getName()
816                         << "\n");
817       return New;
818     };
819     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
820     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
821     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
822 
823     // Link up these blocks with branches.
824     if (!HoistCommonSucc->getTerminator()) {
825       // The new common successor we've generated will branch to whatever that
826       // hoist target branched to.
827       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
828       assert(TargetSucc && "Expected hoist target to have a single successor");
829       HoistCommonSucc->moveBefore(TargetSucc);
830       BranchInst::Create(TargetSucc, HoistCommonSucc);
831     }
832     if (!HoistTrueDest->getTerminator()) {
833       HoistTrueDest->moveBefore(HoistCommonSucc);
834       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
835     }
836     if (!HoistFalseDest->getTerminator()) {
837       HoistFalseDest->moveBefore(HoistCommonSucc);
838       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
839     }
840 
841     // If BI is being cloned to what was originally the preheader then
842     // HoistCommonSucc will now be the new preheader.
843     if (HoistTarget == InitialPreheader) {
844       // Phis in the loop header now need to use the new preheader.
845       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
846       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
847           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
848       // The new preheader dominates the loop header.
849       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
850       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
851       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
852       // The preheader hoist destination is now the new preheader, with the
853       // exception of the hoist destination of this branch.
854       for (auto &Pair : HoistDestinationMap)
855         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
856           Pair.second = HoistCommonSucc;
857     }
858 
859     // Now finally clone BI.
860     ReplaceInstWithInst(
861         HoistTarget->getTerminator(),
862         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
863     ++NumClonedBranches;
864 
865     assert(CurLoop->getLoopPreheader() &&
866            "Hoisting blocks should not have destroyed preheader");
867     return HoistDestinationMap[BB];
868   }
869 };
870 } // namespace
871 
872 /// Walk the specified region of the CFG (defined by all blocks dominated by
873 /// the specified block, and that are in the current loop) in depth first
874 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
875 /// uses, allowing us to hoist a loop body in one pass without iteration.
876 ///
hoistRegion(DomTreeNode * N,AAResults * AA,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * TLI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ScalarEvolution * SE,ICFLoopSafetyInfo * SafetyInfo,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE,bool LoopNestMode,bool AllowSpeculation)877 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
878                        DominatorTree *DT, AssumptionCache *AC,
879                        TargetLibraryInfo *TLI, Loop *CurLoop,
880                        MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
881                        ICFLoopSafetyInfo *SafetyInfo,
882                        SinkAndHoistLICMFlags &Flags,
883                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
884                        bool AllowSpeculation) {
885   // Verify inputs.
886   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
887          CurLoop != nullptr && SafetyInfo != nullptr &&
888          "Unexpected input to hoistRegion.");
889 
890   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
891 
892   // Keep track of instructions that have been hoisted, as they may need to be
893   // re-hoisted if they end up not dominating all of their uses.
894   SmallVector<Instruction *, 16> HoistedInstructions;
895 
896   // For PHI hoisting to work we need to hoist blocks before their successors.
897   // We can do this by iterating through the blocks in the loop in reverse
898   // post-order.
899   LoopBlocksRPO Worklist(CurLoop);
900   Worklist.perform(LI);
901   bool Changed = false;
902   BasicBlock *Preheader = CurLoop->getLoopPreheader();
903   for (BasicBlock *BB : Worklist) {
904     // Only need to process the contents of this block if it is not part of a
905     // subloop (which would already have been processed).
906     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
907       continue;
908 
909     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
910       // Try hoisting the instruction out to the preheader.  We can only do
911       // this if all of the operands of the instruction are loop invariant and
912       // if it is safe to hoist the instruction. We also check block frequency
913       // to make sure instruction only gets hoisted into colder blocks.
914       // TODO: It may be safe to hoist if we are hoisting to a conditional block
915       // and we have accurately duplicated the control flow from the loop header
916       // to that block.
917       if (CurLoop->hasLoopInvariantOperands(&I) &&
918           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
919           isSafeToExecuteUnconditionally(
920               I, DT, TLI, CurLoop, SafetyInfo, ORE,
921               Preheader->getTerminator(), AC, AllowSpeculation)) {
922         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
923               MSSAU, SE, ORE);
924         HoistedInstructions.push_back(&I);
925         Changed = true;
926         continue;
927       }
928 
929       // Attempt to remove floating point division out of the loop by
930       // converting it to a reciprocal multiplication.
931       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
932           CurLoop->isLoopInvariant(I.getOperand(1))) {
933         auto Divisor = I.getOperand(1);
934         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
935         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
936         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
937         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
938         ReciprocalDivisor->insertBefore(I.getIterator());
939         ReciprocalDivisor->setDebugLoc(I.getDebugLoc());
940 
941         auto Product =
942             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
943         Product->setFastMathFlags(I.getFastMathFlags());
944         SafetyInfo->insertInstructionTo(Product, I.getParent());
945         Product->insertAfter(I.getIterator());
946         Product->setDebugLoc(I.getDebugLoc());
947         I.replaceAllUsesWith(Product);
948         eraseInstruction(I, *SafetyInfo, MSSAU);
949 
950         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
951               SafetyInfo, MSSAU, SE, ORE);
952         HoistedInstructions.push_back(ReciprocalDivisor);
953         Changed = true;
954         continue;
955       }
956 
957       auto IsInvariantStart = [&](Instruction &I) {
958         using namespace PatternMatch;
959         return I.use_empty() &&
960                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
961       };
962       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
963         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
964                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
965       };
966       if ((IsInvariantStart(I) || isGuard(&I)) &&
967           CurLoop->hasLoopInvariantOperands(&I) &&
968           MustExecuteWithoutWritesBefore(I)) {
969         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
970               MSSAU, SE, ORE);
971         HoistedInstructions.push_back(&I);
972         Changed = true;
973         continue;
974       }
975 
976       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
977         if (CFH.canHoistPHI(PN)) {
978           // Redirect incoming blocks first to ensure that we create hoisted
979           // versions of those blocks before we hoist the phi.
980           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
981             PN->setIncomingBlock(
982                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
983           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
984                 MSSAU, SE, ORE);
985           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
986           Changed = true;
987           continue;
988         }
989       }
990 
991       // Try to reassociate instructions so that part of computations can be
992       // done out of loop.
993       if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
994         Changed = true;
995         continue;
996       }
997 
998       // Remember possibly hoistable branches so we can actually hoist them
999       // later if needed.
1000       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
1001         CFH.registerPossiblyHoistableBranch(BI);
1002     }
1003   }
1004 
1005   // If we hoisted instructions to a conditional block they may not dominate
1006   // their uses that weren't hoisted (such as phis where some operands are not
1007   // loop invariant). If so make them unconditional by moving them to their
1008   // immediate dominator. We iterate through the instructions in reverse order
1009   // which ensures that when we rehoist an instruction we rehoist its operands,
1010   // and also keep track of where in the block we are rehoisting to make sure
1011   // that we rehoist instructions before the instructions that use them.
1012   Instruction *HoistPoint = nullptr;
1013   if (ControlFlowHoisting) {
1014     for (Instruction *I : reverse(HoistedInstructions)) {
1015       if (!llvm::all_of(I->uses(),
1016                         [&](Use &U) { return DT->dominates(I, U); })) {
1017         BasicBlock *Dominator =
1018             DT->getNode(I->getParent())->getIDom()->getBlock();
1019         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
1020           if (HoistPoint)
1021             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1022                    "New hoist point expected to dominate old hoist point");
1023           HoistPoint = Dominator->getTerminator();
1024         }
1025         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1026                           << HoistPoint->getParent()->getNameOrAsOperand()
1027                           << ": " << *I << "\n");
1028         moveInstructionBefore(*I, HoistPoint->getIterator(), *SafetyInfo, MSSAU,
1029                               SE);
1030         HoistPoint = I;
1031         Changed = true;
1032       }
1033     }
1034   }
1035   if (VerifyMemorySSA)
1036     MSSAU.getMemorySSA()->verifyMemorySSA();
1037 
1038     // Now that we've finished hoisting make sure that LI and DT are still
1039     // valid.
1040 #ifdef EXPENSIVE_CHECKS
1041   if (Changed) {
1042     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1043            "Dominator tree verification failed");
1044     LI->verify(*DT);
1045   }
1046 #endif
1047 
1048   return Changed;
1049 }
1050 
1051 // Return true if LI is invariant within scope of the loop. LI is invariant if
1052 // CurLoop is dominated by an invariant.start representing the same memory
1053 // location and size as the memory location LI loads from, and also the
1054 // invariant.start has no uses.
isLoadInvariantInLoop(LoadInst * LI,DominatorTree * DT,Loop * CurLoop)1055 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1056                                   Loop *CurLoop) {
1057   Value *Addr = LI->getPointerOperand();
1058   const DataLayout &DL = LI->getDataLayout();
1059   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1060 
1061   // It is not currently possible for clang to generate an invariant.start
1062   // intrinsic with scalable vector types because we don't support thread local
1063   // sizeless types and we don't permit sizeless types in structs or classes.
1064   // Furthermore, even if support is added for this in future the intrinsic
1065   // itself is defined to have a size of -1 for variable sized objects. This
1066   // makes it impossible to verify if the intrinsic envelops our region of
1067   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1068   // types would have a -1 parameter, but the former is clearly double the size
1069   // of the latter.
1070   if (LocSizeInBits.isScalable())
1071     return false;
1072 
1073   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1074   // uselists for non-local Values in a loop pass.
1075   if (isa<Constant>(Addr))
1076     return false;
1077 
1078   unsigned UsesVisited = 0;
1079   // Traverse all uses of the load operand value, to see if invariant.start is
1080   // one of the uses, and whether it dominates the load instruction.
1081   for (auto *U : Addr->users()) {
1082     // Avoid traversing for Load operand with high number of users.
1083     if (++UsesVisited > MaxNumUsesTraversed)
1084       return false;
1085     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1086     // If there are escaping uses of invariant.start instruction, the load maybe
1087     // non-invariant.
1088     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1089         !II->use_empty())
1090       continue;
1091     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1092     // The intrinsic supports having a -1 argument for variable sized objects
1093     // so we should check for that here.
1094     if (InvariantSize->isNegative())
1095       continue;
1096     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1097     // Confirm the invariant.start location size contains the load operand size
1098     // in bits. Also, the invariant.start should dominate the load, and we
1099     // should not hoist the load out of a loop that contains this dominating
1100     // invariant.start.
1101     if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1102         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1103       return true;
1104   }
1105 
1106   return false;
1107 }
1108 
1109 namespace {
1110 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1111 /// sink a given instruction out of a loop.  Does not address legality
1112 /// concerns such as aliasing or speculation safety.
isHoistableAndSinkableInst(Instruction & I)1113 bool isHoistableAndSinkableInst(Instruction &I) {
1114   // Only these instructions are hoistable/sinkable.
1115   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1116           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1117           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1118           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1119           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1120           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1121           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1122 }
1123 
1124 /// Return true if I is the only Instruction with a MemoryAccess in L.
isOnlyMemoryAccess(const Instruction * I,const Loop * L,const MemorySSAUpdater & MSSAU)1125 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1126                         const MemorySSAUpdater &MSSAU) {
1127   for (auto *BB : L->getBlocks())
1128     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1129       int NotAPhi = 0;
1130       for (const auto &Acc : *Accs) {
1131         if (isa<MemoryPhi>(&Acc))
1132           continue;
1133         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1134         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1135           return false;
1136       }
1137     }
1138   return true;
1139 }
1140 }
1141 
getClobberingMemoryAccess(MemorySSA & MSSA,BatchAAResults & BAA,SinkAndHoistLICMFlags & Flags,MemoryUseOrDef * MA)1142 static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1143                                                BatchAAResults &BAA,
1144                                                SinkAndHoistLICMFlags &Flags,
1145                                                MemoryUseOrDef *MA) {
1146   // See declaration of SetLicmMssaOptCap for usage details.
1147   if (Flags.tooManyClobberingCalls())
1148     return MA->getDefiningAccess();
1149 
1150   MemoryAccess *Source =
1151       MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
1152   Flags.incrementClobberingCalls();
1153   return Source;
1154 }
1155 
canSinkOrHoistInst(Instruction & I,AAResults * AA,DominatorTree * DT,Loop * CurLoop,MemorySSAUpdater & MSSAU,bool TargetExecutesOncePerLoop,SinkAndHoistLICMFlags & Flags,OptimizationRemarkEmitter * ORE)1156 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1157                               Loop *CurLoop, MemorySSAUpdater &MSSAU,
1158                               bool TargetExecutesOncePerLoop,
1159                               SinkAndHoistLICMFlags &Flags,
1160                               OptimizationRemarkEmitter *ORE) {
1161   // If we don't understand the instruction, bail early.
1162   if (!isHoistableAndSinkableInst(I))
1163     return false;
1164 
1165   MemorySSA *MSSA = MSSAU.getMemorySSA();
1166   // Loads have extra constraints we have to verify before we can hoist them.
1167   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1168     if (!LI->isUnordered())
1169       return false; // Don't sink/hoist volatile or ordered atomic loads!
1170 
1171     // Loads from constant memory are always safe to move, even if they end up
1172     // in the same alias set as something that ends up being modified.
1173     if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
1174       return true;
1175     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1176       return true;
1177 
1178     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1179       return false; // Don't risk duplicating unordered loads
1180 
1181     // This checks for an invariant.start dominating the load.
1182     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1183       return true;
1184 
1185     auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
1186 
1187     bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
1188 
1189     bool Invalidated = pointerInvalidatedByLoop(
1190         MSSA, MU, CurLoop, I, Flags, InvariantGroup);
1191     // Check loop-invariant address because this may also be a sinkable load
1192     // whose address is not necessarily loop-invariant.
1193     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1194       ORE->emit([&]() {
1195         return OptimizationRemarkMissed(
1196                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1197                << "failed to move load with loop-invariant address "
1198                   "because the loop may invalidate its value";
1199       });
1200 
1201     return !Invalidated;
1202   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1203     // Don't sink calls which can throw.
1204     if (CI->mayThrow())
1205       return false;
1206 
1207     // Convergent attribute has been used on operations that involve
1208     // inter-thread communication which results are implicitly affected by the
1209     // enclosing control flows. It is not safe to hoist or sink such operations
1210     // across control flow.
1211     if (CI->isConvergent())
1212       return false;
1213 
1214     // FIXME: Current LLVM IR semantics don't work well with coroutines and
1215     // thread local globals. We currently treat getting the address of a thread
1216     // local global as not accessing memory, even though it may not be a
1217     // constant throughout a function with coroutines. Remove this check after
1218     // we better model semantics of thread local globals.
1219     if (CI->getFunction()->isPresplitCoroutine())
1220       return false;
1221 
1222     using namespace PatternMatch;
1223     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1224       // Assumes don't actually alias anything or throw
1225       return true;
1226 
1227     // Handle simple cases by querying alias analysis.
1228     MemoryEffects Behavior = AA->getMemoryEffects(CI);
1229 
1230     if (Behavior.doesNotAccessMemory())
1231       return true;
1232     if (Behavior.onlyReadsMemory()) {
1233       // If we can prove there are no writes to the memory read by the call, we
1234       // can hoist or sink.
1235       return !pointerInvalidatedByLoop(
1236           MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I, Flags,
1237           /*InvariantGroup=*/false);
1238     }
1239 
1240     if (Behavior.onlyWritesMemory()) {
1241       // can hoist or sink if there are no conflicting read/writes to the
1242       // memory location written to by the call.
1243       return noConflictingReadWrites(CI, MSSA, AA, CurLoop, Flags);
1244     }
1245 
1246     return false;
1247   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1248     // Fences alias (most) everything to provide ordering.  For the moment,
1249     // just give up if there are any other memory operations in the loop.
1250     return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1251   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1252     if (!SI->isUnordered())
1253       return false; // Don't sink/hoist volatile or ordered atomic store!
1254 
1255     // We can only hoist a store that we can prove writes a value which is not
1256     // read or overwritten within the loop.  For those cases, we fallback to
1257     // load store promotion instead.  TODO: We can extend this to cases where
1258     // there is exactly one write to the location and that write dominates an
1259     // arbitrary number of reads in the loop.
1260     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1261       return true;
1262     return noConflictingReadWrites(SI, MSSA, AA, CurLoop, Flags);
1263   }
1264 
1265   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1266 
1267   // We've established mechanical ability and aliasing, it's up to the caller
1268   // to check fault safety
1269   return true;
1270 }
1271 
1272 /// Returns true if a PHINode is a trivially replaceable with an
1273 /// Instruction.
1274 /// This is true when all incoming values are that instruction.
1275 /// This pattern occurs most often with LCSSA PHI nodes.
1276 ///
isTriviallyReplaceablePHI(const PHINode & PN,const Instruction & I)1277 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1278   for (const Value *IncValue : PN.incoming_values())
1279     if (IncValue != &I)
1280       return false;
1281 
1282   return true;
1283 }
1284 
1285 /// Return true if the instruction is foldable in the loop.
isFoldableInLoop(const Instruction & I,const Loop * CurLoop,const TargetTransformInfo * TTI)1286 static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1287                          const TargetTransformInfo *TTI) {
1288   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1289     InstructionCost CostI =
1290         TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1291     if (CostI != TargetTransformInfo::TCC_Free)
1292       return false;
1293     // For a GEP, we cannot simply use getInstructionCost because currently
1294     // it optimistically assumes that a GEP will fold into addressing mode
1295     // regardless of its users.
1296     const BasicBlock *BB = GEP->getParent();
1297     for (const User *U : GEP->users()) {
1298       const Instruction *UI = cast<Instruction>(U);
1299       if (CurLoop->contains(UI) &&
1300           (BB != UI->getParent() ||
1301            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1302         return false;
1303     }
1304     return true;
1305   }
1306 
1307   return false;
1308 }
1309 
1310 /// Return true if the only users of this instruction are outside of
1311 /// the loop. If this is true, we can sink the instruction to the exit
1312 /// blocks of the loop.
1313 ///
1314 /// We also return true if the instruction could be folded away in lowering.
1315 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
isNotUsedOrFoldableInLoop(const Instruction & I,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,TargetTransformInfo * TTI,bool & FoldableInLoop,bool LoopNestMode)1316 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1317                                       const LoopSafetyInfo *SafetyInfo,
1318                                       TargetTransformInfo *TTI,
1319                                       bool &FoldableInLoop, bool LoopNestMode) {
1320   const auto &BlockColors = SafetyInfo->getBlockColors();
1321   bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1322   for (const User *U : I.users()) {
1323     const Instruction *UI = cast<Instruction>(U);
1324     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1325       const BasicBlock *BB = PN->getParent();
1326       // We cannot sink uses in catchswitches.
1327       if (isa<CatchSwitchInst>(BB->getTerminator()))
1328         return false;
1329 
1330       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1331       // phi use is too muddled.
1332       if (isa<CallInst>(I))
1333         if (!BlockColors.empty() &&
1334             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1335           return false;
1336 
1337       if (LoopNestMode) {
1338         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1339                UI->getNumOperands() == 1) {
1340           if (!CurLoop->contains(UI))
1341             break;
1342           UI = cast<Instruction>(UI->user_back());
1343         }
1344       }
1345     }
1346 
1347     if (CurLoop->contains(UI)) {
1348       if (IsFoldable) {
1349         FoldableInLoop = true;
1350         continue;
1351       }
1352       return false;
1353     }
1354   }
1355   return true;
1356 }
1357 
cloneInstructionInExitBlock(Instruction & I,BasicBlock & ExitBlock,PHINode & PN,const LoopInfo * LI,const LoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU)1358 static Instruction *cloneInstructionInExitBlock(
1359     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1360     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1361   Instruction *New;
1362   if (auto *CI = dyn_cast<CallInst>(&I)) {
1363     const auto &BlockColors = SafetyInfo->getBlockColors();
1364 
1365     // Sinking call-sites need to be handled differently from other
1366     // instructions.  The cloned call-site needs a funclet bundle operand
1367     // appropriate for its location in the CFG.
1368     SmallVector<OperandBundleDef, 1> OpBundles;
1369     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1370          BundleIdx != BundleEnd; ++BundleIdx) {
1371       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1372       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1373         continue;
1374 
1375       OpBundles.emplace_back(Bundle);
1376     }
1377 
1378     if (!BlockColors.empty()) {
1379       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1380       assert(CV.size() == 1 && "non-unique color for exit block!");
1381       BasicBlock *BBColor = CV.front();
1382       BasicBlock::iterator EHPad = BBColor->getFirstNonPHIIt();
1383       if (EHPad->isEHPad())
1384         OpBundles.emplace_back("funclet", &*EHPad);
1385     }
1386 
1387     New = CallInst::Create(CI, OpBundles);
1388     New->copyMetadata(*CI);
1389   } else {
1390     New = I.clone();
1391   }
1392 
1393   New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
1394   if (!I.getName().empty())
1395     New->setName(I.getName() + ".le");
1396 
1397   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
1398     // Create a new MemoryAccess and let MemorySSA set its defining access.
1399     // After running some passes, MemorySSA might be outdated, and the
1400     // instruction `I` may have become a non-memory touching instruction.
1401     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1402         New, nullptr, New->getParent(), MemorySSA::Beginning,
1403         /*CreationMustSucceed=*/false);
1404     if (NewMemAcc) {
1405       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1406         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
1407       else {
1408         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1409         MSSAU.insertUse(MemUse, /*RenameUses=*/true);
1410       }
1411     }
1412   }
1413 
1414   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1415   // this is particularly cheap because we can rip off the PHI node that we're
1416   // replacing for the number and blocks of the predecessors.
1417   // OPT: If this shows up in a profile, we can instead finish sinking all
1418   // invariant instructions, and then walk their operands to re-establish
1419   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1420   // sinking bottom-up.
1421   for (Use &Op : New->operands())
1422     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1423       auto *OInst = cast<Instruction>(Op.get());
1424       PHINode *OpPN =
1425           PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1426                           OInst->getName() + ".lcssa");
1427       OpPN->insertBefore(ExitBlock.begin());
1428       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1429         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1430       Op = OpPN;
1431     }
1432   return New;
1433 }
1434 
eraseInstruction(Instruction & I,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU)1435 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1436                              MemorySSAUpdater &MSSAU) {
1437   MSSAU.removeMemoryAccess(&I);
1438   SafetyInfo.removeInstruction(&I);
1439   I.eraseFromParent();
1440 }
1441 
moveInstructionBefore(Instruction & I,BasicBlock::iterator Dest,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,ScalarEvolution * SE)1442 static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
1443                                   ICFLoopSafetyInfo &SafetyInfo,
1444                                   MemorySSAUpdater &MSSAU,
1445                                   ScalarEvolution *SE) {
1446   SafetyInfo.removeInstruction(&I);
1447   SafetyInfo.insertInstructionTo(&I, Dest->getParent());
1448   I.moveBefore(*Dest->getParent(), Dest);
1449   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1450           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1451     MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
1452                       MemorySSA::BeforeTerminator);
1453   if (SE)
1454     SE->forgetBlockAndLoopDispositions(&I);
1455 }
1456 
sinkThroughTriviallyReplaceablePHI(PHINode * TPN,Instruction * I,LoopInfo * LI,SmallDenseMap<BasicBlock *,Instruction *,32> & SunkCopies,const LoopSafetyInfo * SafetyInfo,const Loop * CurLoop,MemorySSAUpdater & MSSAU)1457 static Instruction *sinkThroughTriviallyReplaceablePHI(
1458     PHINode *TPN, Instruction *I, LoopInfo *LI,
1459     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1460     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1461     MemorySSAUpdater &MSSAU) {
1462   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1463          "Expect only trivially replaceable PHI");
1464   BasicBlock *ExitBlock = TPN->getParent();
1465   auto [It, Inserted] = SunkCopies.try_emplace(ExitBlock);
1466   if (Inserted)
1467     It->second = cloneInstructionInExitBlock(*I, *ExitBlock, *TPN, LI,
1468                                              SafetyInfo, MSSAU);
1469   return It->second;
1470 }
1471 
canSplitPredecessors(PHINode * PN,LoopSafetyInfo * SafetyInfo)1472 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1473   BasicBlock *BB = PN->getParent();
1474   if (!BB->canSplitPredecessors())
1475     return false;
1476   // It's not impossible to split EHPad blocks, but if BlockColors already exist
1477   // it require updating BlockColors for all offspring blocks accordingly. By
1478   // skipping such corner case, we can make updating BlockColors after splitting
1479   // predecessor fairly simple.
1480   if (!SafetyInfo->getBlockColors().empty() &&
1481       BB->getFirstNonPHIIt()->isEHPad())
1482     return false;
1483   for (BasicBlock *BBPred : predecessors(BB)) {
1484     if (isa<IndirectBrInst>(BBPred->getTerminator()))
1485       return false;
1486   }
1487   return true;
1488 }
1489 
splitPredecessorsOfLoopExit(PHINode * PN,DominatorTree * DT,LoopInfo * LI,const Loop * CurLoop,LoopSafetyInfo * SafetyInfo,MemorySSAUpdater * MSSAU)1490 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1491                                         LoopInfo *LI, const Loop *CurLoop,
1492                                         LoopSafetyInfo *SafetyInfo,
1493                                         MemorySSAUpdater *MSSAU) {
1494 #ifndef NDEBUG
1495   SmallVector<BasicBlock *, 32> ExitBlocks;
1496   CurLoop->getUniqueExitBlocks(ExitBlocks);
1497   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(llvm::from_range, ExitBlocks);
1498 #endif
1499   BasicBlock *ExitBB = PN->getParent();
1500   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1501 
1502   // Split predecessors of the loop exit to make instructions in the loop are
1503   // exposed to exit blocks through trivially replaceable PHIs while keeping the
1504   // loop in the canonical form where each predecessor of each exit block should
1505   // be contained within the loop. For example, this will convert the loop below
1506   // from
1507   //
1508   // LB1:
1509   //   %v1 =
1510   //   br %LE, %LB2
1511   // LB2:
1512   //   %v2 =
1513   //   br %LE, %LB1
1514   // LE:
1515   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1516   //
1517   // to
1518   //
1519   // LB1:
1520   //   %v1 =
1521   //   br %LE.split, %LB2
1522   // LB2:
1523   //   %v2 =
1524   //   br %LE.split2, %LB1
1525   // LE.split:
1526   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1527   //   br %LE
1528   // LE.split2:
1529   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1530   //   br %LE
1531   // LE:
1532   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1533   //
1534   const auto &BlockColors = SafetyInfo->getBlockColors();
1535   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1536   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
1537   while (!PredBBs.empty()) {
1538     BasicBlock *PredBB = *PredBBs.begin();
1539     assert(CurLoop->contains(PredBB) &&
1540            "Expect all predecessors are in the loop");
1541     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1542       BasicBlock *NewPred = SplitBlockPredecessors(
1543           ExitBB, PredBB, ".split.loop.exit", &DTU, LI, MSSAU, true);
1544       // Since we do not allow splitting EH-block with BlockColors in
1545       // canSplitPredecessors(), we can simply assign predecessor's color to
1546       // the new block.
1547       if (!BlockColors.empty())
1548         // Grab a reference to the ColorVector to be inserted before getting the
1549         // reference to the vector we are copying because inserting the new
1550         // element in BlockColors might cause the map to be reallocated.
1551         SafetyInfo->copyColors(NewPred, PredBB);
1552     }
1553     PredBBs.remove(PredBB);
1554   }
1555 }
1556 
1557 /// When an instruction is found to only be used outside of the loop, this
1558 /// function moves it to the exit blocks and patches up SSA form as needed.
1559 /// This method is guaranteed to remove the original instruction from its
1560 /// position, and may either delete it or move it to outside of the loop.
1561 ///
sink(Instruction & I,LoopInfo * LI,DominatorTree * DT,const Loop * CurLoop,ICFLoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU,OptimizationRemarkEmitter * ORE)1562 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1563                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1564                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1565   bool Changed = false;
1566   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1567 
1568   // Iterate over users to be ready for actual sinking. Replace users via
1569   // unreachable blocks with undef and make all user PHIs trivially replaceable.
1570   SmallPtrSet<Instruction *, 8> VisitedUsers;
1571   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1572     auto *User = cast<Instruction>(*UI);
1573     Use &U = UI.getUse();
1574     ++UI;
1575 
1576     if (VisitedUsers.count(User) || CurLoop->contains(User))
1577       continue;
1578 
1579     if (!DT->isReachableFromEntry(User->getParent())) {
1580       U = PoisonValue::get(I.getType());
1581       Changed = true;
1582       continue;
1583     }
1584 
1585     // The user must be a PHI node.
1586     PHINode *PN = cast<PHINode>(User);
1587 
1588     // Surprisingly, instructions can be used outside of loops without any
1589     // exits.  This can only happen in PHI nodes if the incoming block is
1590     // unreachable.
1591     BasicBlock *BB = PN->getIncomingBlock(U);
1592     if (!DT->isReachableFromEntry(BB)) {
1593       U = PoisonValue::get(I.getType());
1594       Changed = true;
1595       continue;
1596     }
1597 
1598     VisitedUsers.insert(PN);
1599     if (isTriviallyReplaceablePHI(*PN, I))
1600       continue;
1601 
1602     if (!canSplitPredecessors(PN, SafetyInfo))
1603       return Changed;
1604 
1605     // Split predecessors of the PHI so that we can make users trivially
1606     // replaceable.
1607     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1608 
1609     // Should rebuild the iterators, as they may be invalidated by
1610     // splitPredecessorsOfLoopExit().
1611     UI = I.user_begin();
1612     UE = I.user_end();
1613   }
1614 
1615   if (VisitedUsers.empty())
1616     return Changed;
1617 
1618   ORE->emit([&]() {
1619     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1620            << "sinking " << ore::NV("Inst", &I);
1621   });
1622   if (isa<LoadInst>(I))
1623     ++NumMovedLoads;
1624   else if (isa<CallInst>(I))
1625     ++NumMovedCalls;
1626   ++NumSunk;
1627 
1628 #ifndef NDEBUG
1629   SmallVector<BasicBlock *, 32> ExitBlocks;
1630   CurLoop->getUniqueExitBlocks(ExitBlocks);
1631   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(llvm::from_range, ExitBlocks);
1632 #endif
1633 
1634   // Clones of this instruction. Don't create more than one per exit block!
1635   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1636 
1637   // If this instruction is only used outside of the loop, then all users are
1638   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1639   // the instruction.
1640   // First check if I is worth sinking for all uses. Sink only when it is worth
1641   // across all uses.
1642   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1643   for (auto *UI : Users) {
1644     auto *User = cast<Instruction>(UI);
1645 
1646     if (CurLoop->contains(User))
1647       continue;
1648 
1649     PHINode *PN = cast<PHINode>(User);
1650     assert(ExitBlockSet.count(PN->getParent()) &&
1651            "The LCSSA PHI is not in an exit block!");
1652 
1653     // The PHI must be trivially replaceable.
1654     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1655         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1656     // As we sink the instruction out of the BB, drop its debug location.
1657     New->dropLocation();
1658     PN->replaceAllUsesWith(New);
1659     eraseInstruction(*PN, *SafetyInfo, MSSAU);
1660     Changed = true;
1661   }
1662   return Changed;
1663 }
1664 
1665 /// When an instruction is found to only use loop invariant operands that
1666 /// is safe to hoist, this instruction is called to do the dirty work.
1667 ///
hoist(Instruction & I,const DominatorTree * DT,const Loop * CurLoop,BasicBlock * Dest,ICFLoopSafetyInfo * SafetyInfo,MemorySSAUpdater & MSSAU,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)1668 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1669                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1670                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1671                   OptimizationRemarkEmitter *ORE) {
1672   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1673                     << I << "\n");
1674   ORE->emit([&]() {
1675     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1676                                                          << ore::NV("Inst", &I);
1677   });
1678 
1679   // Metadata can be dependent on conditions we are hoisting above.
1680   // Conservatively strip all metadata on the instruction unless we were
1681   // guaranteed to execute I if we entered the loop, in which case the metadata
1682   // is valid in the loop preheader.
1683   // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1684   // then moving to the preheader means we should strip attributes on the call
1685   // that can cause UB since we may be hoisting above conditions that allowed
1686   // inferring those attributes. They may not be valid at the preheader.
1687   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1688       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1689       // time in isGuaranteedToExecute if we don't actually have anything to
1690       // drop.  It is a compile time optimization, not required for correctness.
1691       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1692     I.dropUBImplyingAttrsAndMetadata();
1693 
1694   if (isa<PHINode>(I))
1695     // Move the new node to the end of the phi list in the destination block.
1696     moveInstructionBefore(I, Dest->getFirstNonPHIIt(), *SafetyInfo, MSSAU, SE);
1697   else
1698     // Move the new node to the destination block, before its terminator.
1699     moveInstructionBefore(I, Dest->getTerminator()->getIterator(), *SafetyInfo,
1700                           MSSAU, SE);
1701 
1702   I.updateLocationAfterHoist();
1703 
1704   if (isa<LoadInst>(I))
1705     ++NumMovedLoads;
1706   else if (isa<CallInst>(I))
1707     ++NumMovedCalls;
1708   ++NumHoisted;
1709 }
1710 
1711 /// Only sink or hoist an instruction if it is not a trapping instruction,
1712 /// or if the instruction is known not to trap when moved to the preheader.
1713 /// or if it is a trapping instruction and is guaranteed to execute.
isSafeToExecuteUnconditionally(Instruction & Inst,const DominatorTree * DT,const TargetLibraryInfo * TLI,const Loop * CurLoop,const LoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,const Instruction * CtxI,AssumptionCache * AC,bool AllowSpeculation)1714 static bool isSafeToExecuteUnconditionally(
1715     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1716     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1717     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1718     AssumptionCache *AC, bool AllowSpeculation) {
1719   if (AllowSpeculation &&
1720       isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
1721     return true;
1722 
1723   bool GuaranteedToExecute =
1724       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1725 
1726   if (!GuaranteedToExecute) {
1727     auto *LI = dyn_cast<LoadInst>(&Inst);
1728     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1729       ORE->emit([&]() {
1730         return OptimizationRemarkMissed(
1731                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1732                << "failed to hoist load with loop-invariant address "
1733                   "because load is conditionally executed";
1734       });
1735   }
1736 
1737   return GuaranteedToExecute;
1738 }
1739 
1740 namespace {
1741 class LoopPromoter : public LoadAndStorePromoter {
1742   Value *SomePtr; // Designated pointer to store to.
1743   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1744   SmallVectorImpl<BasicBlock::iterator> &LoopInsertPts;
1745   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1746   PredIteratorCache &PredCache;
1747   MemorySSAUpdater &MSSAU;
1748   LoopInfo &LI;
1749   DebugLoc DL;
1750   Align Alignment;
1751   bool UnorderedAtomic;
1752   AAMDNodes AATags;
1753   ICFLoopSafetyInfo &SafetyInfo;
1754   bool CanInsertStoresInExitBlocks;
1755   ArrayRef<const Instruction *> Uses;
1756 
1757   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1758   // (if legal) if doing so would add an out-of-loop use to an instruction
1759   // defined in-loop.
maybeInsertLCSSAPHI(Value * V,BasicBlock * BB) const1760   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1761     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1762       return V;
1763 
1764     Instruction *I = cast<Instruction>(V);
1765     // We need to create an LCSSA PHI node for the incoming value and
1766     // store that.
1767     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1768                                   I->getName() + ".lcssa");
1769     PN->insertBefore(BB->begin());
1770     for (BasicBlock *Pred : PredCache.get(BB))
1771       PN->addIncoming(I, Pred);
1772     return PN;
1773   }
1774 
1775 public:
LoopPromoter(Value * SP,ArrayRef<const Instruction * > Insts,SSAUpdater & S,SmallVectorImpl<BasicBlock * > & LEB,SmallVectorImpl<BasicBlock::iterator> & LIP,SmallVectorImpl<MemoryAccess * > & MSSAIP,PredIteratorCache & PIC,MemorySSAUpdater & MSSAU,LoopInfo & li,DebugLoc dl,Align Alignment,bool UnorderedAtomic,const AAMDNodes & AATags,ICFLoopSafetyInfo & SafetyInfo,bool CanInsertStoresInExitBlocks)1776   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1777                SmallVectorImpl<BasicBlock *> &LEB,
1778                SmallVectorImpl<BasicBlock::iterator> &LIP,
1779                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1780                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1781                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1782                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1783       : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1784         LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1785         LI(li), DL(std::move(dl)), Alignment(Alignment),
1786         UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1787         SafetyInfo(SafetyInfo),
1788         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1789 
insertStoresInLoopExitBlocks()1790   void insertStoresInLoopExitBlocks() {
1791     // Insert stores after in the loop exit blocks.  Each exit block gets a
1792     // store of the live-out values that feed them.  Since we've already told
1793     // the SSA updater about the defs in the loop and the preheader
1794     // definition, it is all set and we can start using it.
1795     DIAssignID *NewID = nullptr;
1796     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1797       BasicBlock *ExitBlock = LoopExitBlocks[i];
1798       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1799       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1800       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1801       BasicBlock::iterator InsertPos = LoopInsertPts[i];
1802       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1803       if (UnorderedAtomic)
1804         NewSI->setOrdering(AtomicOrdering::Unordered);
1805       NewSI->setAlignment(Alignment);
1806       NewSI->setDebugLoc(DL);
1807       // Attach DIAssignID metadata to the new store, generating it on the
1808       // first loop iteration.
1809       if (i == 0) {
1810         // NewSI will have its DIAssignID set here if there are any stores in
1811         // Uses with a DIAssignID attachment. This merged ID will then be
1812         // attached to the other inserted stores (in the branch below).
1813         NewSI->mergeDIAssignID(Uses);
1814         NewID = cast_or_null<DIAssignID>(
1815             NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1816       } else {
1817         // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1818         // above.
1819         NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1820       }
1821 
1822       if (AATags)
1823         NewSI->setAAMetadata(AATags);
1824 
1825       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1826       MemoryAccess *NewMemAcc;
1827       if (!MSSAInsertPoint) {
1828         NewMemAcc = MSSAU.createMemoryAccessInBB(
1829             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1830       } else {
1831         NewMemAcc =
1832             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1833       }
1834       MSSAInsertPts[i] = NewMemAcc;
1835       MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1836       // FIXME: true for safety, false may still be correct.
1837     }
1838   }
1839 
doExtraRewritesBeforeFinalDeletion()1840   void doExtraRewritesBeforeFinalDeletion() override {
1841     if (CanInsertStoresInExitBlocks)
1842       insertStoresInLoopExitBlocks();
1843   }
1844 
instructionDeleted(Instruction * I) const1845   void instructionDeleted(Instruction *I) const override {
1846     SafetyInfo.removeInstruction(I);
1847     MSSAU.removeMemoryAccess(I);
1848   }
1849 
shouldDelete(Instruction * I) const1850   bool shouldDelete(Instruction *I) const override {
1851     if (isa<StoreInst>(I))
1852       return CanInsertStoresInExitBlocks;
1853     return true;
1854   }
1855 };
1856 
isNotCapturedBeforeOrInLoop(const Value * V,const Loop * L,DominatorTree * DT)1857 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1858                                  DominatorTree *DT) {
1859   // We can perform the captured-before check against any instruction in the
1860   // loop header, as the loop header is reachable from any instruction inside
1861   // the loop.
1862   // TODO: ReturnCaptures=true shouldn't be necessary here.
1863   return capturesNothing(PointerMayBeCapturedBefore(
1864       V, /*ReturnCaptures=*/true, L->getHeader()->getTerminator(), DT,
1865       /*IncludeI=*/false, CaptureComponents::Provenance));
1866 }
1867 
1868 /// Return true if we can prove that a caller cannot inspect the object if an
1869 /// unwind occurs inside the loop.
isNotVisibleOnUnwindInLoop(const Value * Object,const Loop * L,DominatorTree * DT)1870 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1871                                 DominatorTree *DT) {
1872   bool RequiresNoCaptureBeforeUnwind;
1873   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1874     return false;
1875 
1876   return !RequiresNoCaptureBeforeUnwind ||
1877          isNotCapturedBeforeOrInLoop(Object, L, DT);
1878 }
1879 
isThreadLocalObject(const Value * Object,const Loop * L,DominatorTree * DT,TargetTransformInfo * TTI)1880 bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1881                          TargetTransformInfo *TTI) {
1882   // The object must be function-local to start with, and then not captured
1883   // before/in the loop.
1884   return (isIdentifiedFunctionLocal(Object) &&
1885           isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1886          (TTI->isSingleThreaded() || SingleThread);
1887 }
1888 
1889 } // namespace
1890 
1891 /// Try to promote memory values to scalars by sinking stores out of the
1892 /// loop and moving loads to before the loop.  We do this by looping over
1893 /// the stores in the loop, looking for stores to Must pointers which are
1894 /// loop invariant.
1895 ///
promoteLoopAccessesToScalars(const SmallSetVector<Value *,8> & PointerMustAliases,SmallVectorImpl<BasicBlock * > & ExitBlocks,SmallVectorImpl<BasicBlock::iterator> & InsertPts,SmallVectorImpl<MemoryAccess * > & MSSAInsertPts,PredIteratorCache & PIC,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,const TargetLibraryInfo * TLI,TargetTransformInfo * TTI,Loop * CurLoop,MemorySSAUpdater & MSSAU,ICFLoopSafetyInfo * SafetyInfo,OptimizationRemarkEmitter * ORE,bool AllowSpeculation,bool HasReadsOutsideSet)1896 bool llvm::promoteLoopAccessesToScalars(
1897     const SmallSetVector<Value *, 8> &PointerMustAliases,
1898     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1899     SmallVectorImpl<BasicBlock::iterator> &InsertPts,
1900     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1901     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1902     const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1903     MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1904     OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1905     bool HasReadsOutsideSet) {
1906   // Verify inputs.
1907   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1908          SafetyInfo != nullptr &&
1909          "Unexpected Input to promoteLoopAccessesToScalars");
1910 
1911   LLVM_DEBUG({
1912     dbgs() << "Trying to promote set of must-aliased pointers:\n";
1913     for (Value *Ptr : PointerMustAliases)
1914       dbgs() << "  " << *Ptr << "\n";
1915   });
1916   ++NumPromotionCandidates;
1917 
1918   Value *SomePtr = *PointerMustAliases.begin();
1919   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1920 
1921   // It is not safe to promote a load/store from the loop if the load/store is
1922   // conditional.  For example, turning:
1923   //
1924   //    for () { if (c) *P += 1; }
1925   //
1926   // into:
1927   //
1928   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1929   //
1930   // is not safe, because *P may only be valid to access if 'c' is true.
1931   //
1932   // The safety property divides into two parts:
1933   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1934   //    case, we can't insert the required load in the preheader.
1935   // p2) The memory model does not allow us to insert a store along any dynamic
1936   //    path which did not originally have one.
1937   //
1938   // If at least one store is guaranteed to execute, both properties are
1939   // satisfied, and promotion is legal.
1940   //
1941   // This, however, is not a necessary condition. Even if no store/load is
1942   // guaranteed to execute, we can still establish these properties.
1943   // We can establish (p1) by proving that hoisting the load into the preheader
1944   // is safe (i.e. proving dereferenceability on all paths through the loop). We
1945   // can use any access within the alias set to prove dereferenceability,
1946   // since they're all must alias.
1947   //
1948   // There are two ways establish (p2):
1949   // a) Prove the location is thread-local. In this case the memory model
1950   // requirement does not apply, and stores are safe to insert.
1951   // b) Prove a store dominates every exit block. In this case, if an exit
1952   // blocks is reached, the original dynamic path would have taken us through
1953   // the store, so inserting a store into the exit block is safe. Note that this
1954   // is different from the store being guaranteed to execute. For instance,
1955   // if an exception is thrown on the first iteration of the loop, the original
1956   // store is never executed, but the exit blocks are not executed either.
1957 
1958   bool DereferenceableInPH = false;
1959   bool StoreIsGuanteedToExecute = false;
1960   bool LoadIsGuaranteedToExecute = false;
1961   bool FoundLoadToPromote = false;
1962 
1963   // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
1964   enum {
1965     StoreSafe,
1966     StoreUnsafe,
1967     StoreSafetyUnknown,
1968   } StoreSafety = StoreSafetyUnknown;
1969 
1970   SmallVector<Instruction *, 64> LoopUses;
1971 
1972   // We start with an alignment of one and try to find instructions that allow
1973   // us to prove better alignment.
1974   Align Alignment;
1975   // Keep track of which types of access we see
1976   bool SawUnorderedAtomic = false;
1977   bool SawNotAtomic = false;
1978   AAMDNodes AATags;
1979 
1980   const DataLayout &MDL = Preheader->getDataLayout();
1981 
1982   // If there are reads outside the promoted set, then promoting stores is
1983   // definitely not safe.
1984   if (HasReadsOutsideSet)
1985     StoreSafety = StoreUnsafe;
1986 
1987   if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
1988     // If a loop can throw, we have to insert a store along each unwind edge.
1989     // That said, we can't actually make the unwind edge explicit. Therefore,
1990     // we have to prove that the store is dead along the unwind edge.  We do
1991     // this by proving that the caller can't have a reference to the object
1992     // after return and thus can't possibly load from the object.
1993     Value *Object = getUnderlyingObject(SomePtr);
1994     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
1995       StoreSafety = StoreUnsafe;
1996   }
1997 
1998   // Check that all accesses to pointers in the alias set use the same type.
1999   // We cannot (yet) promote a memory location that is loaded and stored in
2000   // different sizes.  While we are at it, collect alignment and AA info.
2001   Type *AccessTy = nullptr;
2002   for (Value *ASIV : PointerMustAliases) {
2003     for (Use &U : ASIV->uses()) {
2004       // Ignore instructions that are outside the loop.
2005       Instruction *UI = dyn_cast<Instruction>(U.getUser());
2006       if (!UI || !CurLoop->contains(UI))
2007         continue;
2008 
2009       // If there is an non-load/store instruction in the loop, we can't promote
2010       // it.
2011       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2012         if (!Load->isUnordered())
2013           return false;
2014 
2015         SawUnorderedAtomic |= Load->isAtomic();
2016         SawNotAtomic |= !Load->isAtomic();
2017         FoundLoadToPromote = true;
2018 
2019         Align InstAlignment = Load->getAlign();
2020 
2021         if (!LoadIsGuaranteedToExecute)
2022           LoadIsGuaranteedToExecute =
2023               SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2024 
2025         // Note that proving a load safe to speculate requires proving
2026         // sufficient alignment at the target location.  Proving it guaranteed
2027         // to execute does as well.  Thus we can increase our guaranteed
2028         // alignment as well.
2029         if (!DereferenceableInPH || (InstAlignment > Alignment))
2030           if (isSafeToExecuteUnconditionally(
2031                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2032                   Preheader->getTerminator(), AC, AllowSpeculation)) {
2033             DereferenceableInPH = true;
2034             Alignment = std::max(Alignment, InstAlignment);
2035           }
2036       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2037         // Stores *of* the pointer are not interesting, only stores *to* the
2038         // pointer.
2039         if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2040           continue;
2041         if (!Store->isUnordered())
2042           return false;
2043 
2044         SawUnorderedAtomic |= Store->isAtomic();
2045         SawNotAtomic |= !Store->isAtomic();
2046 
2047         // If the store is guaranteed to execute, both properties are satisfied.
2048         // We may want to check if a store is guaranteed to execute even if we
2049         // already know that promotion is safe, since it may have higher
2050         // alignment than any other guaranteed stores, in which case we can
2051         // raise the alignment on the promoted store.
2052         Align InstAlignment = Store->getAlign();
2053         bool GuaranteedToExecute =
2054             SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2055         StoreIsGuanteedToExecute |= GuaranteedToExecute;
2056         if (GuaranteedToExecute) {
2057           DereferenceableInPH = true;
2058           if (StoreSafety == StoreSafetyUnknown)
2059             StoreSafety = StoreSafe;
2060           Alignment = std::max(Alignment, InstAlignment);
2061         }
2062 
2063         // If a store dominates all exit blocks, it is safe to sink.
2064         // As explained above, if an exit block was executed, a dominating
2065         // store must have been executed at least once, so we are not
2066         // introducing stores on paths that did not have them.
2067         // Note that this only looks at explicit exit blocks. If we ever
2068         // start sinking stores into unwind edges (see above), this will break.
2069         if (StoreSafety == StoreSafetyUnknown &&
2070             llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2071               return DT->dominates(Store->getParent(), Exit);
2072             }))
2073           StoreSafety = StoreSafe;
2074 
2075         // If the store is not guaranteed to execute, we may still get
2076         // deref info through it.
2077         if (!DereferenceableInPH) {
2078           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2079               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2080               Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
2081         }
2082       } else
2083         continue; // Not a load or store.
2084 
2085       if (!AccessTy)
2086         AccessTy = getLoadStoreType(UI);
2087       else if (AccessTy != getLoadStoreType(UI))
2088         return false;
2089 
2090       // Merge the AA tags.
2091       if (LoopUses.empty()) {
2092         // On the first load/store, just take its AA tags.
2093         AATags = UI->getAAMetadata();
2094       } else if (AATags) {
2095         AATags = AATags.merge(UI->getAAMetadata());
2096       }
2097 
2098       LoopUses.push_back(UI);
2099     }
2100   }
2101 
2102   // If we found both an unordered atomic instruction and a non-atomic memory
2103   // access, bail.  We can't blindly promote non-atomic to atomic since we
2104   // might not be able to lower the result.  We can't downgrade since that
2105   // would violate memory model.  Also, align 0 is an error for atomics.
2106   if (SawUnorderedAtomic && SawNotAtomic)
2107     return false;
2108 
2109   // If we're inserting an atomic load in the preheader, we must be able to
2110   // lower it.  We're only guaranteed to be able to lower naturally aligned
2111   // atomics.
2112   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2113     return false;
2114 
2115   // If we couldn't prove we can hoist the load, bail.
2116   if (!DereferenceableInPH) {
2117     LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2118     return false;
2119   }
2120 
2121   // We know we can hoist the load, but don't have a guaranteed store.
2122   // Check whether the location is writable and thread-local. If it is, then we
2123   // can insert stores along paths which originally didn't have them without
2124   // violating the memory model.
2125   if (StoreSafety == StoreSafetyUnknown) {
2126     Value *Object = getUnderlyingObject(SomePtr);
2127     bool ExplicitlyDereferenceableOnly;
2128     if (isWritableObject(Object, ExplicitlyDereferenceableOnly) &&
2129         (!ExplicitlyDereferenceableOnly ||
2130          isDereferenceablePointer(SomePtr, AccessTy, MDL)) &&
2131         isThreadLocalObject(Object, CurLoop, DT, TTI))
2132       StoreSafety = StoreSafe;
2133   }
2134 
2135   // If we've still failed to prove we can sink the store, hoist the load
2136   // only, if possible.
2137   if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2138     // If we cannot hoist the load either, give up.
2139     return false;
2140 
2141   // Lets do the promotion!
2142   if (StoreSafety == StoreSafe) {
2143     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2144                       << '\n');
2145     ++NumLoadStorePromoted;
2146   } else {
2147     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2148                       << '\n');
2149     ++NumLoadPromoted;
2150   }
2151 
2152   ORE->emit([&]() {
2153     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2154                               LoopUses[0])
2155            << "Moving accesses to memory location out of the loop";
2156   });
2157 
2158   // Look at all the loop uses, and try to merge their locations.
2159   std::vector<DebugLoc> LoopUsesLocs;
2160   for (auto U : LoopUses)
2161     LoopUsesLocs.push_back(U->getDebugLoc());
2162   auto DL = DebugLoc::getMergedLocations(LoopUsesLocs);
2163 
2164   // We use the SSAUpdater interface to insert phi nodes as required.
2165   SmallVector<PHINode *, 16> NewPHIs;
2166   SSAUpdater SSA(&NewPHIs);
2167   LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2168                         MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2169                         SawUnorderedAtomic,
2170                         StoreIsGuanteedToExecute ? AATags : AAMDNodes(),
2171                         *SafetyInfo, StoreSafety == StoreSafe);
2172 
2173   // Set up the preheader to have a definition of the value.  It is the live-out
2174   // value from the preheader that uses in the loop will use.
2175   LoadInst *PreheaderLoad = nullptr;
2176   if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
2177     PreheaderLoad =
2178         new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2179                      Preheader->getTerminator()->getIterator());
2180     if (SawUnorderedAtomic)
2181       PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2182     PreheaderLoad->setAlignment(Alignment);
2183     PreheaderLoad->setDebugLoc(DebugLoc::getDropped());
2184     if (AATags && LoadIsGuaranteedToExecute)
2185       PreheaderLoad->setAAMetadata(AATags);
2186 
2187     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2188         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2189     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2190     MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2191     SSA.AddAvailableValue(Preheader, PreheaderLoad);
2192   } else {
2193     SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2194   }
2195 
2196   if (VerifyMemorySSA)
2197     MSSAU.getMemorySSA()->verifyMemorySSA();
2198   // Rewrite all the loads in the loop and remember all the definitions from
2199   // stores in the loop.
2200   Promoter.run(LoopUses);
2201 
2202   if (VerifyMemorySSA)
2203     MSSAU.getMemorySSA()->verifyMemorySSA();
2204   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2205   if (PreheaderLoad && PreheaderLoad->use_empty())
2206     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2207 
2208   return true;
2209 }
2210 
foreachMemoryAccess(MemorySSA * MSSA,Loop * L,function_ref<void (Instruction *)> Fn)2211 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2212                                 function_ref<void(Instruction *)> Fn) {
2213   for (const BasicBlock *BB : L->blocks())
2214     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2215       for (const auto &Access : *Accesses)
2216         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2217           Fn(MUD->getMemoryInst());
2218 }
2219 
2220 // The bool indicates whether there might be reads outside the set, in which
2221 // case only loads may be promoted.
2222 static SmallVector<PointersAndHasReadsOutsideSet, 0>
collectPromotionCandidates(MemorySSA * MSSA,AliasAnalysis * AA,Loop * L)2223 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2224   BatchAAResults BatchAA(*AA);
2225   AliasSetTracker AST(BatchAA);
2226 
2227   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2228     if (const auto *SI = dyn_cast<StoreInst>(I)) {
2229       const Value *PtrOp = SI->getPointerOperand();
2230       return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
2231     }
2232     if (const auto *LI = dyn_cast<LoadInst>(I)) {
2233       const Value *PtrOp = LI->getPointerOperand();
2234       return !isa<ConstantData>(PtrOp) && L->isLoopInvariant(PtrOp);
2235     }
2236     return false;
2237   };
2238 
2239   // Populate AST with potentially promotable accesses.
2240   SmallPtrSet<Value *, 16> AttemptingPromotion;
2241   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2242     if (IsPotentiallyPromotable(I)) {
2243       AttemptingPromotion.insert(I);
2244       AST.add(I);
2245     }
2246   });
2247 
2248   // We're only interested in must-alias sets that contain a mod.
2249   SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2250   for (AliasSet &AS : AST)
2251     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2252       Sets.push_back({&AS, false});
2253 
2254   if (Sets.empty())
2255     return {}; // Nothing to promote...
2256 
2257   // Discard any sets for which there is an aliasing non-promotable access.
2258   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2259     if (AttemptingPromotion.contains(I))
2260       return;
2261 
2262     llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2263       ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2264       // Cannot promote if there are writes outside the set.
2265       if (isModSet(MR))
2266         return true;
2267       if (isRefSet(MR)) {
2268         // Remember reads outside the set.
2269         Pair.setInt(true);
2270         // If this is a mod-only set and there are reads outside the set,
2271         // we will not be able to promote, so bail out early.
2272         return !Pair.getPointer()->isRef();
2273       }
2274       return false;
2275     });
2276   });
2277 
2278   SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2279   for (auto [Set, HasReadsOutsideSet] : Sets) {
2280     SmallSetVector<Value *, 8> PointerMustAliases;
2281     for (const auto &MemLoc : *Set)
2282       PointerMustAliases.insert(const_cast<Value *>(MemLoc.Ptr));
2283     Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2284   }
2285 
2286   return Result;
2287 }
2288 
2289 // For a given store instruction or writeonly call instruction, this function
2290 // checks that there are no read or writes that conflict with the memory
2291 // access in the instruction
noConflictingReadWrites(Instruction * I,MemorySSA * MSSA,AAResults * AA,Loop * CurLoop,SinkAndHoistLICMFlags & Flags)2292 static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
2293                                     AAResults *AA, Loop *CurLoop,
2294                                     SinkAndHoistLICMFlags &Flags) {
2295   assert(isa<CallInst>(*I) || isa<StoreInst>(*I));
2296   // If there are more accesses than the Promotion cap, then give up as we're
2297   // not walking a list that long.
2298   if (Flags.tooManyMemoryAccesses())
2299     return false;
2300 
2301   auto *IMD = MSSA->getMemoryAccess(I);
2302   BatchAAResults BAA(*AA);
2303   auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
2304   // Make sure there are no clobbers inside the loop.
2305   if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
2306     return false;
2307 
2308   // If there are interfering Uses (i.e. their defining access is in the
2309   // loop), or ordered loads (stored as Defs!), don't move this store.
2310   // Could do better here, but this is conservatively correct.
2311   // TODO: Cache set of Uses on the first walk in runOnLoop, update when
2312   // moving accesses. Can also extend to dominating uses.
2313   for (auto *BB : CurLoop->getBlocks()) {
2314     auto *Accesses = MSSA->getBlockAccesses(BB);
2315     if (!Accesses)
2316       continue;
2317     for (const auto &MA : *Accesses)
2318       if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
2319         auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
2320                                              const_cast<MemoryUse *>(MU));
2321         if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
2322           return false;
2323         // Disable hoisting past potentially interfering loads. Optimized
2324         // Uses may point to an access outside the loop, as getClobbering
2325         // checks the previous iteration when walking the backedge.
2326         // FIXME: More precise: no Uses that alias I.
2327         if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
2328           return false;
2329       } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
2330         if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
2331           (void)LI; // Silence warning.
2332           assert(!LI->isUnordered() && "Expected unordered load");
2333           return false;
2334         }
2335         // Any call, while it may not be clobbering I, it may be a use.
2336         if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
2337           // Check if the call may read from the memory location written
2338           // to by I. Check CI's attributes and arguments; the number of
2339           // such checks performed is limited above by NoOfMemAccTooLarge.
2340           if (auto *SI = dyn_cast<StoreInst>(I)) {
2341             ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
2342             if (isModOrRefSet(MRI))
2343               return false;
2344           } else {
2345             auto *SCI = cast<CallInst>(I);
2346             // If the instruction we are wanting to hoist is also a call
2347             // instruction then we need not check mod/ref info with itself
2348             if (SCI == CI)
2349               continue;
2350             ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
2351             if (isModOrRefSet(MRI))
2352               return false;
2353           }
2354         }
2355       }
2356   }
2357   return true;
2358 }
2359 
pointerInvalidatedByLoop(MemorySSA * MSSA,MemoryUse * MU,Loop * CurLoop,Instruction & I,SinkAndHoistLICMFlags & Flags,bool InvariantGroup)2360 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2361                                      Loop *CurLoop, Instruction &I,
2362                                      SinkAndHoistLICMFlags &Flags,
2363                                      bool InvariantGroup) {
2364   // For hoisting, use the walker to determine safety
2365   if (!Flags.getIsSink()) {
2366     // If hoisting an invariant group, we only need to check that there
2367     // is no store to the loaded pointer between the start of the loop,
2368     // and the load (since all values must be the same).
2369 
2370     // This can be checked in two conditions:
2371     // 1) if the memoryaccess is outside the loop
2372     // 2) the earliest access is at the loop header,
2373     // if the memory loaded is the phi node
2374 
2375     BatchAAResults BAA(MSSA->getAA());
2376     MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
2377     return !MSSA->isLiveOnEntryDef(Source) &&
2378            CurLoop->contains(Source->getBlock()) &&
2379            !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
2380   }
2381 
2382   // For sinking, we'd need to check all Defs below this use. The getClobbering
2383   // call will look on the backedge of the loop, but will check aliasing with
2384   // the instructions on the previous iteration.
2385   // For example:
2386   // for (i ... )
2387   //   load a[i] ( Use (LoE)
2388   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2389   //   i++;
2390   // The load sees no clobbering inside the loop, as the backedge alias check
2391   // does phi translation, and will check aliasing against store a[i-1].
2392   // However sinking the load outside the loop, below the store is incorrect.
2393 
2394   // For now, only sink if there are no Defs in the loop, and the existing ones
2395   // precede the use and are in the same block.
2396   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2397   // needs PostDominatorTreeAnalysis.
2398   // FIXME: More precise: no Defs that alias this Use.
2399   if (Flags.tooManyMemoryAccesses())
2400     return true;
2401   for (auto *BB : CurLoop->getBlocks())
2402     if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2403       return true;
2404   // When sinking, the source block may not be part of the loop so check it.
2405   if (!CurLoop->contains(&I))
2406     return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2407 
2408   return false;
2409 }
2410 
pointerInvalidatedByBlock(BasicBlock & BB,MemorySSA & MSSA,MemoryUse & MU)2411 bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2412   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2413     for (const auto &MA : *Accesses)
2414       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2415         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2416           return true;
2417   return false;
2418 }
2419 
2420 /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2421 /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2422 /// minimun can be computed outside of loop, and X is not a loop-invariant.
hoistMinMax(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU)2423 static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2424                         MemorySSAUpdater &MSSAU) {
2425   bool Inverse = false;
2426   using namespace PatternMatch;
2427   Value *Cond1, *Cond2;
2428   if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
2429     Inverse = true;
2430   } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
2431     // Do nothing
2432   } else
2433     return false;
2434 
2435   auto MatchICmpAgainstInvariant = [&](Value *C, CmpPredicate &P, Value *&LHS,
2436                                        Value *&RHS) {
2437     if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
2438       return false;
2439     if (!LHS->getType()->isIntegerTy())
2440       return false;
2441     if (!ICmpInst::isRelational(P))
2442       return false;
2443     if (L.isLoopInvariant(LHS)) {
2444       std::swap(LHS, RHS);
2445       P = ICmpInst::getSwappedPredicate(P);
2446     }
2447     if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2448       return false;
2449     if (Inverse)
2450       P = ICmpInst::getInversePredicate(P);
2451     return true;
2452   };
2453   CmpPredicate P1, P2;
2454   Value *LHS1, *LHS2, *RHS1, *RHS2;
2455   if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
2456       !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
2457     return false;
2458   auto MatchingPred = CmpPredicate::getMatching(P1, P2);
2459   if (!MatchingPred || LHS1 != LHS2)
2460     return false;
2461 
2462   // Everything is fine, we can do the transform.
2463   bool UseMin = ICmpInst::isLT(*MatchingPred) || ICmpInst::isLE(*MatchingPred);
2464   assert(
2465       (UseMin || ICmpInst::isGT(*MatchingPred) ||
2466        ICmpInst::isGE(*MatchingPred)) &&
2467       "Relational predicate is either less (or equal) or greater (or equal)!");
2468   Intrinsic::ID id = ICmpInst::isSigned(*MatchingPred)
2469                          ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2470                          : (UseMin ? Intrinsic::umin : Intrinsic::umax);
2471   auto *Preheader = L.getLoopPreheader();
2472   assert(Preheader && "Loop is not in simplify form?");
2473   IRBuilder<> Builder(Preheader->getTerminator());
2474   // We are about to create a new guaranteed use for RHS2 which might not exist
2475   // before (if it was a non-taken input of logical and/or instruction). If it
2476   // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2477   // introduced, so they don't need this.
2478   if (isa<SelectInst>(I))
2479     RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
2480   Value *NewRHS = Builder.CreateBinaryIntrinsic(
2481       id, RHS1, RHS2, nullptr,
2482       StringRef("invariant.") +
2483           (ICmpInst::isSigned(*MatchingPred) ? "s" : "u") +
2484           (UseMin ? "min" : "max"));
2485   Builder.SetInsertPoint(&I);
2486   ICmpInst::Predicate P = *MatchingPred;
2487   if (Inverse)
2488     P = ICmpInst::getInversePredicate(P);
2489   Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
2490   NewCond->takeName(&I);
2491   I.replaceAllUsesWith(NewCond);
2492   eraseInstruction(I, SafetyInfo, MSSAU);
2493   Instruction &CondI1 = *cast<Instruction>(Cond1);
2494   Instruction &CondI2 = *cast<Instruction>(Cond2);
2495   salvageDebugInfo(CondI1);
2496   salvageDebugInfo(CondI2);
2497   eraseInstruction(CondI1, SafetyInfo, MSSAU);
2498   eraseInstruction(CondI2, SafetyInfo, MSSAU);
2499   return true;
2500 }
2501 
2502 /// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2503 /// this allows hoisting the inner GEP.
hoistGEP(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2504 static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2505                      MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2506                      DominatorTree *DT) {
2507   auto *GEP = dyn_cast<GetElementPtrInst>(&I);
2508   if (!GEP)
2509     return false;
2510 
2511   auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
2512   if (!Src || !Src->hasOneUse() || !L.contains(Src))
2513     return false;
2514 
2515   Value *SrcPtr = Src->getPointerOperand();
2516   auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2517   if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
2518     return false;
2519 
2520   // This can only happen if !AllowSpeculation, otherwise this would already be
2521   // handled.
2522   // FIXME: Should we respect AllowSpeculation in these reassociation folds?
2523   // The flag exists to prevent metadata dropping, which is not relevant here.
2524   if (all_of(Src->indices(), LoopInvariant))
2525     return false;
2526 
2527   // The swapped GEPs are inbounds if both original GEPs are inbounds
2528   // and the sign of the offsets is the same. For simplicity, only
2529   // handle both offsets being non-negative.
2530   const DataLayout &DL = GEP->getDataLayout();
2531   auto NonNegative = [&](Value *V) {
2532     return isKnownNonNegative(V, SimplifyQuery(DL, DT, AC, GEP));
2533   };
2534   bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2535                     all_of(Src->indices(), NonNegative) &&
2536                     all_of(GEP->indices(), NonNegative);
2537 
2538   BasicBlock *Preheader = L.getLoopPreheader();
2539   IRBuilder<> Builder(Preheader->getTerminator());
2540   Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
2541                                     SmallVector<Value *>(GEP->indices()),
2542                                     "invariant.gep", IsInBounds);
2543   Builder.SetInsertPoint(GEP);
2544   Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
2545                                     SmallVector<Value *>(Src->indices()), "gep",
2546                                     IsInBounds);
2547   GEP->replaceAllUsesWith(NewGEP);
2548   eraseInstruction(*GEP, SafetyInfo, MSSAU);
2549   salvageDebugInfo(*Src);
2550   eraseInstruction(*Src, SafetyInfo, MSSAU);
2551   return true;
2552 }
2553 
2554 /// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2555 /// C1 and C2 are loop invariants and LV is a loop-variant.
hoistAdd(ICmpInst::Predicate Pred,Value * VariantLHS,Value * InvariantRHS,ICmpInst & ICmp,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2556 static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2557                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2558                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2559                      AssumptionCache *AC, DominatorTree *DT) {
2560   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2561   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2562 
2563   bool IsSigned = ICmpInst::isSigned(Pred);
2564 
2565   // Try to represent VariantLHS as sum of invariant and variant operands.
2566   using namespace PatternMatch;
2567   Value *VariantOp, *InvariantOp;
2568   if (IsSigned &&
2569       !match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2570     return false;
2571   if (!IsSigned &&
2572       !match(VariantLHS, m_NUWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2573     return false;
2574 
2575   // LHS itself is a loop-variant, try to represent it in the form:
2576   // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2577   if (L.isLoopInvariant(VariantOp))
2578     std::swap(VariantOp, InvariantOp);
2579   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2580     return false;
2581 
2582   // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2583   // freely move values from left side of inequality to right side (just as in
2584   // normal linear arithmetics). Overflows make things much more complicated, so
2585   // we want to avoid this.
2586   auto &DL = L.getHeader()->getDataLayout();
2587   SimplifyQuery SQ(DL, DT, AC, &ICmp);
2588   if (IsSigned && computeOverflowForSignedSub(InvariantRHS, InvariantOp, SQ) !=
2589                       llvm::OverflowResult::NeverOverflows)
2590     return false;
2591   if (!IsSigned &&
2592       computeOverflowForUnsignedSub(InvariantRHS, InvariantOp, SQ) !=
2593           llvm::OverflowResult::NeverOverflows)
2594     return false;
2595   auto *Preheader = L.getLoopPreheader();
2596   assert(Preheader && "Loop is not in simplify form?");
2597   IRBuilder<> Builder(Preheader->getTerminator());
2598   Value *NewCmpOp =
2599       Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
2600                         /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
2601   ICmp.setPredicate(Pred);
2602   ICmp.setOperand(0, VariantOp);
2603   ICmp.setOperand(1, NewCmpOp);
2604 
2605   Instruction &DeadI = cast<Instruction>(*VariantLHS);
2606   salvageDebugInfo(DeadI);
2607   eraseInstruction(DeadI, SafetyInfo, MSSAU);
2608   return true;
2609 }
2610 
2611 /// Try to reassociate and hoist the following two patterns:
2612 /// LV - C1 < C2 --> LV < C1 + C2,
2613 /// C1 - LV < C2 --> LV > C1 - C2.
hoistSub(ICmpInst::Predicate Pred,Value * VariantLHS,Value * InvariantRHS,ICmpInst & ICmp,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2614 static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2615                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2616                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2617                      AssumptionCache *AC, DominatorTree *DT) {
2618   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2619   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2620 
2621   bool IsSigned = ICmpInst::isSigned(Pred);
2622 
2623   // Try to represent VariantLHS as sum of invariant and variant operands.
2624   using namespace PatternMatch;
2625   Value *VariantOp, *InvariantOp;
2626   if (IsSigned &&
2627       !match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
2628     return false;
2629   if (!IsSigned &&
2630       !match(VariantLHS, m_NUWSub(m_Value(VariantOp), m_Value(InvariantOp))))
2631     return false;
2632 
2633   bool VariantSubtracted = false;
2634   // LHS itself is a loop-variant, try to represent it in the form:
2635   // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2636   // the variant operand goes with minus, we use a slightly different scheme.
2637   if (L.isLoopInvariant(VariantOp)) {
2638     std::swap(VariantOp, InvariantOp);
2639     VariantSubtracted = true;
2640     Pred = ICmpInst::getSwappedPredicate(Pred);
2641   }
2642   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2643     return false;
2644 
2645   // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2646   // freely move values from left side of inequality to right side (just as in
2647   // normal linear arithmetics). Overflows make things much more complicated, so
2648   // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2649   // "C1 - C2" does not overflow.
2650   auto &DL = L.getHeader()->getDataLayout();
2651   SimplifyQuery SQ(DL, DT, AC, &ICmp);
2652   if (VariantSubtracted && IsSigned) {
2653     // C1 - LV < C2 --> LV > C1 - C2
2654     if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) !=
2655         llvm::OverflowResult::NeverOverflows)
2656       return false;
2657   } else if (VariantSubtracted && !IsSigned) {
2658     // C1 - LV < C2 --> LV > C1 - C2
2659     if (computeOverflowForUnsignedSub(InvariantOp, InvariantRHS, SQ) !=
2660         llvm::OverflowResult::NeverOverflows)
2661       return false;
2662   } else if (!VariantSubtracted && IsSigned) {
2663     // LV - C1 < C2 --> LV < C1 + C2
2664     if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) !=
2665         llvm::OverflowResult::NeverOverflows)
2666       return false;
2667   } else { // !VariantSubtracted && !IsSigned
2668     // LV - C1 < C2 --> LV < C1 + C2
2669     if (computeOverflowForUnsignedAdd(InvariantOp, InvariantRHS, SQ) !=
2670         llvm::OverflowResult::NeverOverflows)
2671       return false;
2672   }
2673   auto *Preheader = L.getLoopPreheader();
2674   assert(Preheader && "Loop is not in simplify form?");
2675   IRBuilder<> Builder(Preheader->getTerminator());
2676   Value *NewCmpOp =
2677       VariantSubtracted
2678           ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
2679                               /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned)
2680           : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
2681                               /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned);
2682   ICmp.setPredicate(Pred);
2683   ICmp.setOperand(0, VariantOp);
2684   ICmp.setOperand(1, NewCmpOp);
2685 
2686   Instruction &DeadI = cast<Instruction>(*VariantLHS);
2687   salvageDebugInfo(DeadI);
2688   eraseInstruction(DeadI, SafetyInfo, MSSAU);
2689   return true;
2690 }
2691 
2692 /// Reassociate and hoist add/sub expressions.
hoistAddSub(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2693 static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2694                         MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2695                         DominatorTree *DT) {
2696   using namespace PatternMatch;
2697   CmpPredicate Pred;
2698   Value *LHS, *RHS;
2699   if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
2700     return false;
2701 
2702   // Put variant operand to LHS position.
2703   if (L.isLoopInvariant(LHS)) {
2704     std::swap(LHS, RHS);
2705     Pred = ICmpInst::getSwappedPredicate(Pred);
2706   }
2707   // We want to delete the initial operation after reassociation, so only do it
2708   // if it has no other uses.
2709   if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
2710     return false;
2711 
2712   // TODO: We could go with smarter context, taking common dominator of all I's
2713   // users instead of I itself.
2714   if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2715     return true;
2716 
2717   if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2718     return true;
2719 
2720   return false;
2721 }
2722 
isReassociableOp(Instruction * I,unsigned IntOpcode,unsigned FPOpcode)2723 static bool isReassociableOp(Instruction *I, unsigned IntOpcode,
2724                              unsigned FPOpcode) {
2725   if (I->getOpcode() == IntOpcode)
2726     return true;
2727   if (I->getOpcode() == FPOpcode && I->hasAllowReassoc() &&
2728       I->hasNoSignedZeros())
2729     return true;
2730   return false;
2731 }
2732 
2733 /// Try to reassociate expressions like ((A1 * B1) + (A2 * B2) + ...) * C where
2734 /// A1, A2, ... and C are loop invariants into expressions like
2735 /// ((A1 * C * B1) + (A2 * C * B2) + ...) and hoist the (A1 * C), (A2 * C), ...
2736 /// invariant expressions. This functions returns true only if any hoisting has
2737 /// actually occured.
hoistMulAddAssociation(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2738 static bool hoistMulAddAssociation(Instruction &I, Loop &L,
2739                                    ICFLoopSafetyInfo &SafetyInfo,
2740                                    MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2741                                    DominatorTree *DT) {
2742   if (!isReassociableOp(&I, Instruction::Mul, Instruction::FMul))
2743     return false;
2744   Value *VariantOp = I.getOperand(0);
2745   Value *InvariantOp = I.getOperand(1);
2746   if (L.isLoopInvariant(VariantOp))
2747     std::swap(VariantOp, InvariantOp);
2748   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2749     return false;
2750   Value *Factor = InvariantOp;
2751 
2752   // First, we need to make sure we should do the transformation.
2753   SmallVector<Use *> Changes;
2754   SmallVector<BinaryOperator *> Adds;
2755   SmallVector<BinaryOperator *> Worklist;
2756   if (BinaryOperator *VariantBinOp = dyn_cast<BinaryOperator>(VariantOp))
2757     Worklist.push_back(VariantBinOp);
2758   while (!Worklist.empty()) {
2759     BinaryOperator *BO = Worklist.pop_back_val();
2760     if (!BO->hasOneUse())
2761       return false;
2762     if (isReassociableOp(BO, Instruction::Add, Instruction::FAdd) &&
2763         isa<BinaryOperator>(BO->getOperand(0)) &&
2764         isa<BinaryOperator>(BO->getOperand(1))) {
2765       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(0)));
2766       Worklist.push_back(cast<BinaryOperator>(BO->getOperand(1)));
2767       Adds.push_back(BO);
2768       continue;
2769     }
2770     if (!isReassociableOp(BO, Instruction::Mul, Instruction::FMul) ||
2771         L.isLoopInvariant(BO))
2772       return false;
2773     Use &U0 = BO->getOperandUse(0);
2774     Use &U1 = BO->getOperandUse(1);
2775     if (L.isLoopInvariant(U0))
2776       Changes.push_back(&U0);
2777     else if (L.isLoopInvariant(U1))
2778       Changes.push_back(&U1);
2779     else
2780       return false;
2781     unsigned Limit = I.getType()->isIntOrIntVectorTy()
2782                          ? IntAssociationUpperLimit
2783                          : FPAssociationUpperLimit;
2784     if (Changes.size() > Limit)
2785       return false;
2786   }
2787   if (Changes.empty())
2788     return false;
2789 
2790   // Drop the poison flags for any adds we looked through.
2791   if (I.getType()->isIntOrIntVectorTy()) {
2792     for (auto *Add : Adds)
2793       Add->dropPoisonGeneratingFlags();
2794   }
2795 
2796   // We know we should do it so let's do the transformation.
2797   auto *Preheader = L.getLoopPreheader();
2798   assert(Preheader && "Loop is not in simplify form?");
2799   IRBuilder<> Builder(Preheader->getTerminator());
2800   for (auto *U : Changes) {
2801     assert(L.isLoopInvariant(U->get()));
2802     auto *Ins = cast<BinaryOperator>(U->getUser());
2803     Value *Mul;
2804     if (I.getType()->isIntOrIntVectorTy()) {
2805       Mul = Builder.CreateMul(U->get(), Factor, "factor.op.mul");
2806       // Drop the poison flags on the original multiply.
2807       Ins->dropPoisonGeneratingFlags();
2808     } else
2809       Mul = Builder.CreateFMulFMF(U->get(), Factor, Ins, "factor.op.fmul");
2810 
2811     // Rewrite the reassociable instruction.
2812     unsigned OpIdx = U->getOperandNo();
2813     auto *LHS = OpIdx == 0 ? Mul : Ins->getOperand(0);
2814     auto *RHS = OpIdx == 1 ? Mul : Ins->getOperand(1);
2815     auto *NewBO =
2816         BinaryOperator::Create(Ins->getOpcode(), LHS, RHS,
2817                                Ins->getName() + ".reass", Ins->getIterator());
2818     NewBO->setDebugLoc(DebugLoc::getDropped());
2819     NewBO->copyIRFlags(Ins);
2820     if (VariantOp == Ins)
2821       VariantOp = NewBO;
2822     Ins->replaceAllUsesWith(NewBO);
2823     eraseInstruction(*Ins, SafetyInfo, MSSAU);
2824   }
2825 
2826   I.replaceAllUsesWith(VariantOp);
2827   eraseInstruction(I, SafetyInfo, MSSAU);
2828   return true;
2829 }
2830 
2831 /// Reassociate associative binary expressions of the form
2832 ///
2833 /// 1. "(LV op C1) op C2" ==> "LV op (C1 op C2)"
2834 /// 2. "(C1 op LV) op C2" ==> "LV op (C1 op C2)"
2835 /// 3. "C2 op (C1 op LV)" ==> "LV op (C1 op C2)"
2836 /// 4. "C2 op (LV op C1)" ==> "LV op (C1 op C2)"
2837 ///
2838 /// where op is an associative BinOp, LV is a loop variant, and C1 and C2 are
2839 /// loop invariants that we want to hoist, noting that associativity implies
2840 /// commutativity.
hoistBOAssociation(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2841 static bool hoistBOAssociation(Instruction &I, Loop &L,
2842                                ICFLoopSafetyInfo &SafetyInfo,
2843                                MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2844                                DominatorTree *DT) {
2845   auto *BO = dyn_cast<BinaryOperator>(&I);
2846   if (!BO || !BO->isAssociative())
2847     return false;
2848 
2849   Instruction::BinaryOps Opcode = BO->getOpcode();
2850   bool LVInRHS = L.isLoopInvariant(BO->getOperand(0));
2851   auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(LVInRHS));
2852   if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
2853       BO0->hasNUsesOrMore(3))
2854     return false;
2855 
2856   Value *LV = BO0->getOperand(0);
2857   Value *C1 = BO0->getOperand(1);
2858   Value *C2 = BO->getOperand(!LVInRHS);
2859 
2860   assert(BO->isCommutative() && BO0->isCommutative() &&
2861          "Associativity implies commutativity");
2862   if (L.isLoopInvariant(LV) && !L.isLoopInvariant(C1))
2863     std::swap(LV, C1);
2864   if (L.isLoopInvariant(LV) || !L.isLoopInvariant(C1) || !L.isLoopInvariant(C2))
2865     return false;
2866 
2867   auto *Preheader = L.getLoopPreheader();
2868   assert(Preheader && "Loop is not in simplify form?");
2869 
2870   IRBuilder<> Builder(Preheader->getTerminator());
2871   auto *Inv = Builder.CreateBinOp(Opcode, C1, C2, "invariant.op");
2872 
2873   auto *NewBO = BinaryOperator::Create(
2874       Opcode, LV, Inv, BO->getName() + ".reass", BO->getIterator());
2875   NewBO->setDebugLoc(DebugLoc::getDropped());
2876 
2877   if (Opcode == Instruction::FAdd || Opcode == Instruction::FMul) {
2878     // Intersect FMF flags for FADD and FMUL.
2879     FastMathFlags Intersect = BO->getFastMathFlags() & BO0->getFastMathFlags();
2880     if (auto *I = dyn_cast<Instruction>(Inv))
2881       I->setFastMathFlags(Intersect);
2882     NewBO->setFastMathFlags(Intersect);
2883   } else {
2884     OverflowTracking Flags;
2885     Flags.AllKnownNonNegative = false;
2886     Flags.AllKnownNonZero = false;
2887     Flags.mergeFlags(*BO);
2888     Flags.mergeFlags(*BO0);
2889     // If `Inv` was not constant-folded, a new Instruction has been created.
2890     if (auto *I = dyn_cast<Instruction>(Inv))
2891       Flags.applyFlags(*I);
2892     Flags.applyFlags(*NewBO);
2893   }
2894 
2895   BO->replaceAllUsesWith(NewBO);
2896   eraseInstruction(*BO, SafetyInfo, MSSAU);
2897 
2898   // (LV op C1) might not be erased if it has more uses than the one we just
2899   // replaced.
2900   if (BO0->use_empty()) {
2901     salvageDebugInfo(*BO0);
2902     eraseInstruction(*BO0, SafetyInfo, MSSAU);
2903   }
2904 
2905   return true;
2906 }
2907 
hoistArithmetics(Instruction & I,Loop & L,ICFLoopSafetyInfo & SafetyInfo,MemorySSAUpdater & MSSAU,AssumptionCache * AC,DominatorTree * DT)2908 static bool hoistArithmetics(Instruction &I, Loop &L,
2909                              ICFLoopSafetyInfo &SafetyInfo,
2910                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2911                              DominatorTree *DT) {
2912   // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2913   // into (x < min(INV1, INV2)), and hoisting the invariant part of this
2914   // expression out of the loop.
2915   if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2916     ++NumHoisted;
2917     ++NumMinMaxHoisted;
2918     return true;
2919   }
2920 
2921   // Try to hoist GEPs by reassociation.
2922   if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2923     ++NumHoisted;
2924     ++NumGEPsHoisted;
2925     return true;
2926   }
2927 
2928   // Try to hoist add/sub's by reassociation.
2929   if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2930     ++NumHoisted;
2931     ++NumAddSubHoisted;
2932     return true;
2933   }
2934 
2935   bool IsInt = I.getType()->isIntOrIntVectorTy();
2936   if (hoistMulAddAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2937     ++NumHoisted;
2938     if (IsInt)
2939       ++NumIntAssociationsHoisted;
2940     else
2941       ++NumFPAssociationsHoisted;
2942     return true;
2943   }
2944 
2945   if (hoistBOAssociation(I, L, SafetyInfo, MSSAU, AC, DT)) {
2946     ++NumHoisted;
2947     ++NumBOAssociationsHoisted;
2948     return true;
2949   }
2950 
2951   return false;
2952 }
2953 
2954 /// Little predicate that returns true if the specified basic block is in
2955 /// a subloop of the current one, not the current one itself.
2956 ///
inSubLoop(BasicBlock * BB,Loop * CurLoop,LoopInfo * LI)2957 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2958   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2959   return LI->getLoopFor(BB) != CurLoop;
2960 }
2961