xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp (revision 2a9021898c4ee2154787da862c238cfeccd655df)
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/CaptureTracking.h"
47 #include "llvm/Analysis/ConstantFolding.h"
48 #include "llvm/Analysis/GuardUtils.h"
49 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
50 #include "llvm/Analysis/Loads.h"
51 #include "llvm/Analysis/LoopInfo.h"
52 #include "llvm/Analysis/LoopIterator.h"
53 #include "llvm/Analysis/LoopNestAnalysis.h"
54 #include "llvm/Analysis/LoopPass.h"
55 #include "llvm/Analysis/MemorySSA.h"
56 #include "llvm/Analysis/MemorySSAUpdater.h"
57 #include "llvm/Analysis/MustExecute.h"
58 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
59 #include "llvm/Analysis/ScalarEvolution.h"
60 #include "llvm/Analysis/TargetLibraryInfo.h"
61 #include "llvm/Analysis/TargetTransformInfo.h"
62 #include "llvm/Analysis/ValueTracking.h"
63 #include "llvm/IR/CFG.h"
64 #include "llvm/IR/Constants.h"
65 #include "llvm/IR/DataLayout.h"
66 #include "llvm/IR/DebugInfoMetadata.h"
67 #include "llvm/IR/DerivedTypes.h"
68 #include "llvm/IR/Dominators.h"
69 #include "llvm/IR/Instructions.h"
70 #include "llvm/IR/IntrinsicInst.h"
71 #include "llvm/IR/LLVMContext.h"
72 #include "llvm/IR/Metadata.h"
73 #include "llvm/IR/PatternMatch.h"
74 #include "llvm/IR/PredIteratorCache.h"
75 #include "llvm/InitializePasses.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Target/TargetOptions.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/SSAUpdater.h"
86 #include <algorithm>
87 #include <utility>
88 using namespace llvm;
89 
90 namespace llvm {
91 class LPMUpdater;
92 } // namespace llvm
93 
94 #define DEBUG_TYPE "licm"
95 
96 STATISTIC(NumCreatedBlocks, "Number of blocks created");
97 STATISTIC(NumClonedBranches, "Number of branches cloned");
98 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102 STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103 STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104 STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105 
106 /// Memory promotion is enabled by default.
107 static cl::opt<bool>
108     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
109                      cl::desc("Disable memory promotion in LICM pass"));
110 
111 static cl::opt<bool> ControlFlowHoisting(
112     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
113     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
114 
115 static cl::opt<bool>
116     SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
117                  cl::desc("Force thread model single in LICM pass"));
118 
119 static cl::opt<uint32_t> MaxNumUsesTraversed(
120     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
121     cl::desc("Max num uses visited for identifying load "
122              "invariance in loop using invariant start (default = 8)"));
123 
124 // Experimental option to allow imprecision in LICM in pathological cases, in
125 // exchange for faster compile. This is to be removed if MemorySSA starts to
126 // address the same issue. LICM calls MemorySSAWalker's
127 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
128 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
129 // which may not be precise, since optimizeUses is capped. The result is
130 // correct, but we may not get as "far up" as possible to get which access is
131 // clobbering the one queried.
132 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
133     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
134     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
135              "for faster compile. Caps the MemorySSA clobbering calls."));
136 
137 // Experimentally, memory promotion carries less importance than sinking and
138 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
139 // compile time.
140 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
141     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
142     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
143              "effect. When MSSA in LICM is enabled, then this is the maximum "
144              "number of accesses allowed to be present in a loop in order to "
145              "enable memory promotion."));
146 
147 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
148 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
149                                   const LoopSafetyInfo *SafetyInfo,
150                                   TargetTransformInfo *TTI, bool &FreeInLoop,
151                                   bool LoopNestMode);
152 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
153                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
154                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
155                   OptimizationRemarkEmitter *ORE);
156 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
157                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
158                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
159 static bool isSafeToExecuteUnconditionally(
160     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
161     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
162     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
163     AssumptionCache *AC, bool AllowSpeculation);
164 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
165                                      Loop *CurLoop, Instruction &I,
166                                      SinkAndHoistLICMFlags &Flags);
167 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
168                                       MemoryUse &MU);
169 static Instruction *cloneInstructionInExitBlock(
170     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
171     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
172 
173 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
174                              MemorySSAUpdater &MSSAU);
175 
176 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
177                                   ICFLoopSafetyInfo &SafetyInfo,
178                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
179 
180 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
181                                 function_ref<void(Instruction *)> Fn);
182 using PointersAndHasReadsOutsideSet =
183     std::pair<SmallSetVector<Value *, 8>, bool>;
184 static SmallVector<PointersAndHasReadsOutsideSet, 0>
185 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
186 
187 namespace {
188 struct LoopInvariantCodeMotion {
189   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
190                  AssumptionCache *AC, TargetLibraryInfo *TLI,
191                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
192                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
193 
194   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
195                           unsigned LicmMssaNoAccForPromotionCap,
196                           bool LicmAllowSpeculation)
197       : LicmMssaOptCap(LicmMssaOptCap),
198         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
199         LicmAllowSpeculation(LicmAllowSpeculation) {}
200 
201 private:
202   unsigned LicmMssaOptCap;
203   unsigned LicmMssaNoAccForPromotionCap;
204   bool LicmAllowSpeculation;
205 };
206 
207 struct LegacyLICMPass : public LoopPass {
208   static char ID; // Pass identification, replacement for typeid
209   LegacyLICMPass(
210       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
211       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
212       bool LicmAllowSpeculation = true)
213       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
214                            LicmAllowSpeculation) {
215     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
216   }
217 
218   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
219     if (skipLoop(L))
220       return false;
221 
222     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
223                       << L->getHeader()->getNameOrAsOperand() << "\n");
224 
225     Function *F = L->getHeader()->getParent();
226 
227     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
228     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
229     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
230     // pass. Function analyses need to be preserved across loop transformations
231     // but ORE cannot be preserved (see comment before the pass definition).
232     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
233     return LICM.runOnLoop(
234         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
235         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
236         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
237         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
238         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
239         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
240         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
241   }
242 
243   /// This transformation requires natural loop information & requires that
244   /// loop preheaders be inserted into the CFG...
245   ///
246   void getAnalysisUsage(AnalysisUsage &AU) const override {
247     AU.addPreserved<DominatorTreeWrapperPass>();
248     AU.addPreserved<LoopInfoWrapperPass>();
249     AU.addRequired<TargetLibraryInfoWrapperPass>();
250     AU.addRequired<MemorySSAWrapperPass>();
251     AU.addPreserved<MemorySSAWrapperPass>();
252     AU.addRequired<TargetTransformInfoWrapperPass>();
253     AU.addRequired<AssumptionCacheTracker>();
254     getLoopAnalysisUsage(AU);
255     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
256     AU.addPreserved<LazyBlockFrequencyInfoPass>();
257     AU.addPreserved<LazyBranchProbabilityInfoPass>();
258   }
259 
260 private:
261   LoopInvariantCodeMotion LICM;
262 };
263 } // namespace
264 
265 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
266                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
267   if (!AR.MSSA)
268     report_fatal_error("LICM requires MemorySSA (loop-mssa)",
269                        /*GenCrashDiag*/false);
270 
271   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
272   // pass.  Function analyses need to be preserved across loop transformations
273   // but ORE cannot be preserved (see comment before the pass definition).
274   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
275 
276   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
277                                Opts.AllowSpeculation);
278   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
279                       &AR.SE, AR.MSSA, &ORE))
280     return PreservedAnalyses::all();
281 
282   auto PA = getLoopPassPreservedAnalyses();
283 
284   PA.preserve<DominatorTreeAnalysis>();
285   PA.preserve<LoopAnalysis>();
286   PA.preserve<MemorySSAAnalysis>();
287 
288   return PA;
289 }
290 
291 void LICMPass::printPipeline(
292     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
293   static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
294       OS, MapClassName2PassName);
295 
296   OS << "<";
297   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
298   OS << ">";
299 }
300 
301 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
302                                  LoopStandardAnalysisResults &AR,
303                                  LPMUpdater &) {
304   if (!AR.MSSA)
305     report_fatal_error("LNICM requires MemorySSA (loop-mssa)",
306                        /*GenCrashDiag*/false);
307 
308   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
309   // pass.  Function analyses need to be preserved across loop transformations
310   // but ORE cannot be preserved (see comment before the pass definition).
311   OptimizationRemarkEmitter ORE(LN.getParent());
312 
313   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
314                                Opts.AllowSpeculation);
315 
316   Loop &OutermostLoop = LN.getOutermostLoop();
317   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
318                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
319 
320   if (!Changed)
321     return PreservedAnalyses::all();
322 
323   auto PA = getLoopPassPreservedAnalyses();
324 
325   PA.preserve<DominatorTreeAnalysis>();
326   PA.preserve<LoopAnalysis>();
327   PA.preserve<MemorySSAAnalysis>();
328 
329   return PA;
330 }
331 
332 void LNICMPass::printPipeline(
333     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
334   static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
335       OS, MapClassName2PassName);
336 
337   OS << "<";
338   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
339   OS << ">";
340 }
341 
342 char LegacyLICMPass::ID = 0;
343 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
344                       false, false)
345 INITIALIZE_PASS_DEPENDENCY(LoopPass)
346 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
347 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
348 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
349 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
350 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
351                     false)
352 
353 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
354 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
355                            unsigned LicmMssaNoAccForPromotionCap,
356                            bool LicmAllowSpeculation) {
357   return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
358                             LicmAllowSpeculation);
359 }
360 
361 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
362                                                    MemorySSA *MSSA)
363     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
364                             IsSink, L, MSSA) {}
365 
366 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
367     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
368     Loop *L, MemorySSA *MSSA)
369     : LicmMssaOptCap(LicmMssaOptCap),
370       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
371       IsSink(IsSink) {
372   assert(((L != nullptr) == (MSSA != nullptr)) &&
373          "Unexpected values for SinkAndHoistLICMFlags");
374   if (!MSSA)
375     return;
376 
377   unsigned AccessCapCount = 0;
378   for (auto *BB : L->getBlocks())
379     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
380       for (const auto &MA : *Accesses) {
381         (void)MA;
382         ++AccessCapCount;
383         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
384           NoOfMemAccTooLarge = true;
385           return;
386         }
387       }
388 }
389 
390 /// Hoist expressions out of the specified loop. Note, alias info for inner
391 /// loop is not preserved so it is not a good idea to run LICM multiple
392 /// times on one loop.
393 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
394                                         DominatorTree *DT, AssumptionCache *AC,
395                                         TargetLibraryInfo *TLI,
396                                         TargetTransformInfo *TTI,
397                                         ScalarEvolution *SE, MemorySSA *MSSA,
398                                         OptimizationRemarkEmitter *ORE,
399                                         bool LoopNestMode) {
400   bool Changed = false;
401 
402   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
403   MSSA->ensureOptimizedUses();
404 
405   // If this loop has metadata indicating that LICM is not to be performed then
406   // just exit.
407   if (hasDisableLICMTransformsHint(L)) {
408     return false;
409   }
410 
411   // Don't sink stores from loops with coroutine suspend instructions.
412   // LICM would sink instructions into the default destination of
413   // the coroutine switch. The default destination of the switch is to
414   // handle the case where the coroutine is suspended, by which point the
415   // coroutine frame may have been destroyed. No instruction can be sunk there.
416   // FIXME: This would unfortunately hurt the performance of coroutines, however
417   // there is currently no general solution for this. Similar issues could also
418   // potentially happen in other passes where instructions are being moved
419   // across that edge.
420   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
421     return llvm::any_of(*BB, [](Instruction &I) {
422       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
423       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
424     });
425   });
426 
427   MemorySSAUpdater MSSAU(MSSA);
428   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
429                               /*IsSink=*/true, L, MSSA);
430 
431   // Get the preheader block to move instructions into...
432   BasicBlock *Preheader = L->getLoopPreheader();
433 
434   // Compute loop safety information.
435   ICFLoopSafetyInfo SafetyInfo;
436   SafetyInfo.computeLoopSafetyInfo(L);
437 
438   // We want to visit all of the instructions in this loop... that are not parts
439   // of our subloops (they have already had their invariants hoisted out of
440   // their loop, into this loop, so there is no need to process the BODIES of
441   // the subloops).
442   //
443   // Traverse the body of the loop in depth first order on the dominator tree so
444   // that we are guaranteed to see definitions before we see uses.  This allows
445   // us to sink instructions in one pass, without iteration.  After sinking
446   // instructions, we perform another pass to hoist them out of the loop.
447   if (L->hasDedicatedExits())
448     Changed |=
449         LoopNestMode
450             ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
451                                     TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
452             : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
453                          MSSAU, &SafetyInfo, Flags, ORE);
454   Flags.setIsSink(false);
455   if (Preheader)
456     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
457                            MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
458                            LicmAllowSpeculation);
459 
460   // Now that all loop invariants have been removed from the loop, promote any
461   // memory references to scalars that we can.
462   // Don't sink stores from loops without dedicated block exits. Exits
463   // containing indirect branches are not transformed by loop simplify,
464   // make sure we catch that. An additional load may be generated in the
465   // preheader for SSA updater, so also avoid sinking when no preheader
466   // is available.
467   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
468       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
469     // Figure out the loop exits and their insertion points
470     SmallVector<BasicBlock *, 8> ExitBlocks;
471     L->getUniqueExitBlocks(ExitBlocks);
472 
473     // We can't insert into a catchswitch.
474     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
475       return isa<CatchSwitchInst>(Exit->getTerminator());
476     });
477 
478     if (!HasCatchSwitch) {
479       SmallVector<Instruction *, 8> InsertPts;
480       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
481       InsertPts.reserve(ExitBlocks.size());
482       MSSAInsertPts.reserve(ExitBlocks.size());
483       for (BasicBlock *ExitBlock : ExitBlocks) {
484         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
485         MSSAInsertPts.push_back(nullptr);
486       }
487 
488       PredIteratorCache PIC;
489 
490       // Promoting one set of accesses may make the pointers for another set
491       // loop invariant, so run this in a loop.
492       bool Promoted = false;
493       bool LocalPromoted;
494       do {
495         LocalPromoted = false;
496         for (auto [PointerMustAliases, HasReadsOutsideSet] :
497              collectPromotionCandidates(MSSA, AA, L)) {
498           LocalPromoted |= promoteLoopAccessesToScalars(
499               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
500               DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
501               LicmAllowSpeculation, HasReadsOutsideSet);
502         }
503         Promoted |= LocalPromoted;
504       } while (LocalPromoted);
505 
506       // Once we have promoted values across the loop body we have to
507       // recursively reform LCSSA as any nested loop may now have values defined
508       // within the loop used in the outer loop.
509       // FIXME: This is really heavy handed. It would be a bit better to use an
510       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
511       // it as it went.
512       if (Promoted)
513         formLCSSARecursively(*L, *DT, LI, SE);
514 
515       Changed |= Promoted;
516     }
517   }
518 
519   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
520   // specifically moving instructions across the loop boundary and so it is
521   // especially in need of basic functional correctness checking here.
522   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
523   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
524          "Parent loop not left in LCSSA form after LICM!");
525 
526   if (VerifyMemorySSA)
527     MSSA->verifyMemorySSA();
528 
529   if (Changed && SE)
530     SE->forgetLoopDispositions();
531   return Changed;
532 }
533 
534 /// Walk the specified region of the CFG (defined by all blocks dominated by
535 /// the specified block, and that are in the current loop) in reverse depth
536 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
537 /// definitions, allowing us to sink a loop body in one pass without iteration.
538 ///
539 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
540                       DominatorTree *DT, TargetLibraryInfo *TLI,
541                       TargetTransformInfo *TTI, Loop *CurLoop,
542                       MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
543                       SinkAndHoistLICMFlags &Flags,
544                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
545 
546   // Verify inputs.
547   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
548          CurLoop != nullptr && SafetyInfo != nullptr &&
549          "Unexpected input to sinkRegion.");
550 
551   // We want to visit children before parents. We will enqueue all the parents
552   // before their children in the worklist and process the worklist in reverse
553   // order.
554   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
555 
556   bool Changed = false;
557   for (DomTreeNode *DTN : reverse(Worklist)) {
558     BasicBlock *BB = DTN->getBlock();
559     // Only need to process the contents of this block if it is not part of a
560     // subloop (which would already have been processed).
561     if (inSubLoop(BB, CurLoop, LI))
562       continue;
563 
564     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
565       Instruction &I = *--II;
566 
567       // The instruction is not used in the loop if it is dead.  In this case,
568       // we just delete it instead of sinking it.
569       if (isInstructionTriviallyDead(&I, TLI)) {
570         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
571         salvageKnowledge(&I);
572         salvageDebugInfo(I);
573         ++II;
574         eraseInstruction(I, *SafetyInfo, MSSAU);
575         Changed = true;
576         continue;
577       }
578 
579       // Check to see if we can sink this instruction to the exit blocks
580       // of the loop.  We can do this if the all users of the instruction are
581       // outside of the loop.  In this case, it doesn't even matter if the
582       // operands of the instruction are loop invariant.
583       //
584       bool FreeInLoop = false;
585       bool LoopNestMode = OutermostLoop != nullptr;
586       if (!I.mayHaveSideEffects() &&
587           isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
588                                 SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
589           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
590         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
591           if (!FreeInLoop) {
592             ++II;
593             salvageDebugInfo(I);
594             eraseInstruction(I, *SafetyInfo, MSSAU);
595           }
596           Changed = true;
597         }
598       }
599     }
600   }
601   if (VerifyMemorySSA)
602     MSSAU.getMemorySSA()->verifyMemorySSA();
603   return Changed;
604 }
605 
606 bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
607                                  DominatorTree *DT, TargetLibraryInfo *TLI,
608                                  TargetTransformInfo *TTI, Loop *CurLoop,
609                                  MemorySSAUpdater &MSSAU,
610                                  ICFLoopSafetyInfo *SafetyInfo,
611                                  SinkAndHoistLICMFlags &Flags,
612                                  OptimizationRemarkEmitter *ORE) {
613 
614   bool Changed = false;
615   SmallPriorityWorklist<Loop *, 4> Worklist;
616   Worklist.insert(CurLoop);
617   appendLoopsToWorklist(*CurLoop, Worklist);
618   while (!Worklist.empty()) {
619     Loop *L = Worklist.pop_back_val();
620     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
621                           MSSAU, SafetyInfo, Flags, ORE, CurLoop);
622   }
623   return Changed;
624 }
625 
626 namespace {
627 // This is a helper class for hoistRegion to make it able to hoist control flow
628 // in order to be able to hoist phis. The way this works is that we initially
629 // start hoisting to the loop preheader, and when we see a loop invariant branch
630 // we make note of this. When we then come to hoist an instruction that's
631 // conditional on such a branch we duplicate the branch and the relevant control
632 // flow, then hoist the instruction into the block corresponding to its original
633 // block in the duplicated control flow.
634 class ControlFlowHoister {
635 private:
636   // Information about the loop we are hoisting from
637   LoopInfo *LI;
638   DominatorTree *DT;
639   Loop *CurLoop;
640   MemorySSAUpdater &MSSAU;
641 
642   // A map of blocks in the loop to the block their instructions will be hoisted
643   // to.
644   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
645 
646   // The branches that we can hoist, mapped to the block that marks a
647   // convergence point of their control flow.
648   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
649 
650 public:
651   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
652                      MemorySSAUpdater &MSSAU)
653       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
654 
655   void registerPossiblyHoistableBranch(BranchInst *BI) {
656     // We can only hoist conditional branches with loop invariant operands.
657     if (!ControlFlowHoisting || !BI->isConditional() ||
658         !CurLoop->hasLoopInvariantOperands(BI))
659       return;
660 
661     // The branch destinations need to be in the loop, and we don't gain
662     // anything by duplicating conditional branches with duplicate successors,
663     // as it's essentially the same as an unconditional branch.
664     BasicBlock *TrueDest = BI->getSuccessor(0);
665     BasicBlock *FalseDest = BI->getSuccessor(1);
666     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
667         TrueDest == FalseDest)
668       return;
669 
670     // We can hoist BI if one branch destination is the successor of the other,
671     // or both have common successor which we check by seeing if the
672     // intersection of their successors is non-empty.
673     // TODO: This could be expanded to allowing branches where both ends
674     // eventually converge to a single block.
675     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
676     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
677     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
678     BasicBlock *CommonSucc = nullptr;
679     if (TrueDestSucc.count(FalseDest)) {
680       CommonSucc = FalseDest;
681     } else if (FalseDestSucc.count(TrueDest)) {
682       CommonSucc = TrueDest;
683     } else {
684       set_intersect(TrueDestSucc, FalseDestSucc);
685       // If there's one common successor use that.
686       if (TrueDestSucc.size() == 1)
687         CommonSucc = *TrueDestSucc.begin();
688       // If there's more than one pick whichever appears first in the block list
689       // (we can't use the value returned by TrueDestSucc.begin() as it's
690       // unpredicatable which element gets returned).
691       else if (!TrueDestSucc.empty()) {
692         Function *F = TrueDest->getParent();
693         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
694         auto It = llvm::find_if(*F, IsSucc);
695         assert(It != F->end() && "Could not find successor in function");
696         CommonSucc = &*It;
697       }
698     }
699     // The common successor has to be dominated by the branch, as otherwise
700     // there will be some other path to the successor that will not be
701     // controlled by this branch so any phi we hoist would be controlled by the
702     // wrong condition. This also takes care of avoiding hoisting of loop back
703     // edges.
704     // TODO: In some cases this could be relaxed if the successor is dominated
705     // by another block that's been hoisted and we can guarantee that the
706     // control flow has been replicated exactly.
707     if (CommonSucc && DT->dominates(BI, CommonSucc))
708       HoistableBranches[BI] = CommonSucc;
709   }
710 
711   bool canHoistPHI(PHINode *PN) {
712     // The phi must have loop invariant operands.
713     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
714       return false;
715     // We can hoist phis if the block they are in is the target of hoistable
716     // branches which cover all of the predecessors of the block.
717     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
718     BasicBlock *BB = PN->getParent();
719     for (BasicBlock *PredBB : predecessors(BB))
720       PredecessorBlocks.insert(PredBB);
721     // If we have less predecessor blocks than predecessors then the phi will
722     // have more than one incoming value for the same block which we can't
723     // handle.
724     // TODO: This could be handled be erasing some of the duplicate incoming
725     // values.
726     if (PredecessorBlocks.size() != pred_size(BB))
727       return false;
728     for (auto &Pair : HoistableBranches) {
729       if (Pair.second == BB) {
730         // Which blocks are predecessors via this branch depends on if the
731         // branch is triangle-like or diamond-like.
732         if (Pair.first->getSuccessor(0) == BB) {
733           PredecessorBlocks.erase(Pair.first->getParent());
734           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
735         } else if (Pair.first->getSuccessor(1) == BB) {
736           PredecessorBlocks.erase(Pair.first->getParent());
737           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
738         } else {
739           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
740           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
741         }
742       }
743     }
744     // PredecessorBlocks will now be empty if for every predecessor of BB we
745     // found a hoistable branch source.
746     return PredecessorBlocks.empty();
747   }
748 
749   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
750     if (!ControlFlowHoisting)
751       return CurLoop->getLoopPreheader();
752     // If BB has already been hoisted, return that
753     if (HoistDestinationMap.count(BB))
754       return HoistDestinationMap[BB];
755 
756     // Check if this block is conditional based on a pending branch
757     auto HasBBAsSuccessor =
758         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
759           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
760                                        Pair.first->getSuccessor(1) == BB);
761         };
762     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
763 
764     // If not involved in a pending branch, hoist to preheader
765     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
766     if (It == HoistableBranches.end()) {
767       LLVM_DEBUG(dbgs() << "LICM using "
768                         << InitialPreheader->getNameOrAsOperand()
769                         << " as hoist destination for "
770                         << BB->getNameOrAsOperand() << "\n");
771       HoistDestinationMap[BB] = InitialPreheader;
772       return InitialPreheader;
773     }
774     BranchInst *BI = It->first;
775     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
776                HoistableBranches.end() &&
777            "BB is expected to be the target of at most one branch");
778 
779     LLVMContext &C = BB->getContext();
780     BasicBlock *TrueDest = BI->getSuccessor(0);
781     BasicBlock *FalseDest = BI->getSuccessor(1);
782     BasicBlock *CommonSucc = HoistableBranches[BI];
783     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
784 
785     // Create hoisted versions of blocks that currently don't have them
786     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
787       if (HoistDestinationMap.count(Orig))
788         return HoistDestinationMap[Orig];
789       BasicBlock *New =
790           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
791       HoistDestinationMap[Orig] = New;
792       DT->addNewBlock(New, HoistTarget);
793       if (CurLoop->getParentLoop())
794         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
795       ++NumCreatedBlocks;
796       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
797                         << " as hoist destination for " << Orig->getName()
798                         << "\n");
799       return New;
800     };
801     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
802     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
803     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
804 
805     // Link up these blocks with branches.
806     if (!HoistCommonSucc->getTerminator()) {
807       // The new common successor we've generated will branch to whatever that
808       // hoist target branched to.
809       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
810       assert(TargetSucc && "Expected hoist target to have a single successor");
811       HoistCommonSucc->moveBefore(TargetSucc);
812       BranchInst::Create(TargetSucc, HoistCommonSucc);
813     }
814     if (!HoistTrueDest->getTerminator()) {
815       HoistTrueDest->moveBefore(HoistCommonSucc);
816       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
817     }
818     if (!HoistFalseDest->getTerminator()) {
819       HoistFalseDest->moveBefore(HoistCommonSucc);
820       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
821     }
822 
823     // If BI is being cloned to what was originally the preheader then
824     // HoistCommonSucc will now be the new preheader.
825     if (HoistTarget == InitialPreheader) {
826       // Phis in the loop header now need to use the new preheader.
827       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
828       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
829           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
830       // The new preheader dominates the loop header.
831       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
832       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
833       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
834       // The preheader hoist destination is now the new preheader, with the
835       // exception of the hoist destination of this branch.
836       for (auto &Pair : HoistDestinationMap)
837         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
838           Pair.second = HoistCommonSucc;
839     }
840 
841     // Now finally clone BI.
842     ReplaceInstWithInst(
843         HoistTarget->getTerminator(),
844         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
845     ++NumClonedBranches;
846 
847     assert(CurLoop->getLoopPreheader() &&
848            "Hoisting blocks should not have destroyed preheader");
849     return HoistDestinationMap[BB];
850   }
851 };
852 } // namespace
853 
854 /// Walk the specified region of the CFG (defined by all blocks dominated by
855 /// the specified block, and that are in the current loop) in depth first
856 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
857 /// uses, allowing us to hoist a loop body in one pass without iteration.
858 ///
859 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
860                        DominatorTree *DT, AssumptionCache *AC,
861                        TargetLibraryInfo *TLI, Loop *CurLoop,
862                        MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
863                        ICFLoopSafetyInfo *SafetyInfo,
864                        SinkAndHoistLICMFlags &Flags,
865                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
866                        bool AllowSpeculation) {
867   // Verify inputs.
868   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
869          CurLoop != nullptr && SafetyInfo != nullptr &&
870          "Unexpected input to hoistRegion.");
871 
872   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
873 
874   // Keep track of instructions that have been hoisted, as they may need to be
875   // re-hoisted if they end up not dominating all of their uses.
876   SmallVector<Instruction *, 16> HoistedInstructions;
877 
878   // For PHI hoisting to work we need to hoist blocks before their successors.
879   // We can do this by iterating through the blocks in the loop in reverse
880   // post-order.
881   LoopBlocksRPO Worklist(CurLoop);
882   Worklist.perform(LI);
883   bool Changed = false;
884   for (BasicBlock *BB : Worklist) {
885     // Only need to process the contents of this block if it is not part of a
886     // subloop (which would already have been processed).
887     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
888       continue;
889 
890     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
891       // Try constant folding this instruction.  If all the operands are
892       // constants, it is technically hoistable, but it would be better to
893       // just fold it.
894       if (Constant *C = ConstantFoldInstruction(
895               &I, I.getModule()->getDataLayout(), TLI)) {
896         LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << "  --> " << *C
897                           << '\n');
898         // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
899         I.replaceAllUsesWith(C);
900         if (isInstructionTriviallyDead(&I, TLI))
901           eraseInstruction(I, *SafetyInfo, MSSAU);
902         Changed = true;
903         continue;
904       }
905 
906       // Try hoisting the instruction out to the preheader.  We can only do
907       // this if all of the operands of the instruction are loop invariant and
908       // if it is safe to hoist the instruction. We also check block frequency
909       // to make sure instruction only gets hoisted into colder blocks.
910       // TODO: It may be safe to hoist if we are hoisting to a conditional block
911       // and we have accurately duplicated the control flow from the loop header
912       // to that block.
913       if (CurLoop->hasLoopInvariantOperands(&I) &&
914           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
915           isSafeToExecuteUnconditionally(
916               I, DT, TLI, CurLoop, SafetyInfo, ORE,
917               CurLoop->getLoopPreheader()->getTerminator(), AC,
918               AllowSpeculation)) {
919         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
920               MSSAU, SE, ORE);
921         HoistedInstructions.push_back(&I);
922         Changed = true;
923         continue;
924       }
925 
926       // Attempt to remove floating point division out of the loop by
927       // converting it to a reciprocal multiplication.
928       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
929           CurLoop->isLoopInvariant(I.getOperand(1))) {
930         auto Divisor = I.getOperand(1);
931         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
932         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
933         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
934         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
935         ReciprocalDivisor->insertBefore(&I);
936 
937         auto Product =
938             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
939         Product->setFastMathFlags(I.getFastMathFlags());
940         SafetyInfo->insertInstructionTo(Product, I.getParent());
941         Product->insertAfter(&I);
942         I.replaceAllUsesWith(Product);
943         eraseInstruction(I, *SafetyInfo, MSSAU);
944 
945         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
946               SafetyInfo, MSSAU, SE, ORE);
947         HoistedInstructions.push_back(ReciprocalDivisor);
948         Changed = true;
949         continue;
950       }
951 
952       auto IsInvariantStart = [&](Instruction &I) {
953         using namespace PatternMatch;
954         return I.use_empty() &&
955                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
956       };
957       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
958         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
959                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
960       };
961       if ((IsInvariantStart(I) || isGuard(&I)) &&
962           CurLoop->hasLoopInvariantOperands(&I) &&
963           MustExecuteWithoutWritesBefore(I)) {
964         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
965               MSSAU, SE, ORE);
966         HoistedInstructions.push_back(&I);
967         Changed = true;
968         continue;
969       }
970 
971       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
972         if (CFH.canHoistPHI(PN)) {
973           // Redirect incoming blocks first to ensure that we create hoisted
974           // versions of those blocks before we hoist the phi.
975           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
976             PN->setIncomingBlock(
977                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
978           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
979                 MSSAU, SE, ORE);
980           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
981           Changed = true;
982           continue;
983         }
984       }
985 
986       // Remember possibly hoistable branches so we can actually hoist them
987       // later if needed.
988       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
989         CFH.registerPossiblyHoistableBranch(BI);
990     }
991   }
992 
993   // If we hoisted instructions to a conditional block they may not dominate
994   // their uses that weren't hoisted (such as phis where some operands are not
995   // loop invariant). If so make them unconditional by moving them to their
996   // immediate dominator. We iterate through the instructions in reverse order
997   // which ensures that when we rehoist an instruction we rehoist its operands,
998   // and also keep track of where in the block we are rehoisting to to make sure
999   // that we rehoist instructions before the instructions that use them.
1000   Instruction *HoistPoint = nullptr;
1001   if (ControlFlowHoisting) {
1002     for (Instruction *I : reverse(HoistedInstructions)) {
1003       if (!llvm::all_of(I->uses(),
1004                         [&](Use &U) { return DT->dominates(I, U); })) {
1005         BasicBlock *Dominator =
1006             DT->getNode(I->getParent())->getIDom()->getBlock();
1007         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
1008           if (HoistPoint)
1009             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1010                    "New hoist point expected to dominate old hoist point");
1011           HoistPoint = Dominator->getTerminator();
1012         }
1013         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1014                           << HoistPoint->getParent()->getNameOrAsOperand()
1015                           << ": " << *I << "\n");
1016         moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
1017         HoistPoint = I;
1018         Changed = true;
1019       }
1020     }
1021   }
1022   if (VerifyMemorySSA)
1023     MSSAU.getMemorySSA()->verifyMemorySSA();
1024 
1025     // Now that we've finished hoisting make sure that LI and DT are still
1026     // valid.
1027 #ifdef EXPENSIVE_CHECKS
1028   if (Changed) {
1029     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1030            "Dominator tree verification failed");
1031     LI->verify(*DT);
1032   }
1033 #endif
1034 
1035   return Changed;
1036 }
1037 
1038 // Return true if LI is invariant within scope of the loop. LI is invariant if
1039 // CurLoop is dominated by an invariant.start representing the same memory
1040 // location and size as the memory location LI loads from, and also the
1041 // invariant.start has no uses.
1042 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1043                                   Loop *CurLoop) {
1044   Value *Addr = LI->getOperand(0);
1045   const DataLayout &DL = LI->getModule()->getDataLayout();
1046   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1047 
1048   // It is not currently possible for clang to generate an invariant.start
1049   // intrinsic with scalable vector types because we don't support thread local
1050   // sizeless types and we don't permit sizeless types in structs or classes.
1051   // Furthermore, even if support is added for this in future the intrinsic
1052   // itself is defined to have a size of -1 for variable sized objects. This
1053   // makes it impossible to verify if the intrinsic envelops our region of
1054   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1055   // types would have a -1 parameter, but the former is clearly double the size
1056   // of the latter.
1057   if (LocSizeInBits.isScalable())
1058     return false;
1059 
1060   // if the type is i8 addrspace(x)*, we know this is the type of
1061   // llvm.invariant.start operand
1062   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1063                                      LI->getPointerAddressSpace());
1064   unsigned BitcastsVisited = 0;
1065   // Look through bitcasts until we reach the i8* type (this is invariant.start
1066   // operand type).
1067   while (Addr->getType() != PtrInt8Ty) {
1068     auto *BC = dyn_cast<BitCastInst>(Addr);
1069     // Avoid traversing high number of bitcast uses.
1070     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1071       return false;
1072     Addr = BC->getOperand(0);
1073   }
1074   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1075   // uselists for non-local Values in a loop pass.
1076   if (isa<Constant>(Addr))
1077     return false;
1078 
1079   unsigned UsesVisited = 0;
1080   // Traverse all uses of the load operand value, to see if invariant.start is
1081   // one of the uses, and whether it dominates the load instruction.
1082   for (auto *U : Addr->users()) {
1083     // Avoid traversing for Load operand with high number of users.
1084     if (++UsesVisited > MaxNumUsesTraversed)
1085       return false;
1086     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1087     // If there are escaping uses of invariant.start instruction, the load maybe
1088     // non-invariant.
1089     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1090         !II->use_empty())
1091       continue;
1092     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1093     // The intrinsic supports having a -1 argument for variable sized objects
1094     // so we should check for that here.
1095     if (InvariantSize->isNegative())
1096       continue;
1097     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1098     // Confirm the invariant.start location size contains the load operand size
1099     // in bits. Also, the invariant.start should dominate the load, and we
1100     // should not hoist the load out of a loop that contains this dominating
1101     // invariant.start.
1102     if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1103         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1104       return true;
1105   }
1106 
1107   return false;
1108 }
1109 
1110 namespace {
1111 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1112 /// sink a given instruction out of a loop.  Does not address legality
1113 /// concerns such as aliasing or speculation safety.
1114 bool isHoistableAndSinkableInst(Instruction &I) {
1115   // Only these instructions are hoistable/sinkable.
1116   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1117           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1118           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1119           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1120           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1121           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1122           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1123 }
1124 /// Return true if MSSA knows there are no MemoryDefs in the loop.
1125 bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
1126   for (auto *BB : L->getBlocks())
1127     if (MSSAU.getMemorySSA()->getBlockDefs(BB))
1128       return false;
1129   return true;
1130 }
1131 
1132 /// Return true if I is the only Instruction with a MemoryAccess in L.
1133 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1134                         const MemorySSAUpdater &MSSAU) {
1135   for (auto *BB : L->getBlocks())
1136     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1137       int NotAPhi = 0;
1138       for (const auto &Acc : *Accs) {
1139         if (isa<MemoryPhi>(&Acc))
1140           continue;
1141         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1142         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1143           return false;
1144       }
1145     }
1146   return true;
1147 }
1148 }
1149 
1150 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1151                               Loop *CurLoop, MemorySSAUpdater &MSSAU,
1152                               bool TargetExecutesOncePerLoop,
1153                               SinkAndHoistLICMFlags &Flags,
1154                               OptimizationRemarkEmitter *ORE) {
1155   // If we don't understand the instruction, bail early.
1156   if (!isHoistableAndSinkableInst(I))
1157     return false;
1158 
1159   MemorySSA *MSSA = MSSAU.getMemorySSA();
1160   // Loads have extra constraints we have to verify before we can hoist them.
1161   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1162     if (!LI->isUnordered())
1163       return false; // Don't sink/hoist volatile or ordered atomic loads!
1164 
1165     // Loads from constant memory are always safe to move, even if they end up
1166     // in the same alias set as something that ends up being modified.
1167     if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
1168       return true;
1169     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1170       return true;
1171 
1172     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1173       return false; // Don't risk duplicating unordered loads
1174 
1175     // This checks for an invariant.start dominating the load.
1176     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1177       return true;
1178 
1179     bool Invalidated = pointerInvalidatedByLoop(
1180         MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
1181     // Check loop-invariant address because this may also be a sinkable load
1182     // whose address is not necessarily loop-invariant.
1183     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1184       ORE->emit([&]() {
1185         return OptimizationRemarkMissed(
1186                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1187                << "failed to move load with loop-invariant address "
1188                   "because the loop may invalidate its value";
1189       });
1190 
1191     return !Invalidated;
1192   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1193     // Don't sink or hoist dbg info; it's legal, but not useful.
1194     if (isa<DbgInfoIntrinsic>(I))
1195       return false;
1196 
1197     // Don't sink calls which can throw.
1198     if (CI->mayThrow())
1199       return false;
1200 
1201     // Convergent attribute has been used on operations that involve
1202     // inter-thread communication which results are implicitly affected by the
1203     // enclosing control flows. It is not safe to hoist or sink such operations
1204     // across control flow.
1205     if (CI->isConvergent())
1206       return false;
1207 
1208     using namespace PatternMatch;
1209     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1210       // Assumes don't actually alias anything or throw
1211       return true;
1212 
1213     if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
1214       // Widenable conditions don't actually alias anything or throw
1215       return true;
1216 
1217     // Handle simple cases by querying alias analysis.
1218     MemoryEffects Behavior = AA->getMemoryEffects(CI);
1219     if (Behavior.doesNotAccessMemory())
1220       return true;
1221     if (Behavior.onlyReadsMemory()) {
1222       // A readonly argmemonly function only reads from memory pointed to by
1223       // it's arguments with arbitrary offsets.  If we can prove there are no
1224       // writes to this memory in the loop, we can hoist or sink.
1225       if (Behavior.onlyAccessesArgPointees()) {
1226         // TODO: expand to writeable arguments
1227         for (Value *Op : CI->args())
1228           if (Op->getType()->isPointerTy() &&
1229               pointerInvalidatedByLoop(
1230                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1231                   Flags))
1232             return false;
1233         return true;
1234       }
1235 
1236       // If this call only reads from memory and there are no writes to memory
1237       // in the loop, we can hoist or sink the call as appropriate.
1238       if (isReadOnly(MSSAU, CurLoop))
1239         return true;
1240     }
1241 
1242     // FIXME: This should use mod/ref information to see if we can hoist or
1243     // sink the call.
1244 
1245     return false;
1246   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1247     // Fences alias (most) everything to provide ordering.  For the moment,
1248     // just give up if there are any other memory operations in the loop.
1249     return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1250   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1251     if (!SI->isUnordered())
1252       return false; // Don't sink/hoist volatile or ordered atomic store!
1253 
1254     // We can only hoist a store that we can prove writes a value which is not
1255     // read or overwritten within the loop.  For those cases, we fallback to
1256     // load store promotion instead.  TODO: We can extend this to cases where
1257     // there is exactly one write to the location and that write dominates an
1258     // arbitrary number of reads in the loop.
1259     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1260       return true;
1261     // If there are more accesses than the Promotion cap or no "quota" to
1262     // check clobber, then give up as we're not walking a list that long.
1263     if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
1264       return false;
1265     // If there are interfering Uses (i.e. their defining access is in the
1266     // loop), or ordered loads (stored as Defs!), don't move this store.
1267     // Could do better here, but this is conservatively correct.
1268     // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1269     // moving accesses. Can also extend to dominating uses.
1270     auto *SIMD = MSSA->getMemoryAccess(SI);
1271     for (auto *BB : CurLoop->getBlocks())
1272       if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1273         for (const auto &MA : *Accesses)
1274           if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1275             auto *MD = MU->getDefiningAccess();
1276             if (!MSSA->isLiveOnEntryDef(MD) &&
1277                 CurLoop->contains(MD->getBlock()))
1278               return false;
1279             // Disable hoisting past potentially interfering loads. Optimized
1280             // Uses may point to an access outside the loop, as getClobbering
1281             // checks the previous iteration when walking the backedge.
1282             // FIXME: More precise: no Uses that alias SI.
1283             if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
1284               return false;
1285           } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1286             if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1287               (void)LI; // Silence warning.
1288               assert(!LI->isUnordered() && "Expected unordered load");
1289               return false;
1290             }
1291             // Any call, while it may not be clobbering SI, it may be a use.
1292             if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1293               // Check if the call may read from the memory location written
1294               // to by SI. Check CI's attributes and arguments; the number of
1295               // such checks performed is limited above by NoOfMemAccTooLarge.
1296               ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
1297               if (isModOrRefSet(MRI))
1298                 return false;
1299             }
1300           }
1301       }
1302     auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
1303     Flags.incrementClobberingCalls();
1304     // If there are no clobbering Defs in the loop, store is safe to hoist.
1305     return MSSA->isLiveOnEntryDef(Source) ||
1306            !CurLoop->contains(Source->getBlock());
1307   }
1308 
1309   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1310 
1311   // We've established mechanical ability and aliasing, it's up to the caller
1312   // to check fault safety
1313   return true;
1314 }
1315 
1316 /// Returns true if a PHINode is a trivially replaceable with an
1317 /// Instruction.
1318 /// This is true when all incoming values are that instruction.
1319 /// This pattern occurs most often with LCSSA PHI nodes.
1320 ///
1321 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1322   for (const Value *IncValue : PN.incoming_values())
1323     if (IncValue != &I)
1324       return false;
1325 
1326   return true;
1327 }
1328 
1329 /// Return true if the instruction is free in the loop.
1330 static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
1331                          const TargetTransformInfo *TTI) {
1332   InstructionCost CostI =
1333       TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1334 
1335   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1336     if (CostI != TargetTransformInfo::TCC_Free)
1337       return false;
1338     // For a GEP, we cannot simply use getInstructionCost because currently
1339     // it optimistically assumes that a GEP will fold into addressing mode
1340     // regardless of its users.
1341     const BasicBlock *BB = GEP->getParent();
1342     for (const User *U : GEP->users()) {
1343       const Instruction *UI = cast<Instruction>(U);
1344       if (CurLoop->contains(UI) &&
1345           (BB != UI->getParent() ||
1346            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1347         return false;
1348     }
1349     return true;
1350   }
1351 
1352   return CostI == TargetTransformInfo::TCC_Free;
1353 }
1354 
1355 /// Return true if the only users of this instruction are outside of
1356 /// the loop. If this is true, we can sink the instruction to the exit
1357 /// blocks of the loop.
1358 ///
1359 /// We also return true if the instruction could be folded away in lowering.
1360 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
1361 static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
1362                                   const LoopSafetyInfo *SafetyInfo,
1363                                   TargetTransformInfo *TTI, bool &FreeInLoop,
1364                                   bool LoopNestMode) {
1365   const auto &BlockColors = SafetyInfo->getBlockColors();
1366   bool IsFree = isFreeInLoop(I, CurLoop, TTI);
1367   for (const User *U : I.users()) {
1368     const Instruction *UI = cast<Instruction>(U);
1369     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1370       const BasicBlock *BB = PN->getParent();
1371       // We cannot sink uses in catchswitches.
1372       if (isa<CatchSwitchInst>(BB->getTerminator()))
1373         return false;
1374 
1375       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1376       // phi use is too muddled.
1377       if (isa<CallInst>(I))
1378         if (!BlockColors.empty() &&
1379             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1380           return false;
1381 
1382       if (LoopNestMode) {
1383         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1384                UI->getNumOperands() == 1) {
1385           if (!CurLoop->contains(UI))
1386             break;
1387           UI = cast<Instruction>(UI->user_back());
1388         }
1389       }
1390     }
1391 
1392     if (CurLoop->contains(UI)) {
1393       if (IsFree) {
1394         FreeInLoop = true;
1395         continue;
1396       }
1397       return false;
1398     }
1399   }
1400   return true;
1401 }
1402 
1403 static Instruction *cloneInstructionInExitBlock(
1404     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1405     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1406   Instruction *New;
1407   if (auto *CI = dyn_cast<CallInst>(&I)) {
1408     const auto &BlockColors = SafetyInfo->getBlockColors();
1409 
1410     // Sinking call-sites need to be handled differently from other
1411     // instructions.  The cloned call-site needs a funclet bundle operand
1412     // appropriate for its location in the CFG.
1413     SmallVector<OperandBundleDef, 1> OpBundles;
1414     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1415          BundleIdx != BundleEnd; ++BundleIdx) {
1416       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1417       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1418         continue;
1419 
1420       OpBundles.emplace_back(Bundle);
1421     }
1422 
1423     if (!BlockColors.empty()) {
1424       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1425       assert(CV.size() == 1 && "non-unique color for exit block!");
1426       BasicBlock *BBColor = CV.front();
1427       Instruction *EHPad = BBColor->getFirstNonPHI();
1428       if (EHPad->isEHPad())
1429         OpBundles.emplace_back("funclet", EHPad);
1430     }
1431 
1432     New = CallInst::Create(CI, OpBundles);
1433   } else {
1434     New = I.clone();
1435   }
1436 
1437   New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
1438   if (!I.getName().empty())
1439     New->setName(I.getName() + ".le");
1440 
1441   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
1442     // Create a new MemoryAccess and let MemorySSA set its defining access.
1443     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1444         New, nullptr, New->getParent(), MemorySSA::Beginning);
1445     if (NewMemAcc) {
1446       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1447         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
1448       else {
1449         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1450         MSSAU.insertUse(MemUse, /*RenameUses=*/true);
1451       }
1452     }
1453   }
1454 
1455   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1456   // this is particularly cheap because we can rip off the PHI node that we're
1457   // replacing for the number and blocks of the predecessors.
1458   // OPT: If this shows up in a profile, we can instead finish sinking all
1459   // invariant instructions, and then walk their operands to re-establish
1460   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1461   // sinking bottom-up.
1462   for (Use &Op : New->operands())
1463     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1464       auto *OInst = cast<Instruction>(Op.get());
1465       PHINode *OpPN =
1466         PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1467                         OInst->getName() + ".lcssa", &ExitBlock.front());
1468       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1469         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1470       Op = OpPN;
1471     }
1472   return New;
1473 }
1474 
1475 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1476                              MemorySSAUpdater &MSSAU) {
1477   MSSAU.removeMemoryAccess(&I);
1478   SafetyInfo.removeInstruction(&I);
1479   I.eraseFromParent();
1480 }
1481 
1482 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1483                                   ICFLoopSafetyInfo &SafetyInfo,
1484                                   MemorySSAUpdater &MSSAU,
1485                                   ScalarEvolution *SE) {
1486   SafetyInfo.removeInstruction(&I);
1487   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1488   I.moveBefore(&Dest);
1489   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1490           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1491     MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
1492   if (SE)
1493     SE->forgetValue(&I);
1494 }
1495 
1496 static Instruction *sinkThroughTriviallyReplaceablePHI(
1497     PHINode *TPN, Instruction *I, LoopInfo *LI,
1498     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1499     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1500     MemorySSAUpdater &MSSAU) {
1501   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1502          "Expect only trivially replaceable PHI");
1503   BasicBlock *ExitBlock = TPN->getParent();
1504   Instruction *New;
1505   auto It = SunkCopies.find(ExitBlock);
1506   if (It != SunkCopies.end())
1507     New = It->second;
1508   else
1509     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1510         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1511   return New;
1512 }
1513 
1514 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1515   BasicBlock *BB = PN->getParent();
1516   if (!BB->canSplitPredecessors())
1517     return false;
1518   // It's not impossible to split EHPad blocks, but if BlockColors already exist
1519   // it require updating BlockColors for all offspring blocks accordingly. By
1520   // skipping such corner case, we can make updating BlockColors after splitting
1521   // predecessor fairly simple.
1522   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1523     return false;
1524   for (BasicBlock *BBPred : predecessors(BB)) {
1525     if (isa<IndirectBrInst>(BBPred->getTerminator()))
1526       return false;
1527   }
1528   return true;
1529 }
1530 
1531 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1532                                         LoopInfo *LI, const Loop *CurLoop,
1533                                         LoopSafetyInfo *SafetyInfo,
1534                                         MemorySSAUpdater *MSSAU) {
1535 #ifndef NDEBUG
1536   SmallVector<BasicBlock *, 32> ExitBlocks;
1537   CurLoop->getUniqueExitBlocks(ExitBlocks);
1538   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1539                                              ExitBlocks.end());
1540 #endif
1541   BasicBlock *ExitBB = PN->getParent();
1542   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1543 
1544   // Split predecessors of the loop exit to make instructions in the loop are
1545   // exposed to exit blocks through trivially replaceable PHIs while keeping the
1546   // loop in the canonical form where each predecessor of each exit block should
1547   // be contained within the loop. For example, this will convert the loop below
1548   // from
1549   //
1550   // LB1:
1551   //   %v1 =
1552   //   br %LE, %LB2
1553   // LB2:
1554   //   %v2 =
1555   //   br %LE, %LB1
1556   // LE:
1557   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1558   //
1559   // to
1560   //
1561   // LB1:
1562   //   %v1 =
1563   //   br %LE.split, %LB2
1564   // LB2:
1565   //   %v2 =
1566   //   br %LE.split2, %LB1
1567   // LE.split:
1568   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1569   //   br %LE
1570   // LE.split2:
1571   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1572   //   br %LE
1573   // LE:
1574   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1575   //
1576   const auto &BlockColors = SafetyInfo->getBlockColors();
1577   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1578   while (!PredBBs.empty()) {
1579     BasicBlock *PredBB = *PredBBs.begin();
1580     assert(CurLoop->contains(PredBB) &&
1581            "Expect all predecessors are in the loop");
1582     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1583       BasicBlock *NewPred = SplitBlockPredecessors(
1584           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1585       // Since we do not allow splitting EH-block with BlockColors in
1586       // canSplitPredecessors(), we can simply assign predecessor's color to
1587       // the new block.
1588       if (!BlockColors.empty())
1589         // Grab a reference to the ColorVector to be inserted before getting the
1590         // reference to the vector we are copying because inserting the new
1591         // element in BlockColors might cause the map to be reallocated.
1592         SafetyInfo->copyColors(NewPred, PredBB);
1593     }
1594     PredBBs.remove(PredBB);
1595   }
1596 }
1597 
1598 /// When an instruction is found to only be used outside of the loop, this
1599 /// function moves it to the exit blocks and patches up SSA form as needed.
1600 /// This method is guaranteed to remove the original instruction from its
1601 /// position, and may either delete it or move it to outside of the loop.
1602 ///
1603 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1604                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1605                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1606   bool Changed = false;
1607   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1608 
1609   // Iterate over users to be ready for actual sinking. Replace users via
1610   // unreachable blocks with undef and make all user PHIs trivially replaceable.
1611   SmallPtrSet<Instruction *, 8> VisitedUsers;
1612   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1613     auto *User = cast<Instruction>(*UI);
1614     Use &U = UI.getUse();
1615     ++UI;
1616 
1617     if (VisitedUsers.count(User) || CurLoop->contains(User))
1618       continue;
1619 
1620     if (!DT->isReachableFromEntry(User->getParent())) {
1621       U = PoisonValue::get(I.getType());
1622       Changed = true;
1623       continue;
1624     }
1625 
1626     // The user must be a PHI node.
1627     PHINode *PN = cast<PHINode>(User);
1628 
1629     // Surprisingly, instructions can be used outside of loops without any
1630     // exits.  This can only happen in PHI nodes if the incoming block is
1631     // unreachable.
1632     BasicBlock *BB = PN->getIncomingBlock(U);
1633     if (!DT->isReachableFromEntry(BB)) {
1634       U = PoisonValue::get(I.getType());
1635       Changed = true;
1636       continue;
1637     }
1638 
1639     VisitedUsers.insert(PN);
1640     if (isTriviallyReplaceablePHI(*PN, I))
1641       continue;
1642 
1643     if (!canSplitPredecessors(PN, SafetyInfo))
1644       return Changed;
1645 
1646     // Split predecessors of the PHI so that we can make users trivially
1647     // replaceable.
1648     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1649 
1650     // Should rebuild the iterators, as they may be invalidated by
1651     // splitPredecessorsOfLoopExit().
1652     UI = I.user_begin();
1653     UE = I.user_end();
1654   }
1655 
1656   if (VisitedUsers.empty())
1657     return Changed;
1658 
1659   ORE->emit([&]() {
1660     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1661            << "sinking " << ore::NV("Inst", &I);
1662   });
1663   if (isa<LoadInst>(I))
1664     ++NumMovedLoads;
1665   else if (isa<CallInst>(I))
1666     ++NumMovedCalls;
1667   ++NumSunk;
1668 
1669 #ifndef NDEBUG
1670   SmallVector<BasicBlock *, 32> ExitBlocks;
1671   CurLoop->getUniqueExitBlocks(ExitBlocks);
1672   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1673                                              ExitBlocks.end());
1674 #endif
1675 
1676   // Clones of this instruction. Don't create more than one per exit block!
1677   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1678 
1679   // If this instruction is only used outside of the loop, then all users are
1680   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1681   // the instruction.
1682   // First check if I is worth sinking for all uses. Sink only when it is worth
1683   // across all uses.
1684   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1685   for (auto *UI : Users) {
1686     auto *User = cast<Instruction>(UI);
1687 
1688     if (CurLoop->contains(User))
1689       continue;
1690 
1691     PHINode *PN = cast<PHINode>(User);
1692     assert(ExitBlockSet.count(PN->getParent()) &&
1693            "The LCSSA PHI is not in an exit block!");
1694 
1695     // The PHI must be trivially replaceable.
1696     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1697         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1698     PN->replaceAllUsesWith(New);
1699     eraseInstruction(*PN, *SafetyInfo, MSSAU);
1700     Changed = true;
1701   }
1702   return Changed;
1703 }
1704 
1705 /// When an instruction is found to only use loop invariant operands that
1706 /// is safe to hoist, this instruction is called to do the dirty work.
1707 ///
1708 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1709                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1710                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1711                   OptimizationRemarkEmitter *ORE) {
1712   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1713                     << I << "\n");
1714   ORE->emit([&]() {
1715     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1716                                                          << ore::NV("Inst", &I);
1717   });
1718 
1719   // Metadata can be dependent on conditions we are hoisting above.
1720   // Conservatively strip all metadata on the instruction unless we were
1721   // guaranteed to execute I if we entered the loop, in which case the metadata
1722   // is valid in the loop preheader.
1723   // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1724   // then moving to the preheader means we should strip attributes on the call
1725   // that can cause UB since we may be hoisting above conditions that allowed
1726   // inferring those attributes. They may not be valid at the preheader.
1727   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1728       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1729       // time in isGuaranteedToExecute if we don't actually have anything to
1730       // drop.  It is a compile time optimization, not required for correctness.
1731       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1732     I.dropUndefImplyingAttrsAndUnknownMetadata();
1733 
1734   if (isa<PHINode>(I))
1735     // Move the new node to the end of the phi list in the destination block.
1736     moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1737   else
1738     // Move the new node to the destination block, before its terminator.
1739     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1740 
1741   I.updateLocationAfterHoist();
1742 
1743   if (isa<LoadInst>(I))
1744     ++NumMovedLoads;
1745   else if (isa<CallInst>(I))
1746     ++NumMovedCalls;
1747   ++NumHoisted;
1748 }
1749 
1750 /// Only sink or hoist an instruction if it is not a trapping instruction,
1751 /// or if the instruction is known not to trap when moved to the preheader.
1752 /// or if it is a trapping instruction and is guaranteed to execute.
1753 static bool isSafeToExecuteUnconditionally(
1754     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1755     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1756     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1757     AssumptionCache *AC, bool AllowSpeculation) {
1758   if (AllowSpeculation &&
1759       isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
1760     return true;
1761 
1762   bool GuaranteedToExecute =
1763       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1764 
1765   if (!GuaranteedToExecute) {
1766     auto *LI = dyn_cast<LoadInst>(&Inst);
1767     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1768       ORE->emit([&]() {
1769         return OptimizationRemarkMissed(
1770                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1771                << "failed to hoist load with loop-invariant address "
1772                   "because load is conditionally executed";
1773       });
1774   }
1775 
1776   return GuaranteedToExecute;
1777 }
1778 
1779 namespace {
1780 class LoopPromoter : public LoadAndStorePromoter {
1781   Value *SomePtr; // Designated pointer to store to.
1782   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1783   SmallVectorImpl<Instruction *> &LoopInsertPts;
1784   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1785   PredIteratorCache &PredCache;
1786   MemorySSAUpdater &MSSAU;
1787   LoopInfo &LI;
1788   DebugLoc DL;
1789   Align Alignment;
1790   bool UnorderedAtomic;
1791   AAMDNodes AATags;
1792   ICFLoopSafetyInfo &SafetyInfo;
1793   bool CanInsertStoresInExitBlocks;
1794   ArrayRef<const Instruction *> Uses;
1795 
1796   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1797   // (if legal) if doing so would add an out-of-loop use to an instruction
1798   // defined in-loop.
1799   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1800     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1801       return V;
1802 
1803     Instruction *I = cast<Instruction>(V);
1804     // We need to create an LCSSA PHI node for the incoming value and
1805     // store that.
1806     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1807                                   I->getName() + ".lcssa", &BB->front());
1808     for (BasicBlock *Pred : PredCache.get(BB))
1809       PN->addIncoming(I, Pred);
1810     return PN;
1811   }
1812 
1813 public:
1814   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1815                SmallVectorImpl<BasicBlock *> &LEB,
1816                SmallVectorImpl<Instruction *> &LIP,
1817                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1818                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1819                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1820                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1821       : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1822         LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1823         LI(li), DL(std::move(dl)), Alignment(Alignment),
1824         UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1825         SafetyInfo(SafetyInfo),
1826         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1827 
1828   void insertStoresInLoopExitBlocks() {
1829     // Insert stores after in the loop exit blocks.  Each exit block gets a
1830     // store of the live-out values that feed them.  Since we've already told
1831     // the SSA updater about the defs in the loop and the preheader
1832     // definition, it is all set and we can start using it.
1833     DIAssignID *NewID = nullptr;
1834     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1835       BasicBlock *ExitBlock = LoopExitBlocks[i];
1836       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1837       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1838       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1839       Instruction *InsertPos = LoopInsertPts[i];
1840       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1841       if (UnorderedAtomic)
1842         NewSI->setOrdering(AtomicOrdering::Unordered);
1843       NewSI->setAlignment(Alignment);
1844       NewSI->setDebugLoc(DL);
1845       // Attach DIAssignID metadata to the new store, generating it on the
1846       // first loop iteration.
1847       if (i == 0) {
1848         // NewSI will have its DIAssignID set here if there are any stores in
1849         // Uses with a DIAssignID attachment. This merged ID will then be
1850         // attached to the other inserted stores (in the branch below).
1851         NewSI->mergeDIAssignID(Uses);
1852         NewID = cast_or_null<DIAssignID>(
1853             NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1854       } else {
1855         // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1856         // above.
1857         NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1858       }
1859 
1860       if (AATags)
1861         NewSI->setAAMetadata(AATags);
1862 
1863       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1864       MemoryAccess *NewMemAcc;
1865       if (!MSSAInsertPoint) {
1866         NewMemAcc = MSSAU.createMemoryAccessInBB(
1867             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1868       } else {
1869         NewMemAcc =
1870             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1871       }
1872       MSSAInsertPts[i] = NewMemAcc;
1873       MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1874       // FIXME: true for safety, false may still be correct.
1875     }
1876   }
1877 
1878   void doExtraRewritesBeforeFinalDeletion() override {
1879     if (CanInsertStoresInExitBlocks)
1880       insertStoresInLoopExitBlocks();
1881   }
1882 
1883   void instructionDeleted(Instruction *I) const override {
1884     SafetyInfo.removeInstruction(I);
1885     MSSAU.removeMemoryAccess(I);
1886   }
1887 
1888   bool shouldDelete(Instruction *I) const override {
1889     if (isa<StoreInst>(I))
1890       return CanInsertStoresInExitBlocks;
1891     return true;
1892   }
1893 };
1894 
1895 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1896                                  DominatorTree *DT) {
1897   // We can perform the captured-before check against any instruction in the
1898   // loop header, as the loop header is reachable from any instruction inside
1899   // the loop.
1900   // TODO: ReturnCaptures=true shouldn't be necessary here.
1901   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1902                                      /* StoreCaptures */ true,
1903                                      L->getHeader()->getTerminator(), DT);
1904 }
1905 
1906 /// Return true if we can prove that a caller cannot inspect the object if an
1907 /// unwind occurs inside the loop.
1908 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1909                                 DominatorTree *DT) {
1910   bool RequiresNoCaptureBeforeUnwind;
1911   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1912     return false;
1913 
1914   return !RequiresNoCaptureBeforeUnwind ||
1915          isNotCapturedBeforeOrInLoop(Object, L, DT);
1916 }
1917 
1918 bool isWritableObject(const Value *Object) {
1919   // TODO: Alloca might not be writable after its lifetime ends.
1920   // See https://github.com/llvm/llvm-project/issues/51838.
1921   if (isa<AllocaInst>(Object))
1922     return true;
1923 
1924   // TODO: Also handle sret.
1925   if (auto *A = dyn_cast<Argument>(Object))
1926     return A->hasByValAttr();
1927 
1928   if (auto *G = dyn_cast<GlobalVariable>(Object))
1929     return !G->isConstant();
1930 
1931   // TODO: Noalias has nothing to do with writability, this should check for
1932   // an allocator function.
1933   return isNoAliasCall(Object);
1934 }
1935 
1936 bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1937                          TargetTransformInfo *TTI) {
1938   // The object must be function-local to start with, and then not captured
1939   // before/in the loop.
1940   return (isIdentifiedFunctionLocal(Object) &&
1941           isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1942          (TTI->isSingleThreaded() || SingleThread);
1943 }
1944 
1945 } // namespace
1946 
1947 /// Try to promote memory values to scalars by sinking stores out of the
1948 /// loop and moving loads to before the loop.  We do this by looping over
1949 /// the stores in the loop, looking for stores to Must pointers which are
1950 /// loop invariant.
1951 ///
1952 bool llvm::promoteLoopAccessesToScalars(
1953     const SmallSetVector<Value *, 8> &PointerMustAliases,
1954     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1955     SmallVectorImpl<Instruction *> &InsertPts,
1956     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1957     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1958     const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1959     MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1960     OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1961     bool HasReadsOutsideSet) {
1962   // Verify inputs.
1963   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1964          SafetyInfo != nullptr &&
1965          "Unexpected Input to promoteLoopAccessesToScalars");
1966 
1967   LLVM_DEBUG({
1968     dbgs() << "Trying to promote set of must-aliased pointers:\n";
1969     for (Value *Ptr : PointerMustAliases)
1970       dbgs() << "  " << *Ptr << "\n";
1971   });
1972   ++NumPromotionCandidates;
1973 
1974   Value *SomePtr = *PointerMustAliases.begin();
1975   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1976 
1977   // It is not safe to promote a load/store from the loop if the load/store is
1978   // conditional.  For example, turning:
1979   //
1980   //    for () { if (c) *P += 1; }
1981   //
1982   // into:
1983   //
1984   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
1985   //
1986   // is not safe, because *P may only be valid to access if 'c' is true.
1987   //
1988   // The safety property divides into two parts:
1989   // p1) The memory may not be dereferenceable on entry to the loop.  In this
1990   //    case, we can't insert the required load in the preheader.
1991   // p2) The memory model does not allow us to insert a store along any dynamic
1992   //    path which did not originally have one.
1993   //
1994   // If at least one store is guaranteed to execute, both properties are
1995   // satisfied, and promotion is legal.
1996   //
1997   // This, however, is not a necessary condition. Even if no store/load is
1998   // guaranteed to execute, we can still establish these properties.
1999   // We can establish (p1) by proving that hoisting the load into the preheader
2000   // is safe (i.e. proving dereferenceability on all paths through the loop). We
2001   // can use any access within the alias set to prove dereferenceability,
2002   // since they're all must alias.
2003   //
2004   // There are two ways establish (p2):
2005   // a) Prove the location is thread-local. In this case the memory model
2006   // requirement does not apply, and stores are safe to insert.
2007   // b) Prove a store dominates every exit block. In this case, if an exit
2008   // blocks is reached, the original dynamic path would have taken us through
2009   // the store, so inserting a store into the exit block is safe. Note that this
2010   // is different from the store being guaranteed to execute. For instance,
2011   // if an exception is thrown on the first iteration of the loop, the original
2012   // store is never executed, but the exit blocks are not executed either.
2013 
2014   bool DereferenceableInPH = false;
2015   bool StoreIsGuanteedToExecute = false;
2016   bool FoundLoadToPromote = false;
2017   // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
2018   enum {
2019     StoreSafe,
2020     StoreUnsafe,
2021     StoreSafetyUnknown,
2022   } StoreSafety = StoreSafetyUnknown;
2023 
2024   SmallVector<Instruction *, 64> LoopUses;
2025 
2026   // We start with an alignment of one and try to find instructions that allow
2027   // us to prove better alignment.
2028   Align Alignment;
2029   // Keep track of which types of access we see
2030   bool SawUnorderedAtomic = false;
2031   bool SawNotAtomic = false;
2032   AAMDNodes AATags;
2033 
2034   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2035 
2036   // If there are reads outside the promoted set, then promoting stores is
2037   // definitely not safe.
2038   if (HasReadsOutsideSet)
2039     StoreSafety = StoreUnsafe;
2040 
2041   if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2042     // If a loop can throw, we have to insert a store along each unwind edge.
2043     // That said, we can't actually make the unwind edge explicit. Therefore,
2044     // we have to prove that the store is dead along the unwind edge.  We do
2045     // this by proving that the caller can't have a reference to the object
2046     // after return and thus can't possibly load from the object.
2047     Value *Object = getUnderlyingObject(SomePtr);
2048     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2049       StoreSafety = StoreUnsafe;
2050   }
2051 
2052   // Check that all accesses to pointers in the alias set use the same type.
2053   // We cannot (yet) promote a memory location that is loaded and stored in
2054   // different sizes.  While we are at it, collect alignment and AA info.
2055   Type *AccessTy = nullptr;
2056   for (Value *ASIV : PointerMustAliases) {
2057     for (Use &U : ASIV->uses()) {
2058       // Ignore instructions that are outside the loop.
2059       Instruction *UI = dyn_cast<Instruction>(U.getUser());
2060       if (!UI || !CurLoop->contains(UI))
2061         continue;
2062 
2063       // If there is an non-load/store instruction in the loop, we can't promote
2064       // it.
2065       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2066         if (!Load->isUnordered())
2067           return false;
2068 
2069         SawUnorderedAtomic |= Load->isAtomic();
2070         SawNotAtomic |= !Load->isAtomic();
2071         FoundLoadToPromote = true;
2072 
2073         Align InstAlignment = Load->getAlign();
2074 
2075         // Note that proving a load safe to speculate requires proving
2076         // sufficient alignment at the target location.  Proving it guaranteed
2077         // to execute does as well.  Thus we can increase our guaranteed
2078         // alignment as well.
2079         if (!DereferenceableInPH || (InstAlignment > Alignment))
2080           if (isSafeToExecuteUnconditionally(
2081                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2082                   Preheader->getTerminator(), AC, AllowSpeculation)) {
2083             DereferenceableInPH = true;
2084             Alignment = std::max(Alignment, InstAlignment);
2085           }
2086       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2087         // Stores *of* the pointer are not interesting, only stores *to* the
2088         // pointer.
2089         if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2090           continue;
2091         if (!Store->isUnordered())
2092           return false;
2093 
2094         SawUnorderedAtomic |= Store->isAtomic();
2095         SawNotAtomic |= !Store->isAtomic();
2096 
2097         // If the store is guaranteed to execute, both properties are satisfied.
2098         // We may want to check if a store is guaranteed to execute even if we
2099         // already know that promotion is safe, since it may have higher
2100         // alignment than any other guaranteed stores, in which case we can
2101         // raise the alignment on the promoted store.
2102         Align InstAlignment = Store->getAlign();
2103         bool GuaranteedToExecute =
2104             SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2105         StoreIsGuanteedToExecute |= GuaranteedToExecute;
2106         if (GuaranteedToExecute) {
2107           DereferenceableInPH = true;
2108           if (StoreSafety == StoreSafetyUnknown)
2109             StoreSafety = StoreSafe;
2110           Alignment = std::max(Alignment, InstAlignment);
2111         }
2112 
2113         // If a store dominates all exit blocks, it is safe to sink.
2114         // As explained above, if an exit block was executed, a dominating
2115         // store must have been executed at least once, so we are not
2116         // introducing stores on paths that did not have them.
2117         // Note that this only looks at explicit exit blocks. If we ever
2118         // start sinking stores into unwind edges (see above), this will break.
2119         if (StoreSafety == StoreSafetyUnknown &&
2120             llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2121               return DT->dominates(Store->getParent(), Exit);
2122             }))
2123           StoreSafety = StoreSafe;
2124 
2125         // If the store is not guaranteed to execute, we may still get
2126         // deref info through it.
2127         if (!DereferenceableInPH) {
2128           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2129               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2130               Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
2131         }
2132       } else
2133         continue; // Not a load or store.
2134 
2135       if (!AccessTy)
2136         AccessTy = getLoadStoreType(UI);
2137       else if (AccessTy != getLoadStoreType(UI))
2138         return false;
2139 
2140       // Merge the AA tags.
2141       if (LoopUses.empty()) {
2142         // On the first load/store, just take its AA tags.
2143         AATags = UI->getAAMetadata();
2144       } else if (AATags) {
2145         AATags = AATags.merge(UI->getAAMetadata());
2146       }
2147 
2148       LoopUses.push_back(UI);
2149     }
2150   }
2151 
2152   // If we found both an unordered atomic instruction and a non-atomic memory
2153   // access, bail.  We can't blindly promote non-atomic to atomic since we
2154   // might not be able to lower the result.  We can't downgrade since that
2155   // would violate memory model.  Also, align 0 is an error for atomics.
2156   if (SawUnorderedAtomic && SawNotAtomic)
2157     return false;
2158 
2159   // If we're inserting an atomic load in the preheader, we must be able to
2160   // lower it.  We're only guaranteed to be able to lower naturally aligned
2161   // atomics.
2162   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2163     return false;
2164 
2165   // If we couldn't prove we can hoist the load, bail.
2166   if (!DereferenceableInPH) {
2167     LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2168     return false;
2169   }
2170 
2171   // We know we can hoist the load, but don't have a guaranteed store.
2172   // Check whether the location is writable and thread-local. If it is, then we
2173   // can insert stores along paths which originally didn't have them without
2174   // violating the memory model.
2175   if (StoreSafety == StoreSafetyUnknown) {
2176     Value *Object = getUnderlyingObject(SomePtr);
2177     if (isWritableObject(Object) &&
2178         isThreadLocalObject(Object, CurLoop, DT, TTI))
2179       StoreSafety = StoreSafe;
2180   }
2181 
2182   // If we've still failed to prove we can sink the store, hoist the load
2183   // only, if possible.
2184   if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2185     // If we cannot hoist the load either, give up.
2186     return false;
2187 
2188   // Lets do the promotion!
2189   if (StoreSafety == StoreSafe) {
2190     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2191                       << '\n');
2192     ++NumLoadStorePromoted;
2193   } else {
2194     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2195                       << '\n');
2196     ++NumLoadPromoted;
2197   }
2198 
2199   ORE->emit([&]() {
2200     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2201                               LoopUses[0])
2202            << "Moving accesses to memory location out of the loop";
2203   });
2204 
2205   // Look at all the loop uses, and try to merge their locations.
2206   std::vector<const DILocation *> LoopUsesLocs;
2207   for (auto *U : LoopUses)
2208     LoopUsesLocs.push_back(U->getDebugLoc().get());
2209   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2210 
2211   // We use the SSAUpdater interface to insert phi nodes as required.
2212   SmallVector<PHINode *, 16> NewPHIs;
2213   SSAUpdater SSA(&NewPHIs);
2214   LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2215                         MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2216                         SawUnorderedAtomic, AATags, *SafetyInfo,
2217                         StoreSafety == StoreSafe);
2218 
2219   // Set up the preheader to have a definition of the value.  It is the live-out
2220   // value from the preheader that uses in the loop will use.
2221   LoadInst *PreheaderLoad = nullptr;
2222   if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
2223     PreheaderLoad =
2224         new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2225                      Preheader->getTerminator());
2226     if (SawUnorderedAtomic)
2227       PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2228     PreheaderLoad->setAlignment(Alignment);
2229     PreheaderLoad->setDebugLoc(DebugLoc());
2230     if (AATags)
2231       PreheaderLoad->setAAMetadata(AATags);
2232 
2233     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2234         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2235     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2236     MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2237     SSA.AddAvailableValue(Preheader, PreheaderLoad);
2238   } else {
2239     SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2240   }
2241 
2242   if (VerifyMemorySSA)
2243     MSSAU.getMemorySSA()->verifyMemorySSA();
2244   // Rewrite all the loads in the loop and remember all the definitions from
2245   // stores in the loop.
2246   Promoter.run(LoopUses);
2247 
2248   if (VerifyMemorySSA)
2249     MSSAU.getMemorySSA()->verifyMemorySSA();
2250   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2251   if (PreheaderLoad && PreheaderLoad->use_empty())
2252     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2253 
2254   return true;
2255 }
2256 
2257 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2258                                 function_ref<void(Instruction *)> Fn) {
2259   for (const BasicBlock *BB : L->blocks())
2260     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2261       for (const auto &Access : *Accesses)
2262         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2263           Fn(MUD->getMemoryInst());
2264 }
2265 
2266 // The bool indicates whether there might be reads outside the set, in which
2267 // case only loads may be promoted.
2268 static SmallVector<PointersAndHasReadsOutsideSet, 0>
2269 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2270   BatchAAResults BatchAA(*AA);
2271   AliasSetTracker AST(BatchAA);
2272 
2273   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2274     if (const auto *SI = dyn_cast<StoreInst>(I))
2275       return L->isLoopInvariant(SI->getPointerOperand());
2276     if (const auto *LI = dyn_cast<LoadInst>(I))
2277       return L->isLoopInvariant(LI->getPointerOperand());
2278     return false;
2279   };
2280 
2281   // Populate AST with potentially promotable accesses.
2282   SmallPtrSet<Value *, 16> AttemptingPromotion;
2283   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2284     if (IsPotentiallyPromotable(I)) {
2285       AttemptingPromotion.insert(I);
2286       AST.add(I);
2287     }
2288   });
2289 
2290   // We're only interested in must-alias sets that contain a mod.
2291   SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2292   for (AliasSet &AS : AST)
2293     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2294       Sets.push_back({&AS, false});
2295 
2296   if (Sets.empty())
2297     return {}; // Nothing to promote...
2298 
2299   // Discard any sets for which there is an aliasing non-promotable access.
2300   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2301     if (AttemptingPromotion.contains(I))
2302       return;
2303 
2304     llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2305       ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2306       // Cannot promote if there are writes outside the set.
2307       if (isModSet(MR))
2308         return true;
2309       if (isRefSet(MR)) {
2310         // Remember reads outside the set.
2311         Pair.setInt(true);
2312         // If this is a mod-only set and there are reads outside the set,
2313         // we will not be able to promote, so bail out early.
2314         return !Pair.getPointer()->isRef();
2315       }
2316       return false;
2317     });
2318   });
2319 
2320   SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2321   for (auto [Set, HasReadsOutsideSet] : Sets) {
2322     SmallSetVector<Value *, 8> PointerMustAliases;
2323     for (const auto &ASI : *Set)
2324       PointerMustAliases.insert(ASI.getValue());
2325     Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2326   }
2327 
2328   return Result;
2329 }
2330 
2331 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2332                                      Loop *CurLoop, Instruction &I,
2333                                      SinkAndHoistLICMFlags &Flags) {
2334   // For hoisting, use the walker to determine safety
2335   if (!Flags.getIsSink()) {
2336     MemoryAccess *Source;
2337     // See declaration of SetLicmMssaOptCap for usage details.
2338     if (Flags.tooManyClobberingCalls())
2339       Source = MU->getDefiningAccess();
2340     else {
2341       Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
2342       Flags.incrementClobberingCalls();
2343     }
2344     return !MSSA->isLiveOnEntryDef(Source) &&
2345            CurLoop->contains(Source->getBlock());
2346   }
2347 
2348   // For sinking, we'd need to check all Defs below this use. The getClobbering
2349   // call will look on the backedge of the loop, but will check aliasing with
2350   // the instructions on the previous iteration.
2351   // For example:
2352   // for (i ... )
2353   //   load a[i] ( Use (LoE)
2354   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2355   //   i++;
2356   // The load sees no clobbering inside the loop, as the backedge alias check
2357   // does phi translation, and will check aliasing against store a[i-1].
2358   // However sinking the load outside the loop, below the store is incorrect.
2359 
2360   // For now, only sink if there are no Defs in the loop, and the existing ones
2361   // precede the use and are in the same block.
2362   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2363   // needs PostDominatorTreeAnalysis.
2364   // FIXME: More precise: no Defs that alias this Use.
2365   if (Flags.tooManyMemoryAccesses())
2366     return true;
2367   for (auto *BB : CurLoop->getBlocks())
2368     if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2369       return true;
2370   // When sinking, the source block may not be part of the loop so check it.
2371   if (!CurLoop->contains(&I))
2372     return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2373 
2374   return false;
2375 }
2376 
2377 bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2378   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2379     for (const auto &MA : *Accesses)
2380       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2381         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2382           return true;
2383   return false;
2384 }
2385 
2386 /// Little predicate that returns true if the specified basic block is in
2387 /// a subloop of the current one, not the current one itself.
2388 ///
2389 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2390   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2391   return LI->getLoopFor(BB) != CurLoop;
2392 }
2393