xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs loop invariant code motion, attempting to remove as much
10 // code from the body of a loop as possible.  It does this by either hoisting
11 // code into the preheader block, or by sinking code to the exit blocks if it is
12 // safe.  This pass also promotes must-aliased memory locations in the loop to
13 // live in registers, thus hoisting and sinking "invariant" loads and stores.
14 //
15 // Hoisting operations out of loops is a canonicalization transform.  It
16 // enables and simplifies subsequent optimizations in the middle-end.
17 // Rematerialization of hoisted instructions to reduce register pressure is the
18 // responsibility of the back-end, which has more accurate information about
19 // register pressure and also handles other optimizations than LICM that
20 // increase live-ranges.
21 //
22 // This pass uses alias analysis for two purposes:
23 //
24 //  1. Moving loop invariant loads and calls out of loops.  If we can determine
25 //     that a load or call inside of a loop never aliases anything stored to,
26 //     we can hoist it or sink it like any other instruction.
27 //  2. Scalar Promotion of Memory - If there is a store instruction inside of
28 //     the loop, we try to move the store to happen AFTER the loop instead of
29 //     inside of the loop.  This can only happen if a few conditions are true:
30 //       A. The pointer stored through is loop invariant
31 //       B. There are no stores or loads in the loop which _may_ alias the
32 //          pointer.  There are no calls in the loop which mod/ref the pointer.
33 //     If these conditions are true, we can promote the loads and stores in the
34 //     loop of the pointer to use a temporary alloca'd variable.  We then use
35 //     the SSAUpdater to construct the appropriate SSA form for the value.
36 //
37 //===----------------------------------------------------------------------===//
38 
39 #include "llvm/Transforms/Scalar/LICM.h"
40 #include "llvm/ADT/PriorityWorklist.h"
41 #include "llvm/ADT/SetOperations.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/Analysis/AliasAnalysis.h"
44 #include "llvm/Analysis/AliasSetTracker.h"
45 #include "llvm/Analysis/AssumptionCache.h"
46 #include "llvm/Analysis/CaptureTracking.h"
47 #include "llvm/Analysis/GuardUtils.h"
48 #include "llvm/Analysis/LazyBlockFrequencyInfo.h"
49 #include "llvm/Analysis/Loads.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/Analysis/LoopIterator.h"
52 #include "llvm/Analysis/LoopNestAnalysis.h"
53 #include "llvm/Analysis/LoopPass.h"
54 #include "llvm/Analysis/MemorySSA.h"
55 #include "llvm/Analysis/MemorySSAUpdater.h"
56 #include "llvm/Analysis/MustExecute.h"
57 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
58 #include "llvm/Analysis/ScalarEvolution.h"
59 #include "llvm/Analysis/TargetLibraryInfo.h"
60 #include "llvm/Analysis/TargetTransformInfo.h"
61 #include "llvm/Analysis/ValueTracking.h"
62 #include "llvm/IR/CFG.h"
63 #include "llvm/IR/Constants.h"
64 #include "llvm/IR/DataLayout.h"
65 #include "llvm/IR/DebugInfoMetadata.h"
66 #include "llvm/IR/DerivedTypes.h"
67 #include "llvm/IR/Dominators.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/IRBuilder.h"
71 #include "llvm/IR/LLVMContext.h"
72 #include "llvm/IR/Metadata.h"
73 #include "llvm/IR/PatternMatch.h"
74 #include "llvm/IR/PredIteratorCache.h"
75 #include "llvm/InitializePasses.h"
76 #include "llvm/Support/CommandLine.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Target/TargetOptions.h"
80 #include "llvm/Transforms/Scalar.h"
81 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
82 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
83 #include "llvm/Transforms/Utils/Local.h"
84 #include "llvm/Transforms/Utils/LoopUtils.h"
85 #include "llvm/Transforms/Utils/SSAUpdater.h"
86 #include <algorithm>
87 #include <utility>
88 using namespace llvm;
89 
90 namespace llvm {
91 class LPMUpdater;
92 } // namespace llvm
93 
94 #define DEBUG_TYPE "licm"
95 
96 STATISTIC(NumCreatedBlocks, "Number of blocks created");
97 STATISTIC(NumClonedBranches, "Number of branches cloned");
98 STATISTIC(NumSunk, "Number of instructions sunk out of loop");
99 STATISTIC(NumHoisted, "Number of instructions hoisted out of loop");
100 STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
101 STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
102 STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
103 STATISTIC(NumLoadPromoted, "Number of load-only promotions");
104 STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
105 STATISTIC(NumMinMaxHoisted,
106           "Number of min/max expressions hoisted out of the loop");
107 STATISTIC(NumGEPsHoisted,
108           "Number of geps reassociated and hoisted out of the loop");
109 STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110                             "and hoisted out of the loop");
111 
112 /// Memory promotion is enabled by default.
113 static cl::opt<bool>
114     DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
115                      cl::desc("Disable memory promotion in LICM pass"));
116 
117 static cl::opt<bool> ControlFlowHoisting(
118     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
119     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
120 
121 static cl::opt<bool>
122     SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
123                  cl::desc("Force thread model single in LICM pass"));
124 
125 static cl::opt<uint32_t> MaxNumUsesTraversed(
126     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
127     cl::desc("Max num uses visited for identifying load "
128              "invariance in loop using invariant start (default = 8)"));
129 
130 // Experimental option to allow imprecision in LICM in pathological cases, in
131 // exchange for faster compile. This is to be removed if MemorySSA starts to
132 // address the same issue. LICM calls MemorySSAWalker's
133 // getClobberingMemoryAccess, up to the value of the Cap, getting perfect
134 // accuracy. Afterwards, LICM will call into MemorySSA's getDefiningAccess,
135 // which may not be precise, since optimizeUses is capped. The result is
136 // correct, but we may not get as "far up" as possible to get which access is
137 // clobbering the one queried.
138 cl::opt<unsigned> llvm::SetLicmMssaOptCap(
139     "licm-mssa-optimization-cap", cl::init(100), cl::Hidden,
140     cl::desc("Enable imprecision in LICM in pathological cases, in exchange "
141              "for faster compile. Caps the MemorySSA clobbering calls."));
142 
143 // Experimentally, memory promotion carries less importance than sinking and
144 // hoisting. Limit when we do promotion when using MemorySSA, in order to save
145 // compile time.
146 cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
147     "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden,
148     cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no "
149              "effect. When MSSA in LICM is enabled, then this is the maximum "
150              "number of accesses allowed to be present in a loop in order to "
151              "enable memory promotion."));
152 
153 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
154 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
155                                       const LoopSafetyInfo *SafetyInfo,
156                                       TargetTransformInfo *TTI,
157                                       bool &FoldableInLoop, bool LoopNestMode);
158 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
159                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
160                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
161                   OptimizationRemarkEmitter *ORE);
162 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
163                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
164                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE);
165 static bool isSafeToExecuteUnconditionally(
166     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
167     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
168     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
169     AssumptionCache *AC, bool AllowSpeculation);
170 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
171                                      Loop *CurLoop, Instruction &I,
172                                      SinkAndHoistLICMFlags &Flags,
173                                      bool InvariantGroup);
174 static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
175                                       MemoryUse &MU);
176 /// Aggregates various functions for hoisting computations out of loop.
177 static bool hoistArithmetics(Instruction &I, Loop &L,
178                              ICFLoopSafetyInfo &SafetyInfo,
179                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
180                              DominatorTree *DT);
181 static Instruction *cloneInstructionInExitBlock(
182     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
183     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
184 
185 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
186                              MemorySSAUpdater &MSSAU);
187 
188 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
189                                   ICFLoopSafetyInfo &SafetyInfo,
190                                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
191 
192 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
193                                 function_ref<void(Instruction *)> Fn);
194 using PointersAndHasReadsOutsideSet =
195     std::pair<SmallSetVector<Value *, 8>, bool>;
196 static SmallVector<PointersAndHasReadsOutsideSet, 0>
197 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L);
198 
199 namespace {
200 struct LoopInvariantCodeMotion {
201   bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
202                  AssumptionCache *AC, TargetLibraryInfo *TLI,
203                  TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA,
204                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
205 
206   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
207                           unsigned LicmMssaNoAccForPromotionCap,
208                           bool LicmAllowSpeculation)
209       : LicmMssaOptCap(LicmMssaOptCap),
210         LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
211         LicmAllowSpeculation(LicmAllowSpeculation) {}
212 
213 private:
214   unsigned LicmMssaOptCap;
215   unsigned LicmMssaNoAccForPromotionCap;
216   bool LicmAllowSpeculation;
217 };
218 
219 struct LegacyLICMPass : public LoopPass {
220   static char ID; // Pass identification, replacement for typeid
221   LegacyLICMPass(
222       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
223       unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
224       bool LicmAllowSpeculation = true)
225       : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
226                            LicmAllowSpeculation) {
227     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
228   }
229 
230   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
231     if (skipLoop(L))
232       return false;
233 
234     LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block "
235                       << L->getHeader()->getNameOrAsOperand() << "\n");
236 
237     Function *F = L->getHeader()->getParent();
238 
239     auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
240     MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
241     // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
242     // pass. Function analyses need to be preserved across loop transformations
243     // but ORE cannot be preserved (see comment before the pass definition).
244     OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
245     return LICM.runOnLoop(
246         L, &getAnalysis<AAResultsWrapperPass>().getAAResults(),
247         &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
248         &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
249         &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F),
250         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(*F),
251         &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F),
252         SE ? &SE->getSE() : nullptr, MSSA, &ORE);
253   }
254 
255   /// This transformation requires natural loop information & requires that
256   /// loop preheaders be inserted into the CFG...
257   ///
258   void getAnalysisUsage(AnalysisUsage &AU) const override {
259     AU.addPreserved<DominatorTreeWrapperPass>();
260     AU.addPreserved<LoopInfoWrapperPass>();
261     AU.addRequired<TargetLibraryInfoWrapperPass>();
262     AU.addRequired<MemorySSAWrapperPass>();
263     AU.addPreserved<MemorySSAWrapperPass>();
264     AU.addRequired<TargetTransformInfoWrapperPass>();
265     AU.addRequired<AssumptionCacheTracker>();
266     getLoopAnalysisUsage(AU);
267     LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
268     AU.addPreserved<LazyBlockFrequencyInfoPass>();
269     AU.addPreserved<LazyBranchProbabilityInfoPass>();
270   }
271 
272 private:
273   LoopInvariantCodeMotion LICM;
274 };
275 } // namespace
276 
277 PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
278                                 LoopStandardAnalysisResults &AR, LPMUpdater &) {
279   if (!AR.MSSA)
280     report_fatal_error("LICM requires MemorySSA (loop-mssa)",
281                        /*GenCrashDiag*/false);
282 
283   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
284   // pass.  Function analyses need to be preserved across loop transformations
285   // but ORE cannot be preserved (see comment before the pass definition).
286   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
287 
288   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
289                                Opts.AllowSpeculation);
290   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI,
291                       &AR.SE, AR.MSSA, &ORE))
292     return PreservedAnalyses::all();
293 
294   auto PA = getLoopPassPreservedAnalyses();
295   PA.preserve<MemorySSAAnalysis>();
296 
297   return PA;
298 }
299 
300 void LICMPass::printPipeline(
301     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
302   static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
303       OS, MapClassName2PassName);
304 
305   OS << '<';
306   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
307   OS << '>';
308 }
309 
310 PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
311                                  LoopStandardAnalysisResults &AR,
312                                  LPMUpdater &) {
313   if (!AR.MSSA)
314     report_fatal_error("LNICM requires MemorySSA (loop-mssa)",
315                        /*GenCrashDiag*/false);
316 
317   // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
318   // pass.  Function analyses need to be preserved across loop transformations
319   // but ORE cannot be preserved (see comment before the pass definition).
320   OptimizationRemarkEmitter ORE(LN.getParent());
321 
322   LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap,
323                                Opts.AllowSpeculation);
324 
325   Loop &OutermostLoop = LN.getOutermostLoop();
326   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC,
327                                 &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true);
328 
329   if (!Changed)
330     return PreservedAnalyses::all();
331 
332   auto PA = getLoopPassPreservedAnalyses();
333 
334   PA.preserve<DominatorTreeAnalysis>();
335   PA.preserve<LoopAnalysis>();
336   PA.preserve<MemorySSAAnalysis>();
337 
338   return PA;
339 }
340 
341 void LNICMPass::printPipeline(
342     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
343   static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
344       OS, MapClassName2PassName);
345 
346   OS << '<';
347   OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
348   OS << '>';
349 }
350 
351 char LegacyLICMPass::ID = 0;
352 INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
353                       false, false)
354 INITIALIZE_PASS_DEPENDENCY(LoopPass)
355 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
356 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
357 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
358 INITIALIZE_PASS_DEPENDENCY(LazyBFIPass)
359 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
360                     false)
361 
362 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
363 
364 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
365                                                    MemorySSA &MSSA)
366     : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
367                             IsSink, L, MSSA) {}
368 
369 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
370     unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
371     Loop &L, MemorySSA &MSSA)
372     : LicmMssaOptCap(LicmMssaOptCap),
373       LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
374       IsSink(IsSink) {
375   unsigned AccessCapCount = 0;
376   for (auto *BB : L.getBlocks())
377     if (const auto *Accesses = MSSA.getBlockAccesses(BB))
378       for (const auto &MA : *Accesses) {
379         (void)MA;
380         ++AccessCapCount;
381         if (AccessCapCount > LicmMssaNoAccForPromotionCap) {
382           NoOfMemAccTooLarge = true;
383           return;
384         }
385       }
386 }
387 
388 /// Hoist expressions out of the specified loop. Note, alias info for inner
389 /// loop is not preserved so it is not a good idea to run LICM multiple
390 /// times on one loop.
391 bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
392                                         DominatorTree *DT, AssumptionCache *AC,
393                                         TargetLibraryInfo *TLI,
394                                         TargetTransformInfo *TTI,
395                                         ScalarEvolution *SE, MemorySSA *MSSA,
396                                         OptimizationRemarkEmitter *ORE,
397                                         bool LoopNestMode) {
398   bool Changed = false;
399 
400   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
401 
402   // If this loop has metadata indicating that LICM is not to be performed then
403   // just exit.
404   if (hasDisableLICMTransformsHint(L)) {
405     return false;
406   }
407 
408   // Don't sink stores from loops with coroutine suspend instructions.
409   // LICM would sink instructions into the default destination of
410   // the coroutine switch. The default destination of the switch is to
411   // handle the case where the coroutine is suspended, by which point the
412   // coroutine frame may have been destroyed. No instruction can be sunk there.
413   // FIXME: This would unfortunately hurt the performance of coroutines, however
414   // there is currently no general solution for this. Similar issues could also
415   // potentially happen in other passes where instructions are being moved
416   // across that edge.
417   bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) {
418     return llvm::any_of(*BB, [](Instruction &I) {
419       IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
420       return II && II->getIntrinsicID() == Intrinsic::coro_suspend;
421     });
422   });
423 
424   MemorySSAUpdater MSSAU(MSSA);
425   SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
426                               /*IsSink=*/true, *L, *MSSA);
427 
428   // Get the preheader block to move instructions into...
429   BasicBlock *Preheader = L->getLoopPreheader();
430 
431   // Compute loop safety information.
432   ICFLoopSafetyInfo SafetyInfo;
433   SafetyInfo.computeLoopSafetyInfo(L);
434 
435   // We want to visit all of the instructions in this loop... that are not parts
436   // of our subloops (they have already had their invariants hoisted out of
437   // their loop, into this loop, so there is no need to process the BODIES of
438   // the subloops).
439   //
440   // Traverse the body of the loop in depth first order on the dominator tree so
441   // that we are guaranteed to see definitions before we see uses.  This allows
442   // us to sink instructions in one pass, without iteration.  After sinking
443   // instructions, we perform another pass to hoist them out of the loop.
444   if (L->hasDedicatedExits())
445     Changed |=
446         LoopNestMode
447             ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT,
448                                     TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
449             : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
450                          MSSAU, &SafetyInfo, Flags, ORE);
451   Flags.setIsSink(false);
452   if (Preheader)
453     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
454                            MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
455                            LicmAllowSpeculation);
456 
457   // Now that all loop invariants have been removed from the loop, promote any
458   // memory references to scalars that we can.
459   // Don't sink stores from loops without dedicated block exits. Exits
460   // containing indirect branches are not transformed by loop simplify,
461   // make sure we catch that. An additional load may be generated in the
462   // preheader for SSA updater, so also avoid sinking when no preheader
463   // is available.
464   if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
465       !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
466     // Figure out the loop exits and their insertion points
467     SmallVector<BasicBlock *, 8> ExitBlocks;
468     L->getUniqueExitBlocks(ExitBlocks);
469 
470     // We can't insert into a catchswitch.
471     bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
472       return isa<CatchSwitchInst>(Exit->getTerminator());
473     });
474 
475     if (!HasCatchSwitch) {
476       SmallVector<Instruction *, 8> InsertPts;
477       SmallVector<MemoryAccess *, 8> MSSAInsertPts;
478       InsertPts.reserve(ExitBlocks.size());
479       MSSAInsertPts.reserve(ExitBlocks.size());
480       for (BasicBlock *ExitBlock : ExitBlocks) {
481         InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
482         MSSAInsertPts.push_back(nullptr);
483       }
484 
485       PredIteratorCache PIC;
486 
487       // Promoting one set of accesses may make the pointers for another set
488       // loop invariant, so run this in a loop.
489       bool Promoted = false;
490       bool LocalPromoted;
491       do {
492         LocalPromoted = false;
493         for (auto [PointerMustAliases, HasReadsOutsideSet] :
494              collectPromotionCandidates(MSSA, AA, L)) {
495           LocalPromoted |= promoteLoopAccessesToScalars(
496               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
497               DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
498               LicmAllowSpeculation, HasReadsOutsideSet);
499         }
500         Promoted |= LocalPromoted;
501       } while (LocalPromoted);
502 
503       // Once we have promoted values across the loop body we have to
504       // recursively reform LCSSA as any nested loop may now have values defined
505       // within the loop used in the outer loop.
506       // FIXME: This is really heavy handed. It would be a bit better to use an
507       // SSAUpdater strategy during promotion that was LCSSA aware and reformed
508       // it as it went.
509       if (Promoted)
510         formLCSSARecursively(*L, *DT, LI, SE);
511 
512       Changed |= Promoted;
513     }
514   }
515 
516   // Check that neither this loop nor its parent have had LCSSA broken. LICM is
517   // specifically moving instructions across the loop boundary and so it is
518   // especially in need of basic functional correctness checking here.
519   assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
520   assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
521          "Parent loop not left in LCSSA form after LICM!");
522 
523   if (VerifyMemorySSA)
524     MSSA->verifyMemorySSA();
525 
526   if (Changed && SE)
527     SE->forgetLoopDispositions();
528   return Changed;
529 }
530 
531 /// Walk the specified region of the CFG (defined by all blocks dominated by
532 /// the specified block, and that are in the current loop) in reverse depth
533 /// first order w.r.t the DominatorTree.  This allows us to visit uses before
534 /// definitions, allowing us to sink a loop body in one pass without iteration.
535 ///
536 bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
537                       DominatorTree *DT, TargetLibraryInfo *TLI,
538                       TargetTransformInfo *TTI, Loop *CurLoop,
539                       MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
540                       SinkAndHoistLICMFlags &Flags,
541                       OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
542 
543   // Verify inputs.
544   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
545          CurLoop != nullptr && SafetyInfo != nullptr &&
546          "Unexpected input to sinkRegion.");
547 
548   // We want to visit children before parents. We will enqueue all the parents
549   // before their children in the worklist and process the worklist in reverse
550   // order.
551   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
552 
553   bool Changed = false;
554   for (DomTreeNode *DTN : reverse(Worklist)) {
555     BasicBlock *BB = DTN->getBlock();
556     // Only need to process the contents of this block if it is not part of a
557     // subloop (which would already have been processed).
558     if (inSubLoop(BB, CurLoop, LI))
559       continue;
560 
561     for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
562       Instruction &I = *--II;
563 
564       // The instruction is not used in the loop if it is dead.  In this case,
565       // we just delete it instead of sinking it.
566       if (isInstructionTriviallyDead(&I, TLI)) {
567         LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
568         salvageKnowledge(&I);
569         salvageDebugInfo(I);
570         ++II;
571         eraseInstruction(I, *SafetyInfo, MSSAU);
572         Changed = true;
573         continue;
574       }
575 
576       // Check to see if we can sink this instruction to the exit blocks
577       // of the loop.  We can do this if the all users of the instruction are
578       // outside of the loop.  In this case, it doesn't even matter if the
579       // operands of the instruction are loop invariant.
580       //
581       bool FoldableInLoop = false;
582       bool LoopNestMode = OutermostLoop != nullptr;
583       if (!I.mayHaveSideEffects() &&
584           isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
585                                     SafetyInfo, TTI, FoldableInLoop,
586                                     LoopNestMode) &&
587           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
588         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
589           if (!FoldableInLoop) {
590             ++II;
591             salvageDebugInfo(I);
592             eraseInstruction(I, *SafetyInfo, MSSAU);
593           }
594           Changed = true;
595         }
596       }
597     }
598   }
599   if (VerifyMemorySSA)
600     MSSAU.getMemorySSA()->verifyMemorySSA();
601   return Changed;
602 }
603 
604 bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
605                                  DominatorTree *DT, TargetLibraryInfo *TLI,
606                                  TargetTransformInfo *TTI, Loop *CurLoop,
607                                  MemorySSAUpdater &MSSAU,
608                                  ICFLoopSafetyInfo *SafetyInfo,
609                                  SinkAndHoistLICMFlags &Flags,
610                                  OptimizationRemarkEmitter *ORE) {
611 
612   bool Changed = false;
613   SmallPriorityWorklist<Loop *, 4> Worklist;
614   Worklist.insert(CurLoop);
615   appendLoopsToWorklist(*CurLoop, Worklist);
616   while (!Worklist.empty()) {
617     Loop *L = Worklist.pop_back_val();
618     Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
619                           MSSAU, SafetyInfo, Flags, ORE, CurLoop);
620   }
621   return Changed;
622 }
623 
624 namespace {
625 // This is a helper class for hoistRegion to make it able to hoist control flow
626 // in order to be able to hoist phis. The way this works is that we initially
627 // start hoisting to the loop preheader, and when we see a loop invariant branch
628 // we make note of this. When we then come to hoist an instruction that's
629 // conditional on such a branch we duplicate the branch and the relevant control
630 // flow, then hoist the instruction into the block corresponding to its original
631 // block in the duplicated control flow.
632 class ControlFlowHoister {
633 private:
634   // Information about the loop we are hoisting from
635   LoopInfo *LI;
636   DominatorTree *DT;
637   Loop *CurLoop;
638   MemorySSAUpdater &MSSAU;
639 
640   // A map of blocks in the loop to the block their instructions will be hoisted
641   // to.
642   DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
643 
644   // The branches that we can hoist, mapped to the block that marks a
645   // convergence point of their control flow.
646   DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
647 
648 public:
649   ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop,
650                      MemorySSAUpdater &MSSAU)
651       : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {}
652 
653   void registerPossiblyHoistableBranch(BranchInst *BI) {
654     // We can only hoist conditional branches with loop invariant operands.
655     if (!ControlFlowHoisting || !BI->isConditional() ||
656         !CurLoop->hasLoopInvariantOperands(BI))
657       return;
658 
659     // The branch destinations need to be in the loop, and we don't gain
660     // anything by duplicating conditional branches with duplicate successors,
661     // as it's essentially the same as an unconditional branch.
662     BasicBlock *TrueDest = BI->getSuccessor(0);
663     BasicBlock *FalseDest = BI->getSuccessor(1);
664     if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) ||
665         TrueDest == FalseDest)
666       return;
667 
668     // We can hoist BI if one branch destination is the successor of the other,
669     // or both have common successor which we check by seeing if the
670     // intersection of their successors is non-empty.
671     // TODO: This could be expanded to allowing branches where both ends
672     // eventually converge to a single block.
673     SmallPtrSet<BasicBlock *, 4> TrueDestSucc, FalseDestSucc;
674     TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest));
675     FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest));
676     BasicBlock *CommonSucc = nullptr;
677     if (TrueDestSucc.count(FalseDest)) {
678       CommonSucc = FalseDest;
679     } else if (FalseDestSucc.count(TrueDest)) {
680       CommonSucc = TrueDest;
681     } else {
682       set_intersect(TrueDestSucc, FalseDestSucc);
683       // If there's one common successor use that.
684       if (TrueDestSucc.size() == 1)
685         CommonSucc = *TrueDestSucc.begin();
686       // If there's more than one pick whichever appears first in the block list
687       // (we can't use the value returned by TrueDestSucc.begin() as it's
688       // unpredicatable which element gets returned).
689       else if (!TrueDestSucc.empty()) {
690         Function *F = TrueDest->getParent();
691         auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); };
692         auto It = llvm::find_if(*F, IsSucc);
693         assert(It != F->end() && "Could not find successor in function");
694         CommonSucc = &*It;
695       }
696     }
697     // The common successor has to be dominated by the branch, as otherwise
698     // there will be some other path to the successor that will not be
699     // controlled by this branch so any phi we hoist would be controlled by the
700     // wrong condition. This also takes care of avoiding hoisting of loop back
701     // edges.
702     // TODO: In some cases this could be relaxed if the successor is dominated
703     // by another block that's been hoisted and we can guarantee that the
704     // control flow has been replicated exactly.
705     if (CommonSucc && DT->dominates(BI, CommonSucc))
706       HoistableBranches[BI] = CommonSucc;
707   }
708 
709   bool canHoistPHI(PHINode *PN) {
710     // The phi must have loop invariant operands.
711     if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN))
712       return false;
713     // We can hoist phis if the block they are in is the target of hoistable
714     // branches which cover all of the predecessors of the block.
715     SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
716     BasicBlock *BB = PN->getParent();
717     for (BasicBlock *PredBB : predecessors(BB))
718       PredecessorBlocks.insert(PredBB);
719     // If we have less predecessor blocks than predecessors then the phi will
720     // have more than one incoming value for the same block which we can't
721     // handle.
722     // TODO: This could be handled be erasing some of the duplicate incoming
723     // values.
724     if (PredecessorBlocks.size() != pred_size(BB))
725       return false;
726     for (auto &Pair : HoistableBranches) {
727       if (Pair.second == BB) {
728         // Which blocks are predecessors via this branch depends on if the
729         // branch is triangle-like or diamond-like.
730         if (Pair.first->getSuccessor(0) == BB) {
731           PredecessorBlocks.erase(Pair.first->getParent());
732           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
733         } else if (Pair.first->getSuccessor(1) == BB) {
734           PredecessorBlocks.erase(Pair.first->getParent());
735           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
736         } else {
737           PredecessorBlocks.erase(Pair.first->getSuccessor(0));
738           PredecessorBlocks.erase(Pair.first->getSuccessor(1));
739         }
740       }
741     }
742     // PredecessorBlocks will now be empty if for every predecessor of BB we
743     // found a hoistable branch source.
744     return PredecessorBlocks.empty();
745   }
746 
747   BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) {
748     if (!ControlFlowHoisting)
749       return CurLoop->getLoopPreheader();
750     // If BB has already been hoisted, return that
751     if (HoistDestinationMap.count(BB))
752       return HoistDestinationMap[BB];
753 
754     // Check if this block is conditional based on a pending branch
755     auto HasBBAsSuccessor =
756         [&](DenseMap<BranchInst *, BasicBlock *>::value_type &Pair) {
757           return BB != Pair.second && (Pair.first->getSuccessor(0) == BB ||
758                                        Pair.first->getSuccessor(1) == BB);
759         };
760     auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor);
761 
762     // If not involved in a pending branch, hoist to preheader
763     BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
764     if (It == HoistableBranches.end()) {
765       LLVM_DEBUG(dbgs() << "LICM using "
766                         << InitialPreheader->getNameOrAsOperand()
767                         << " as hoist destination for "
768                         << BB->getNameOrAsOperand() << "\n");
769       HoistDestinationMap[BB] = InitialPreheader;
770       return InitialPreheader;
771     }
772     BranchInst *BI = It->first;
773     assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) ==
774                HoistableBranches.end() &&
775            "BB is expected to be the target of at most one branch");
776 
777     LLVMContext &C = BB->getContext();
778     BasicBlock *TrueDest = BI->getSuccessor(0);
779     BasicBlock *FalseDest = BI->getSuccessor(1);
780     BasicBlock *CommonSucc = HoistableBranches[BI];
781     BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent());
782 
783     // Create hoisted versions of blocks that currently don't have them
784     auto CreateHoistedBlock = [&](BasicBlock *Orig) {
785       if (HoistDestinationMap.count(Orig))
786         return HoistDestinationMap[Orig];
787       BasicBlock *New =
788           BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
789       HoistDestinationMap[Orig] = New;
790       DT->addNewBlock(New, HoistTarget);
791       if (CurLoop->getParentLoop())
792         CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
793       ++NumCreatedBlocks;
794       LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
795                         << " as hoist destination for " << Orig->getName()
796                         << "\n");
797       return New;
798     };
799     BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
800     BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
801     BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
802 
803     // Link up these blocks with branches.
804     if (!HoistCommonSucc->getTerminator()) {
805       // The new common successor we've generated will branch to whatever that
806       // hoist target branched to.
807       BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
808       assert(TargetSucc && "Expected hoist target to have a single successor");
809       HoistCommonSucc->moveBefore(TargetSucc);
810       BranchInst::Create(TargetSucc, HoistCommonSucc);
811     }
812     if (!HoistTrueDest->getTerminator()) {
813       HoistTrueDest->moveBefore(HoistCommonSucc);
814       BranchInst::Create(HoistCommonSucc, HoistTrueDest);
815     }
816     if (!HoistFalseDest->getTerminator()) {
817       HoistFalseDest->moveBefore(HoistCommonSucc);
818       BranchInst::Create(HoistCommonSucc, HoistFalseDest);
819     }
820 
821     // If BI is being cloned to what was originally the preheader then
822     // HoistCommonSucc will now be the new preheader.
823     if (HoistTarget == InitialPreheader) {
824       // Phis in the loop header now need to use the new preheader.
825       InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
826       MSSAU.wireOldPredecessorsToNewImmediatePredecessor(
827           HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
828       // The new preheader dominates the loop header.
829       DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
830       DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
831       DT->changeImmediateDominator(HeaderNode, PreheaderNode);
832       // The preheader hoist destination is now the new preheader, with the
833       // exception of the hoist destination of this branch.
834       for (auto &Pair : HoistDestinationMap)
835         if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
836           Pair.second = HoistCommonSucc;
837     }
838 
839     // Now finally clone BI.
840     ReplaceInstWithInst(
841         HoistTarget->getTerminator(),
842         BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
843     ++NumClonedBranches;
844 
845     assert(CurLoop->getLoopPreheader() &&
846            "Hoisting blocks should not have destroyed preheader");
847     return HoistDestinationMap[BB];
848   }
849 };
850 } // namespace
851 
852 /// Walk the specified region of the CFG (defined by all blocks dominated by
853 /// the specified block, and that are in the current loop) in depth first
854 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
855 /// uses, allowing us to hoist a loop body in one pass without iteration.
856 ///
857 bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
858                        DominatorTree *DT, AssumptionCache *AC,
859                        TargetLibraryInfo *TLI, Loop *CurLoop,
860                        MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
861                        ICFLoopSafetyInfo *SafetyInfo,
862                        SinkAndHoistLICMFlags &Flags,
863                        OptimizationRemarkEmitter *ORE, bool LoopNestMode,
864                        bool AllowSpeculation) {
865   // Verify inputs.
866   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
867          CurLoop != nullptr && SafetyInfo != nullptr &&
868          "Unexpected input to hoistRegion.");
869 
870   ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
871 
872   // Keep track of instructions that have been hoisted, as they may need to be
873   // re-hoisted if they end up not dominating all of their uses.
874   SmallVector<Instruction *, 16> HoistedInstructions;
875 
876   // For PHI hoisting to work we need to hoist blocks before their successors.
877   // We can do this by iterating through the blocks in the loop in reverse
878   // post-order.
879   LoopBlocksRPO Worklist(CurLoop);
880   Worklist.perform(LI);
881   bool Changed = false;
882   BasicBlock *Preheader = CurLoop->getLoopPreheader();
883   for (BasicBlock *BB : Worklist) {
884     // Only need to process the contents of this block if it is not part of a
885     // subloop (which would already have been processed).
886     if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
887       continue;
888 
889     for (Instruction &I : llvm::make_early_inc_range(*BB)) {
890       // Try hoisting the instruction out to the preheader.  We can only do
891       // this if all of the operands of the instruction are loop invariant and
892       // if it is safe to hoist the instruction. We also check block frequency
893       // to make sure instruction only gets hoisted into colder blocks.
894       // TODO: It may be safe to hoist if we are hoisting to a conditional block
895       // and we have accurately duplicated the control flow from the loop header
896       // to that block.
897       if (CurLoop->hasLoopInvariantOperands(&I) &&
898           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
899           isSafeToExecuteUnconditionally(
900               I, DT, TLI, CurLoop, SafetyInfo, ORE,
901               Preheader->getTerminator(), AC, AllowSpeculation)) {
902         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
903               MSSAU, SE, ORE);
904         HoistedInstructions.push_back(&I);
905         Changed = true;
906         continue;
907       }
908 
909       // Attempt to remove floating point division out of the loop by
910       // converting it to a reciprocal multiplication.
911       if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() &&
912           CurLoop->isLoopInvariant(I.getOperand(1))) {
913         auto Divisor = I.getOperand(1);
914         auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
915         auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
916         ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
917         SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent());
918         ReciprocalDivisor->insertBefore(&I);
919 
920         auto Product =
921             BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor);
922         Product->setFastMathFlags(I.getFastMathFlags());
923         SafetyInfo->insertInstructionTo(Product, I.getParent());
924         Product->insertAfter(&I);
925         I.replaceAllUsesWith(Product);
926         eraseInstruction(I, *SafetyInfo, MSSAU);
927 
928         hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
929               SafetyInfo, MSSAU, SE, ORE);
930         HoistedInstructions.push_back(ReciprocalDivisor);
931         Changed = true;
932         continue;
933       }
934 
935       auto IsInvariantStart = [&](Instruction &I) {
936         using namespace PatternMatch;
937         return I.use_empty() &&
938                match(&I, m_Intrinsic<Intrinsic::invariant_start>());
939       };
940       auto MustExecuteWithoutWritesBefore = [&](Instruction &I) {
941         return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) &&
942                SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop);
943       };
944       if ((IsInvariantStart(I) || isGuard(&I)) &&
945           CurLoop->hasLoopInvariantOperands(&I) &&
946           MustExecuteWithoutWritesBefore(I)) {
947         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
948               MSSAU, SE, ORE);
949         HoistedInstructions.push_back(&I);
950         Changed = true;
951         continue;
952       }
953 
954       if (PHINode *PN = dyn_cast<PHINode>(&I)) {
955         if (CFH.canHoistPHI(PN)) {
956           // Redirect incoming blocks first to ensure that we create hoisted
957           // versions of those blocks before we hoist the phi.
958           for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
959             PN->setIncomingBlock(
960                 i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i)));
961           hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
962                 MSSAU, SE, ORE);
963           assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
964           Changed = true;
965           continue;
966         }
967       }
968 
969       // Try to reassociate instructions so that part of computations can be
970       // done out of loop.
971       if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
972         Changed = true;
973         continue;
974       }
975 
976       // Remember possibly hoistable branches so we can actually hoist them
977       // later if needed.
978       if (BranchInst *BI = dyn_cast<BranchInst>(&I))
979         CFH.registerPossiblyHoistableBranch(BI);
980     }
981   }
982 
983   // If we hoisted instructions to a conditional block they may not dominate
984   // their uses that weren't hoisted (such as phis where some operands are not
985   // loop invariant). If so make them unconditional by moving them to their
986   // immediate dominator. We iterate through the instructions in reverse order
987   // which ensures that when we rehoist an instruction we rehoist its operands,
988   // and also keep track of where in the block we are rehoisting to to make sure
989   // that we rehoist instructions before the instructions that use them.
990   Instruction *HoistPoint = nullptr;
991   if (ControlFlowHoisting) {
992     for (Instruction *I : reverse(HoistedInstructions)) {
993       if (!llvm::all_of(I->uses(),
994                         [&](Use &U) { return DT->dominates(I, U); })) {
995         BasicBlock *Dominator =
996             DT->getNode(I->getParent())->getIDom()->getBlock();
997         if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) {
998           if (HoistPoint)
999             assert(DT->dominates(Dominator, HoistPoint->getParent()) &&
1000                    "New hoist point expected to dominate old hoist point");
1001           HoistPoint = Dominator->getTerminator();
1002         }
1003         LLVM_DEBUG(dbgs() << "LICM rehoisting to "
1004                           << HoistPoint->getParent()->getNameOrAsOperand()
1005                           << ": " << *I << "\n");
1006         moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE);
1007         HoistPoint = I;
1008         Changed = true;
1009       }
1010     }
1011   }
1012   if (VerifyMemorySSA)
1013     MSSAU.getMemorySSA()->verifyMemorySSA();
1014 
1015     // Now that we've finished hoisting make sure that LI and DT are still
1016     // valid.
1017 #ifdef EXPENSIVE_CHECKS
1018   if (Changed) {
1019     assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1020            "Dominator tree verification failed");
1021     LI->verify(*DT);
1022   }
1023 #endif
1024 
1025   return Changed;
1026 }
1027 
1028 // Return true if LI is invariant within scope of the loop. LI is invariant if
1029 // CurLoop is dominated by an invariant.start representing the same memory
1030 // location and size as the memory location LI loads from, and also the
1031 // invariant.start has no uses.
1032 static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
1033                                   Loop *CurLoop) {
1034   Value *Addr = LI->getOperand(0);
1035   const DataLayout &DL = LI->getModule()->getDataLayout();
1036   const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType());
1037 
1038   // It is not currently possible for clang to generate an invariant.start
1039   // intrinsic with scalable vector types because we don't support thread local
1040   // sizeless types and we don't permit sizeless types in structs or classes.
1041   // Furthermore, even if support is added for this in future the intrinsic
1042   // itself is defined to have a size of -1 for variable sized objects. This
1043   // makes it impossible to verify if the intrinsic envelops our region of
1044   // interest. For example, both <vscale x 32 x i8> and <vscale x 16 x i8>
1045   // types would have a -1 parameter, but the former is clearly double the size
1046   // of the latter.
1047   if (LocSizeInBits.isScalable())
1048     return false;
1049 
1050   // if the type is i8 addrspace(x)*, we know this is the type of
1051   // llvm.invariant.start operand
1052   auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
1053                                      LI->getPointerAddressSpace());
1054   unsigned BitcastsVisited = 0;
1055   // Look through bitcasts until we reach the i8* type (this is invariant.start
1056   // operand type).
1057   while (Addr->getType() != PtrInt8Ty) {
1058     auto *BC = dyn_cast<BitCastInst>(Addr);
1059     // Avoid traversing high number of bitcast uses.
1060     if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
1061       return false;
1062     Addr = BC->getOperand(0);
1063   }
1064   // If we've ended up at a global/constant, bail. We shouldn't be looking at
1065   // uselists for non-local Values in a loop pass.
1066   if (isa<Constant>(Addr))
1067     return false;
1068 
1069   unsigned UsesVisited = 0;
1070   // Traverse all uses of the load operand value, to see if invariant.start is
1071   // one of the uses, and whether it dominates the load instruction.
1072   for (auto *U : Addr->users()) {
1073     // Avoid traversing for Load operand with high number of users.
1074     if (++UsesVisited > MaxNumUsesTraversed)
1075       return false;
1076     IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
1077     // If there are escaping uses of invariant.start instruction, the load maybe
1078     // non-invariant.
1079     if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
1080         !II->use_empty())
1081       continue;
1082     ConstantInt *InvariantSize = cast<ConstantInt>(II->getArgOperand(0));
1083     // The intrinsic supports having a -1 argument for variable sized objects
1084     // so we should check for that here.
1085     if (InvariantSize->isNegative())
1086       continue;
1087     uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8;
1088     // Confirm the invariant.start location size contains the load operand size
1089     // in bits. Also, the invariant.start should dominate the load, and we
1090     // should not hoist the load out of a loop that contains this dominating
1091     // invariant.start.
1092     if (LocSizeInBits.getFixedValue() <= InvariantSizeInBits &&
1093         DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
1094       return true;
1095   }
1096 
1097   return false;
1098 }
1099 
1100 namespace {
1101 /// Return true if-and-only-if we know how to (mechanically) both hoist and
1102 /// sink a given instruction out of a loop.  Does not address legality
1103 /// concerns such as aliasing or speculation safety.
1104 bool isHoistableAndSinkableInst(Instruction &I) {
1105   // Only these instructions are hoistable/sinkable.
1106   return (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
1107           isa<FenceInst>(I) || isa<CastInst>(I) || isa<UnaryOperator>(I) ||
1108           isa<BinaryOperator>(I) || isa<SelectInst>(I) ||
1109           isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
1110           isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
1111           isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
1112           isa<InsertValueInst>(I) || isa<FreezeInst>(I));
1113 }
1114 /// Return true if MSSA knows there are no MemoryDefs in the loop.
1115 bool isReadOnly(const MemorySSAUpdater &MSSAU, const Loop *L) {
1116   for (auto *BB : L->getBlocks())
1117     if (MSSAU.getMemorySSA()->getBlockDefs(BB))
1118       return false;
1119   return true;
1120 }
1121 
1122 /// Return true if I is the only Instruction with a MemoryAccess in L.
1123 bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
1124                         const MemorySSAUpdater &MSSAU) {
1125   for (auto *BB : L->getBlocks())
1126     if (auto *Accs = MSSAU.getMemorySSA()->getBlockAccesses(BB)) {
1127       int NotAPhi = 0;
1128       for (const auto &Acc : *Accs) {
1129         if (isa<MemoryPhi>(&Acc))
1130           continue;
1131         const auto *MUD = cast<MemoryUseOrDef>(&Acc);
1132         if (MUD->getMemoryInst() != I || NotAPhi++ == 1)
1133           return false;
1134       }
1135     }
1136   return true;
1137 }
1138 }
1139 
1140 static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
1141                                                BatchAAResults &BAA,
1142                                                SinkAndHoistLICMFlags &Flags,
1143                                                MemoryUseOrDef *MA) {
1144   // See declaration of SetLicmMssaOptCap for usage details.
1145   if (Flags.tooManyClobberingCalls())
1146     return MA->getDefiningAccess();
1147 
1148   MemoryAccess *Source =
1149       MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
1150   Flags.incrementClobberingCalls();
1151   return Source;
1152 }
1153 
1154 bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
1155                               Loop *CurLoop, MemorySSAUpdater &MSSAU,
1156                               bool TargetExecutesOncePerLoop,
1157                               SinkAndHoistLICMFlags &Flags,
1158                               OptimizationRemarkEmitter *ORE) {
1159   // If we don't understand the instruction, bail early.
1160   if (!isHoistableAndSinkableInst(I))
1161     return false;
1162 
1163   MemorySSA *MSSA = MSSAU.getMemorySSA();
1164   // Loads have extra constraints we have to verify before we can hoist them.
1165   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
1166     if (!LI->isUnordered())
1167       return false; // Don't sink/hoist volatile or ordered atomic loads!
1168 
1169     // Loads from constant memory are always safe to move, even if they end up
1170     // in the same alias set as something that ends up being modified.
1171     if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))))
1172       return true;
1173     if (LI->hasMetadata(LLVMContext::MD_invariant_load))
1174       return true;
1175 
1176     if (LI->isAtomic() && !TargetExecutesOncePerLoop)
1177       return false; // Don't risk duplicating unordered loads
1178 
1179     // This checks for an invariant.start dominating the load.
1180     if (isLoadInvariantInLoop(LI, DT, CurLoop))
1181       return true;
1182 
1183     auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
1184 
1185     bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
1186 
1187     bool Invalidated = pointerInvalidatedByLoop(
1188         MSSA, MU, CurLoop, I, Flags, InvariantGroup);
1189     // Check loop-invariant address because this may also be a sinkable load
1190     // whose address is not necessarily loop-invariant.
1191     if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1192       ORE->emit([&]() {
1193         return OptimizationRemarkMissed(
1194                    DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI)
1195                << "failed to move load with loop-invariant address "
1196                   "because the loop may invalidate its value";
1197       });
1198 
1199     return !Invalidated;
1200   } else if (CallInst *CI = dyn_cast<CallInst>(&I)) {
1201     // Don't sink or hoist dbg info; it's legal, but not useful.
1202     if (isa<DbgInfoIntrinsic>(I))
1203       return false;
1204 
1205     // Don't sink calls which can throw.
1206     if (CI->mayThrow())
1207       return false;
1208 
1209     // Convergent attribute has been used on operations that involve
1210     // inter-thread communication which results are implicitly affected by the
1211     // enclosing control flows. It is not safe to hoist or sink such operations
1212     // across control flow.
1213     if (CI->isConvergent())
1214       return false;
1215 
1216     using namespace PatternMatch;
1217     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
1218       // Assumes don't actually alias anything or throw
1219       return true;
1220 
1221     // Handle simple cases by querying alias analysis.
1222     MemoryEffects Behavior = AA->getMemoryEffects(CI);
1223 
1224     // FIXME: we don't handle the semantics of thread local well. So that the
1225     // address of thread locals are fake constants in coroutines. So We forbid
1226     // to treat onlyReadsMemory call in coroutines as constants now. Note that
1227     // it is possible to hide a thread local access in a onlyReadsMemory call.
1228     // Remove this check after we handle the semantics of thread locals well.
1229     if (Behavior.onlyReadsMemory() && CI->getFunction()->isPresplitCoroutine())
1230       return false;
1231 
1232     if (Behavior.doesNotAccessMemory())
1233       return true;
1234     if (Behavior.onlyReadsMemory()) {
1235       // A readonly argmemonly function only reads from memory pointed to by
1236       // it's arguments with arbitrary offsets.  If we can prove there are no
1237       // writes to this memory in the loop, we can hoist or sink.
1238       if (Behavior.onlyAccessesArgPointees()) {
1239         // TODO: expand to writeable arguments
1240         for (Value *Op : CI->args())
1241           if (Op->getType()->isPointerTy() &&
1242               pointerInvalidatedByLoop(
1243                   MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
1244                   Flags, /*InvariantGroup=*/false))
1245             return false;
1246         return true;
1247       }
1248 
1249       // If this call only reads from memory and there are no writes to memory
1250       // in the loop, we can hoist or sink the call as appropriate.
1251       if (isReadOnly(MSSAU, CurLoop))
1252         return true;
1253     }
1254 
1255     // FIXME: This should use mod/ref information to see if we can hoist or
1256     // sink the call.
1257 
1258     return false;
1259   } else if (auto *FI = dyn_cast<FenceInst>(&I)) {
1260     // Fences alias (most) everything to provide ordering.  For the moment,
1261     // just give up if there are any other memory operations in the loop.
1262     return isOnlyMemoryAccess(FI, CurLoop, MSSAU);
1263   } else if (auto *SI = dyn_cast<StoreInst>(&I)) {
1264     if (!SI->isUnordered())
1265       return false; // Don't sink/hoist volatile or ordered atomic store!
1266 
1267     // We can only hoist a store that we can prove writes a value which is not
1268     // read or overwritten within the loop.  For those cases, we fallback to
1269     // load store promotion instead.  TODO: We can extend this to cases where
1270     // there is exactly one write to the location and that write dominates an
1271     // arbitrary number of reads in the loop.
1272     if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
1273       return true;
1274     // If there are more accesses than the Promotion cap, then give up as we're
1275     // not walking a list that long.
1276     if (Flags.tooManyMemoryAccesses())
1277       return false;
1278 
1279     auto *SIMD = MSSA->getMemoryAccess(SI);
1280     BatchAAResults BAA(*AA);
1281     auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
1282     // Make sure there are no clobbers inside the loop.
1283     if (!MSSA->isLiveOnEntryDef(Source) &&
1284            CurLoop->contains(Source->getBlock()))
1285       return false;
1286 
1287     // If there are interfering Uses (i.e. their defining access is in the
1288     // loop), or ordered loads (stored as Defs!), don't move this store.
1289     // Could do better here, but this is conservatively correct.
1290     // TODO: Cache set of Uses on the first walk in runOnLoop, update when
1291     // moving accesses. Can also extend to dominating uses.
1292     for (auto *BB : CurLoop->getBlocks())
1293       if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
1294         for (const auto &MA : *Accesses)
1295           if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
1296             auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
1297                 const_cast<MemoryUse *>(MU));
1298             if (!MSSA->isLiveOnEntryDef(MD) &&
1299                 CurLoop->contains(MD->getBlock()))
1300               return false;
1301             // Disable hoisting past potentially interfering loads. Optimized
1302             // Uses may point to an access outside the loop, as getClobbering
1303             // checks the previous iteration when walking the backedge.
1304             // FIXME: More precise: no Uses that alias SI.
1305             if (!Flags.getIsSink() && !MSSA->dominates(SIMD, MU))
1306               return false;
1307           } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
1308             if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
1309               (void)LI; // Silence warning.
1310               assert(!LI->isUnordered() && "Expected unordered load");
1311               return false;
1312             }
1313             // Any call, while it may not be clobbering SI, it may be a use.
1314             if (auto *CI = dyn_cast<CallInst>(MD->getMemoryInst())) {
1315               // Check if the call may read from the memory location written
1316               // to by SI. Check CI's attributes and arguments; the number of
1317               // such checks performed is limited above by NoOfMemAccTooLarge.
1318               ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
1319               if (isModOrRefSet(MRI))
1320                 return false;
1321             }
1322           }
1323       }
1324     return true;
1325   }
1326 
1327   assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
1328 
1329   // We've established mechanical ability and aliasing, it's up to the caller
1330   // to check fault safety
1331   return true;
1332 }
1333 
1334 /// Returns true if a PHINode is a trivially replaceable with an
1335 /// Instruction.
1336 /// This is true when all incoming values are that instruction.
1337 /// This pattern occurs most often with LCSSA PHI nodes.
1338 ///
1339 static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
1340   for (const Value *IncValue : PN.incoming_values())
1341     if (IncValue != &I)
1342       return false;
1343 
1344   return true;
1345 }
1346 
1347 /// Return true if the instruction is foldable in the loop.
1348 static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1349                          const TargetTransformInfo *TTI) {
1350   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1351     InstructionCost CostI =
1352         TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
1353     if (CostI != TargetTransformInfo::TCC_Free)
1354       return false;
1355     // For a GEP, we cannot simply use getInstructionCost because currently
1356     // it optimistically assumes that a GEP will fold into addressing mode
1357     // regardless of its users.
1358     const BasicBlock *BB = GEP->getParent();
1359     for (const User *U : GEP->users()) {
1360       const Instruction *UI = cast<Instruction>(U);
1361       if (CurLoop->contains(UI) &&
1362           (BB != UI->getParent() ||
1363            (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
1364         return false;
1365     }
1366     return true;
1367   }
1368 
1369   return false;
1370 }
1371 
1372 /// Return true if the only users of this instruction are outside of
1373 /// the loop. If this is true, we can sink the instruction to the exit
1374 /// blocks of the loop.
1375 ///
1376 /// We also return true if the instruction could be folded away in lowering.
1377 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
1378 static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
1379                                       const LoopSafetyInfo *SafetyInfo,
1380                                       TargetTransformInfo *TTI,
1381                                       bool &FoldableInLoop, bool LoopNestMode) {
1382   const auto &BlockColors = SafetyInfo->getBlockColors();
1383   bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
1384   for (const User *U : I.users()) {
1385     const Instruction *UI = cast<Instruction>(U);
1386     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
1387       const BasicBlock *BB = PN->getParent();
1388       // We cannot sink uses in catchswitches.
1389       if (isa<CatchSwitchInst>(BB->getTerminator()))
1390         return false;
1391 
1392       // We need to sink a callsite to a unique funclet.  Avoid sinking if the
1393       // phi use is too muddled.
1394       if (isa<CallInst>(I))
1395         if (!BlockColors.empty() &&
1396             BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
1397           return false;
1398 
1399       if (LoopNestMode) {
1400         while (isa<PHINode>(UI) && UI->hasOneUser() &&
1401                UI->getNumOperands() == 1) {
1402           if (!CurLoop->contains(UI))
1403             break;
1404           UI = cast<Instruction>(UI->user_back());
1405         }
1406       }
1407     }
1408 
1409     if (CurLoop->contains(UI)) {
1410       if (IsFoldable) {
1411         FoldableInLoop = true;
1412         continue;
1413       }
1414       return false;
1415     }
1416   }
1417   return true;
1418 }
1419 
1420 static Instruction *cloneInstructionInExitBlock(
1421     Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
1422     const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU) {
1423   Instruction *New;
1424   if (auto *CI = dyn_cast<CallInst>(&I)) {
1425     const auto &BlockColors = SafetyInfo->getBlockColors();
1426 
1427     // Sinking call-sites need to be handled differently from other
1428     // instructions.  The cloned call-site needs a funclet bundle operand
1429     // appropriate for its location in the CFG.
1430     SmallVector<OperandBundleDef, 1> OpBundles;
1431     for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles();
1432          BundleIdx != BundleEnd; ++BundleIdx) {
1433       OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx);
1434       if (Bundle.getTagID() == LLVMContext::OB_funclet)
1435         continue;
1436 
1437       OpBundles.emplace_back(Bundle);
1438     }
1439 
1440     if (!BlockColors.empty()) {
1441       const ColorVector &CV = BlockColors.find(&ExitBlock)->second;
1442       assert(CV.size() == 1 && "non-unique color for exit block!");
1443       BasicBlock *BBColor = CV.front();
1444       Instruction *EHPad = BBColor->getFirstNonPHI();
1445       if (EHPad->isEHPad())
1446         OpBundles.emplace_back("funclet", EHPad);
1447     }
1448 
1449     New = CallInst::Create(CI, OpBundles);
1450   } else {
1451     New = I.clone();
1452   }
1453 
1454   New->insertInto(&ExitBlock, ExitBlock.getFirstInsertionPt());
1455   if (!I.getName().empty())
1456     New->setName(I.getName() + ".le");
1457 
1458   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
1459     // Create a new MemoryAccess and let MemorySSA set its defining access.
1460     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
1461         New, nullptr, New->getParent(), MemorySSA::Beginning);
1462     if (NewMemAcc) {
1463       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
1464         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
1465       else {
1466         auto *MemUse = cast<MemoryUse>(NewMemAcc);
1467         MSSAU.insertUse(MemUse, /*RenameUses=*/true);
1468       }
1469     }
1470   }
1471 
1472   // Build LCSSA PHI nodes for any in-loop operands (if legal).  Note that
1473   // this is particularly cheap because we can rip off the PHI node that we're
1474   // replacing for the number and blocks of the predecessors.
1475   // OPT: If this shows up in a profile, we can instead finish sinking all
1476   // invariant instructions, and then walk their operands to re-establish
1477   // LCSSA. That will eliminate creating PHI nodes just to nuke them when
1478   // sinking bottom-up.
1479   for (Use &Op : New->operands())
1480     if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) {
1481       auto *OInst = cast<Instruction>(Op.get());
1482       PHINode *OpPN =
1483         PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
1484                         OInst->getName() + ".lcssa", &ExitBlock.front());
1485       for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1486         OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
1487       Op = OpPN;
1488     }
1489   return New;
1490 }
1491 
1492 static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
1493                              MemorySSAUpdater &MSSAU) {
1494   MSSAU.removeMemoryAccess(&I);
1495   SafetyInfo.removeInstruction(&I);
1496   I.eraseFromParent();
1497 }
1498 
1499 static void moveInstructionBefore(Instruction &I, Instruction &Dest,
1500                                   ICFLoopSafetyInfo &SafetyInfo,
1501                                   MemorySSAUpdater &MSSAU,
1502                                   ScalarEvolution *SE) {
1503   SafetyInfo.removeInstruction(&I);
1504   SafetyInfo.insertInstructionTo(&I, Dest.getParent());
1505   I.moveBefore(&Dest);
1506   if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
1507           MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1508     MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
1509   if (SE)
1510     SE->forgetBlockAndLoopDispositions(&I);
1511 }
1512 
1513 static Instruction *sinkThroughTriviallyReplaceablePHI(
1514     PHINode *TPN, Instruction *I, LoopInfo *LI,
1515     SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
1516     const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop,
1517     MemorySSAUpdater &MSSAU) {
1518   assert(isTriviallyReplaceablePHI(*TPN, *I) &&
1519          "Expect only trivially replaceable PHI");
1520   BasicBlock *ExitBlock = TPN->getParent();
1521   Instruction *New;
1522   auto It = SunkCopies.find(ExitBlock);
1523   if (It != SunkCopies.end())
1524     New = It->second;
1525   else
1526     New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock(
1527         *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU);
1528   return New;
1529 }
1530 
1531 static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
1532   BasicBlock *BB = PN->getParent();
1533   if (!BB->canSplitPredecessors())
1534     return false;
1535   // It's not impossible to split EHPad blocks, but if BlockColors already exist
1536   // it require updating BlockColors for all offspring blocks accordingly. By
1537   // skipping such corner case, we can make updating BlockColors after splitting
1538   // predecessor fairly simple.
1539   if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
1540     return false;
1541   for (BasicBlock *BBPred : predecessors(BB)) {
1542     if (isa<IndirectBrInst>(BBPred->getTerminator()))
1543       return false;
1544   }
1545   return true;
1546 }
1547 
1548 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
1549                                         LoopInfo *LI, const Loop *CurLoop,
1550                                         LoopSafetyInfo *SafetyInfo,
1551                                         MemorySSAUpdater *MSSAU) {
1552 #ifndef NDEBUG
1553   SmallVector<BasicBlock *, 32> ExitBlocks;
1554   CurLoop->getUniqueExitBlocks(ExitBlocks);
1555   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1556                                              ExitBlocks.end());
1557 #endif
1558   BasicBlock *ExitBB = PN->getParent();
1559   assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block.");
1560 
1561   // Split predecessors of the loop exit to make instructions in the loop are
1562   // exposed to exit blocks through trivially replaceable PHIs while keeping the
1563   // loop in the canonical form where each predecessor of each exit block should
1564   // be contained within the loop. For example, this will convert the loop below
1565   // from
1566   //
1567   // LB1:
1568   //   %v1 =
1569   //   br %LE, %LB2
1570   // LB2:
1571   //   %v2 =
1572   //   br %LE, %LB1
1573   // LE:
1574   //   %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable
1575   //
1576   // to
1577   //
1578   // LB1:
1579   //   %v1 =
1580   //   br %LE.split, %LB2
1581   // LB2:
1582   //   %v2 =
1583   //   br %LE.split2, %LB1
1584   // LE.split:
1585   //   %p1 = phi [%v1, %LB1]  <-- trivially replaceable
1586   //   br %LE
1587   // LE.split2:
1588   //   %p2 = phi [%v2, %LB2]  <-- trivially replaceable
1589   //   br %LE
1590   // LE:
1591   //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
1592   //
1593   const auto &BlockColors = SafetyInfo->getBlockColors();
1594   SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
1595   while (!PredBBs.empty()) {
1596     BasicBlock *PredBB = *PredBBs.begin();
1597     assert(CurLoop->contains(PredBB) &&
1598            "Expect all predecessors are in the loop");
1599     if (PN->getBasicBlockIndex(PredBB) >= 0) {
1600       BasicBlock *NewPred = SplitBlockPredecessors(
1601           ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true);
1602       // Since we do not allow splitting EH-block with BlockColors in
1603       // canSplitPredecessors(), we can simply assign predecessor's color to
1604       // the new block.
1605       if (!BlockColors.empty())
1606         // Grab a reference to the ColorVector to be inserted before getting the
1607         // reference to the vector we are copying because inserting the new
1608         // element in BlockColors might cause the map to be reallocated.
1609         SafetyInfo->copyColors(NewPred, PredBB);
1610     }
1611     PredBBs.remove(PredBB);
1612   }
1613 }
1614 
1615 /// When an instruction is found to only be used outside of the loop, this
1616 /// function moves it to the exit blocks and patches up SSA form as needed.
1617 /// This method is guaranteed to remove the original instruction from its
1618 /// position, and may either delete it or move it to outside of the loop.
1619 ///
1620 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
1621                  const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo,
1622                  MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE) {
1623   bool Changed = false;
1624   LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
1625 
1626   // Iterate over users to be ready for actual sinking. Replace users via
1627   // unreachable blocks with undef and make all user PHIs trivially replaceable.
1628   SmallPtrSet<Instruction *, 8> VisitedUsers;
1629   for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
1630     auto *User = cast<Instruction>(*UI);
1631     Use &U = UI.getUse();
1632     ++UI;
1633 
1634     if (VisitedUsers.count(User) || CurLoop->contains(User))
1635       continue;
1636 
1637     if (!DT->isReachableFromEntry(User->getParent())) {
1638       U = PoisonValue::get(I.getType());
1639       Changed = true;
1640       continue;
1641     }
1642 
1643     // The user must be a PHI node.
1644     PHINode *PN = cast<PHINode>(User);
1645 
1646     // Surprisingly, instructions can be used outside of loops without any
1647     // exits.  This can only happen in PHI nodes if the incoming block is
1648     // unreachable.
1649     BasicBlock *BB = PN->getIncomingBlock(U);
1650     if (!DT->isReachableFromEntry(BB)) {
1651       U = PoisonValue::get(I.getType());
1652       Changed = true;
1653       continue;
1654     }
1655 
1656     VisitedUsers.insert(PN);
1657     if (isTriviallyReplaceablePHI(*PN, I))
1658       continue;
1659 
1660     if (!canSplitPredecessors(PN, SafetyInfo))
1661       return Changed;
1662 
1663     // Split predecessors of the PHI so that we can make users trivially
1664     // replaceable.
1665     splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, &MSSAU);
1666 
1667     // Should rebuild the iterators, as they may be invalidated by
1668     // splitPredecessorsOfLoopExit().
1669     UI = I.user_begin();
1670     UE = I.user_end();
1671   }
1672 
1673   if (VisitedUsers.empty())
1674     return Changed;
1675 
1676   ORE->emit([&]() {
1677     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
1678            << "sinking " << ore::NV("Inst", &I);
1679   });
1680   if (isa<LoadInst>(I))
1681     ++NumMovedLoads;
1682   else if (isa<CallInst>(I))
1683     ++NumMovedCalls;
1684   ++NumSunk;
1685 
1686 #ifndef NDEBUG
1687   SmallVector<BasicBlock *, 32> ExitBlocks;
1688   CurLoop->getUniqueExitBlocks(ExitBlocks);
1689   SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
1690                                              ExitBlocks.end());
1691 #endif
1692 
1693   // Clones of this instruction. Don't create more than one per exit block!
1694   SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
1695 
1696   // If this instruction is only used outside of the loop, then all users are
1697   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
1698   // the instruction.
1699   // First check if I is worth sinking for all uses. Sink only when it is worth
1700   // across all uses.
1701   SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
1702   for (auto *UI : Users) {
1703     auto *User = cast<Instruction>(UI);
1704 
1705     if (CurLoop->contains(User))
1706       continue;
1707 
1708     PHINode *PN = cast<PHINode>(User);
1709     assert(ExitBlockSet.count(PN->getParent()) &&
1710            "The LCSSA PHI is not in an exit block!");
1711 
1712     // The PHI must be trivially replaceable.
1713     Instruction *New = sinkThroughTriviallyReplaceablePHI(
1714         PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
1715     // As we sink the instruction out of the BB, drop its debug location.
1716     New->dropLocation();
1717     PN->replaceAllUsesWith(New);
1718     eraseInstruction(*PN, *SafetyInfo, MSSAU);
1719     Changed = true;
1720   }
1721   return Changed;
1722 }
1723 
1724 /// When an instruction is found to only use loop invariant operands that
1725 /// is safe to hoist, this instruction is called to do the dirty work.
1726 ///
1727 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
1728                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
1729                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1730                   OptimizationRemarkEmitter *ORE) {
1731   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": "
1732                     << I << "\n");
1733   ORE->emit([&]() {
1734     return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting "
1735                                                          << ore::NV("Inst", &I);
1736   });
1737 
1738   // Metadata can be dependent on conditions we are hoisting above.
1739   // Conservatively strip all metadata on the instruction unless we were
1740   // guaranteed to execute I if we entered the loop, in which case the metadata
1741   // is valid in the loop preheader.
1742   // Similarly, If I is a call and it is not guaranteed to execute in the loop,
1743   // then moving to the preheader means we should strip attributes on the call
1744   // that can cause UB since we may be hoisting above conditions that allowed
1745   // inferring those attributes. They may not be valid at the preheader.
1746   if ((I.hasMetadataOtherThanDebugLoc() || isa<CallInst>(I)) &&
1747       // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning
1748       // time in isGuaranteedToExecute if we don't actually have anything to
1749       // drop.  It is a compile time optimization, not required for correctness.
1750       !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
1751     I.dropUBImplyingAttrsAndMetadata();
1752 
1753   if (isa<PHINode>(I))
1754     // Move the new node to the end of the phi list in the destination block.
1755     moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE);
1756   else
1757     // Move the new node to the destination block, before its terminator.
1758     moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE);
1759 
1760   I.updateLocationAfterHoist();
1761 
1762   if (isa<LoadInst>(I))
1763     ++NumMovedLoads;
1764   else if (isa<CallInst>(I))
1765     ++NumMovedCalls;
1766   ++NumHoisted;
1767 }
1768 
1769 /// Only sink or hoist an instruction if it is not a trapping instruction,
1770 /// or if the instruction is known not to trap when moved to the preheader.
1771 /// or if it is a trapping instruction and is guaranteed to execute.
1772 static bool isSafeToExecuteUnconditionally(
1773     Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
1774     const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
1775     OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
1776     AssumptionCache *AC, bool AllowSpeculation) {
1777   if (AllowSpeculation &&
1778       isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI))
1779     return true;
1780 
1781   bool GuaranteedToExecute =
1782       SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
1783 
1784   if (!GuaranteedToExecute) {
1785     auto *LI = dyn_cast<LoadInst>(&Inst);
1786     if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand()))
1787       ORE->emit([&]() {
1788         return OptimizationRemarkMissed(
1789                    DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI)
1790                << "failed to hoist load with loop-invariant address "
1791                   "because load is conditionally executed";
1792       });
1793   }
1794 
1795   return GuaranteedToExecute;
1796 }
1797 
1798 namespace {
1799 class LoopPromoter : public LoadAndStorePromoter {
1800   Value *SomePtr; // Designated pointer to store to.
1801   SmallVectorImpl<BasicBlock *> &LoopExitBlocks;
1802   SmallVectorImpl<Instruction *> &LoopInsertPts;
1803   SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
1804   PredIteratorCache &PredCache;
1805   MemorySSAUpdater &MSSAU;
1806   LoopInfo &LI;
1807   DebugLoc DL;
1808   Align Alignment;
1809   bool UnorderedAtomic;
1810   AAMDNodes AATags;
1811   ICFLoopSafetyInfo &SafetyInfo;
1812   bool CanInsertStoresInExitBlocks;
1813   ArrayRef<const Instruction *> Uses;
1814 
1815   // We're about to add a use of V in a loop exit block.  Insert an LCSSA phi
1816   // (if legal) if doing so would add an out-of-loop use to an instruction
1817   // defined in-loop.
1818   Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
1819     if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB))
1820       return V;
1821 
1822     Instruction *I = cast<Instruction>(V);
1823     // We need to create an LCSSA PHI node for the incoming value and
1824     // store that.
1825     PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB),
1826                                   I->getName() + ".lcssa", &BB->front());
1827     for (BasicBlock *Pred : PredCache.get(BB))
1828       PN->addIncoming(I, Pred);
1829     return PN;
1830   }
1831 
1832 public:
1833   LoopPromoter(Value *SP, ArrayRef<const Instruction *> Insts, SSAUpdater &S,
1834                SmallVectorImpl<BasicBlock *> &LEB,
1835                SmallVectorImpl<Instruction *> &LIP,
1836                SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
1837                MemorySSAUpdater &MSSAU, LoopInfo &li, DebugLoc dl,
1838                Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
1839                ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
1840       : LoadAndStorePromoter(Insts, S), SomePtr(SP), LoopExitBlocks(LEB),
1841         LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU),
1842         LI(li), DL(std::move(dl)), Alignment(Alignment),
1843         UnorderedAtomic(UnorderedAtomic), AATags(AATags),
1844         SafetyInfo(SafetyInfo),
1845         CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks), Uses(Insts) {}
1846 
1847   void insertStoresInLoopExitBlocks() {
1848     // Insert stores after in the loop exit blocks.  Each exit block gets a
1849     // store of the live-out values that feed them.  Since we've already told
1850     // the SSA updater about the defs in the loop and the preheader
1851     // definition, it is all set and we can start using it.
1852     DIAssignID *NewID = nullptr;
1853     for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) {
1854       BasicBlock *ExitBlock = LoopExitBlocks[i];
1855       Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
1856       LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock);
1857       Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
1858       Instruction *InsertPos = LoopInsertPts[i];
1859       StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
1860       if (UnorderedAtomic)
1861         NewSI->setOrdering(AtomicOrdering::Unordered);
1862       NewSI->setAlignment(Alignment);
1863       NewSI->setDebugLoc(DL);
1864       // Attach DIAssignID metadata to the new store, generating it on the
1865       // first loop iteration.
1866       if (i == 0) {
1867         // NewSI will have its DIAssignID set here if there are any stores in
1868         // Uses with a DIAssignID attachment. This merged ID will then be
1869         // attached to the other inserted stores (in the branch below).
1870         NewSI->mergeDIAssignID(Uses);
1871         NewID = cast_or_null<DIAssignID>(
1872             NewSI->getMetadata(LLVMContext::MD_DIAssignID));
1873       } else {
1874         // Attach the DIAssignID (or nullptr) merged from Uses in the branch
1875         // above.
1876         NewSI->setMetadata(LLVMContext::MD_DIAssignID, NewID);
1877       }
1878 
1879       if (AATags)
1880         NewSI->setAAMetadata(AATags);
1881 
1882       MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
1883       MemoryAccess *NewMemAcc;
1884       if (!MSSAInsertPoint) {
1885         NewMemAcc = MSSAU.createMemoryAccessInBB(
1886             NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
1887       } else {
1888         NewMemAcc =
1889             MSSAU.createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
1890       }
1891       MSSAInsertPts[i] = NewMemAcc;
1892       MSSAU.insertDef(cast<MemoryDef>(NewMemAcc), true);
1893       // FIXME: true for safety, false may still be correct.
1894     }
1895   }
1896 
1897   void doExtraRewritesBeforeFinalDeletion() override {
1898     if (CanInsertStoresInExitBlocks)
1899       insertStoresInLoopExitBlocks();
1900   }
1901 
1902   void instructionDeleted(Instruction *I) const override {
1903     SafetyInfo.removeInstruction(I);
1904     MSSAU.removeMemoryAccess(I);
1905   }
1906 
1907   bool shouldDelete(Instruction *I) const override {
1908     if (isa<StoreInst>(I))
1909       return CanInsertStoresInExitBlocks;
1910     return true;
1911   }
1912 };
1913 
1914 bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
1915                                  DominatorTree *DT) {
1916   // We can perform the captured-before check against any instruction in the
1917   // loop header, as the loop header is reachable from any instruction inside
1918   // the loop.
1919   // TODO: ReturnCaptures=true shouldn't be necessary here.
1920   return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true,
1921                                      /* StoreCaptures */ true,
1922                                      L->getHeader()->getTerminator(), DT);
1923 }
1924 
1925 /// Return true if we can prove that a caller cannot inspect the object if an
1926 /// unwind occurs inside the loop.
1927 bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
1928                                 DominatorTree *DT) {
1929   bool RequiresNoCaptureBeforeUnwind;
1930   if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
1931     return false;
1932 
1933   return !RequiresNoCaptureBeforeUnwind ||
1934          isNotCapturedBeforeOrInLoop(Object, L, DT);
1935 }
1936 
1937 // We don't consider globals as writable: While the physical memory is writable,
1938 // we may not have provenance to perform the write.
1939 bool isWritableObject(const Value *Object) {
1940   // TODO: Alloca might not be writable after its lifetime ends.
1941   // See https://github.com/llvm/llvm-project/issues/51838.
1942   if (isa<AllocaInst>(Object))
1943     return true;
1944 
1945   // TODO: Also handle sret.
1946   if (auto *A = dyn_cast<Argument>(Object))
1947     return A->hasByValAttr();
1948 
1949   // TODO: Noalias has nothing to do with writability, this should check for
1950   // an allocator function.
1951   return isNoAliasCall(Object);
1952 }
1953 
1954 bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
1955                          TargetTransformInfo *TTI) {
1956   // The object must be function-local to start with, and then not captured
1957   // before/in the loop.
1958   return (isIdentifiedFunctionLocal(Object) &&
1959           isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
1960          (TTI->isSingleThreaded() || SingleThread);
1961 }
1962 
1963 } // namespace
1964 
1965 /// Try to promote memory values to scalars by sinking stores out of the
1966 /// loop and moving loads to before the loop.  We do this by looping over
1967 /// the stores in the loop, looking for stores to Must pointers which are
1968 /// loop invariant.
1969 ///
1970 bool llvm::promoteLoopAccessesToScalars(
1971     const SmallSetVector<Value *, 8> &PointerMustAliases,
1972     SmallVectorImpl<BasicBlock *> &ExitBlocks,
1973     SmallVectorImpl<Instruction *> &InsertPts,
1974     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
1975     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1976     const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
1977     MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
1978     OptimizationRemarkEmitter *ORE, bool AllowSpeculation,
1979     bool HasReadsOutsideSet) {
1980   // Verify inputs.
1981   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
1982          SafetyInfo != nullptr &&
1983          "Unexpected Input to promoteLoopAccessesToScalars");
1984 
1985   LLVM_DEBUG({
1986     dbgs() << "Trying to promote set of must-aliased pointers:\n";
1987     for (Value *Ptr : PointerMustAliases)
1988       dbgs() << "  " << *Ptr << "\n";
1989   });
1990   ++NumPromotionCandidates;
1991 
1992   Value *SomePtr = *PointerMustAliases.begin();
1993   BasicBlock *Preheader = CurLoop->getLoopPreheader();
1994 
1995   // It is not safe to promote a load/store from the loop if the load/store is
1996   // conditional.  For example, turning:
1997   //
1998   //    for () { if (c) *P += 1; }
1999   //
2000   // into:
2001   //
2002   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
2003   //
2004   // is not safe, because *P may only be valid to access if 'c' is true.
2005   //
2006   // The safety property divides into two parts:
2007   // p1) The memory may not be dereferenceable on entry to the loop.  In this
2008   //    case, we can't insert the required load in the preheader.
2009   // p2) The memory model does not allow us to insert a store along any dynamic
2010   //    path which did not originally have one.
2011   //
2012   // If at least one store is guaranteed to execute, both properties are
2013   // satisfied, and promotion is legal.
2014   //
2015   // This, however, is not a necessary condition. Even if no store/load is
2016   // guaranteed to execute, we can still establish these properties.
2017   // We can establish (p1) by proving that hoisting the load into the preheader
2018   // is safe (i.e. proving dereferenceability on all paths through the loop). We
2019   // can use any access within the alias set to prove dereferenceability,
2020   // since they're all must alias.
2021   //
2022   // There are two ways establish (p2):
2023   // a) Prove the location is thread-local. In this case the memory model
2024   // requirement does not apply, and stores are safe to insert.
2025   // b) Prove a store dominates every exit block. In this case, if an exit
2026   // blocks is reached, the original dynamic path would have taken us through
2027   // the store, so inserting a store into the exit block is safe. Note that this
2028   // is different from the store being guaranteed to execute. For instance,
2029   // if an exception is thrown on the first iteration of the loop, the original
2030   // store is never executed, but the exit blocks are not executed either.
2031 
2032   bool DereferenceableInPH = false;
2033   bool StoreIsGuanteedToExecute = false;
2034   bool FoundLoadToPromote = false;
2035   // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
2036   enum {
2037     StoreSafe,
2038     StoreUnsafe,
2039     StoreSafetyUnknown,
2040   } StoreSafety = StoreSafetyUnknown;
2041 
2042   SmallVector<Instruction *, 64> LoopUses;
2043 
2044   // We start with an alignment of one and try to find instructions that allow
2045   // us to prove better alignment.
2046   Align Alignment;
2047   // Keep track of which types of access we see
2048   bool SawUnorderedAtomic = false;
2049   bool SawNotAtomic = false;
2050   AAMDNodes AATags;
2051 
2052   const DataLayout &MDL = Preheader->getModule()->getDataLayout();
2053 
2054   // If there are reads outside the promoted set, then promoting stores is
2055   // definitely not safe.
2056   if (HasReadsOutsideSet)
2057     StoreSafety = StoreUnsafe;
2058 
2059   if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) {
2060     // If a loop can throw, we have to insert a store along each unwind edge.
2061     // That said, we can't actually make the unwind edge explicit. Therefore,
2062     // we have to prove that the store is dead along the unwind edge.  We do
2063     // this by proving that the caller can't have a reference to the object
2064     // after return and thus can't possibly load from the object.
2065     Value *Object = getUnderlyingObject(SomePtr);
2066     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
2067       StoreSafety = StoreUnsafe;
2068   }
2069 
2070   // Check that all accesses to pointers in the alias set use the same type.
2071   // We cannot (yet) promote a memory location that is loaded and stored in
2072   // different sizes.  While we are at it, collect alignment and AA info.
2073   Type *AccessTy = nullptr;
2074   for (Value *ASIV : PointerMustAliases) {
2075     for (Use &U : ASIV->uses()) {
2076       // Ignore instructions that are outside the loop.
2077       Instruction *UI = dyn_cast<Instruction>(U.getUser());
2078       if (!UI || !CurLoop->contains(UI))
2079         continue;
2080 
2081       // If there is an non-load/store instruction in the loop, we can't promote
2082       // it.
2083       if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
2084         if (!Load->isUnordered())
2085           return false;
2086 
2087         SawUnorderedAtomic |= Load->isAtomic();
2088         SawNotAtomic |= !Load->isAtomic();
2089         FoundLoadToPromote = true;
2090 
2091         Align InstAlignment = Load->getAlign();
2092 
2093         // Note that proving a load safe to speculate requires proving
2094         // sufficient alignment at the target location.  Proving it guaranteed
2095         // to execute does as well.  Thus we can increase our guaranteed
2096         // alignment as well.
2097         if (!DereferenceableInPH || (InstAlignment > Alignment))
2098           if (isSafeToExecuteUnconditionally(
2099                   *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
2100                   Preheader->getTerminator(), AC, AllowSpeculation)) {
2101             DereferenceableInPH = true;
2102             Alignment = std::max(Alignment, InstAlignment);
2103           }
2104       } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
2105         // Stores *of* the pointer are not interesting, only stores *to* the
2106         // pointer.
2107         if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
2108           continue;
2109         if (!Store->isUnordered())
2110           return false;
2111 
2112         SawUnorderedAtomic |= Store->isAtomic();
2113         SawNotAtomic |= !Store->isAtomic();
2114 
2115         // If the store is guaranteed to execute, both properties are satisfied.
2116         // We may want to check if a store is guaranteed to execute even if we
2117         // already know that promotion is safe, since it may have higher
2118         // alignment than any other guaranteed stores, in which case we can
2119         // raise the alignment on the promoted store.
2120         Align InstAlignment = Store->getAlign();
2121         bool GuaranteedToExecute =
2122             SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop);
2123         StoreIsGuanteedToExecute |= GuaranteedToExecute;
2124         if (GuaranteedToExecute) {
2125           DereferenceableInPH = true;
2126           if (StoreSafety == StoreSafetyUnknown)
2127             StoreSafety = StoreSafe;
2128           Alignment = std::max(Alignment, InstAlignment);
2129         }
2130 
2131         // If a store dominates all exit blocks, it is safe to sink.
2132         // As explained above, if an exit block was executed, a dominating
2133         // store must have been executed at least once, so we are not
2134         // introducing stores on paths that did not have them.
2135         // Note that this only looks at explicit exit blocks. If we ever
2136         // start sinking stores into unwind edges (see above), this will break.
2137         if (StoreSafety == StoreSafetyUnknown &&
2138             llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
2139               return DT->dominates(Store->getParent(), Exit);
2140             }))
2141           StoreSafety = StoreSafe;
2142 
2143         // If the store is not guaranteed to execute, we may still get
2144         // deref info through it.
2145         if (!DereferenceableInPH) {
2146           DereferenceableInPH = isDereferenceableAndAlignedPointer(
2147               Store->getPointerOperand(), Store->getValueOperand()->getType(),
2148               Store->getAlign(), MDL, Preheader->getTerminator(), AC, DT, TLI);
2149         }
2150       } else
2151         continue; // Not a load or store.
2152 
2153       if (!AccessTy)
2154         AccessTy = getLoadStoreType(UI);
2155       else if (AccessTy != getLoadStoreType(UI))
2156         return false;
2157 
2158       // Merge the AA tags.
2159       if (LoopUses.empty()) {
2160         // On the first load/store, just take its AA tags.
2161         AATags = UI->getAAMetadata();
2162       } else if (AATags) {
2163         AATags = AATags.merge(UI->getAAMetadata());
2164       }
2165 
2166       LoopUses.push_back(UI);
2167     }
2168   }
2169 
2170   // If we found both an unordered atomic instruction and a non-atomic memory
2171   // access, bail.  We can't blindly promote non-atomic to atomic since we
2172   // might not be able to lower the result.  We can't downgrade since that
2173   // would violate memory model.  Also, align 0 is an error for atomics.
2174   if (SawUnorderedAtomic && SawNotAtomic)
2175     return false;
2176 
2177   // If we're inserting an atomic load in the preheader, we must be able to
2178   // lower it.  We're only guaranteed to be able to lower naturally aligned
2179   // atomics.
2180   if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
2181     return false;
2182 
2183   // If we couldn't prove we can hoist the load, bail.
2184   if (!DereferenceableInPH) {
2185     LLVM_DEBUG(dbgs() << "Not promoting: Not dereferenceable in preheader\n");
2186     return false;
2187   }
2188 
2189   // We know we can hoist the load, but don't have a guaranteed store.
2190   // Check whether the location is writable and thread-local. If it is, then we
2191   // can insert stores along paths which originally didn't have them without
2192   // violating the memory model.
2193   if (StoreSafety == StoreSafetyUnknown) {
2194     Value *Object = getUnderlyingObject(SomePtr);
2195     if (isWritableObject(Object) &&
2196         isThreadLocalObject(Object, CurLoop, DT, TTI))
2197       StoreSafety = StoreSafe;
2198   }
2199 
2200   // If we've still failed to prove we can sink the store, hoist the load
2201   // only, if possible.
2202   if (StoreSafety != StoreSafe && !FoundLoadToPromote)
2203     // If we cannot hoist the load either, give up.
2204     return false;
2205 
2206   // Lets do the promotion!
2207   if (StoreSafety == StoreSafe) {
2208     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
2209                       << '\n');
2210     ++NumLoadStorePromoted;
2211   } else {
2212     LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
2213                       << '\n');
2214     ++NumLoadPromoted;
2215   }
2216 
2217   ORE->emit([&]() {
2218     return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
2219                               LoopUses[0])
2220            << "Moving accesses to memory location out of the loop";
2221   });
2222 
2223   // Look at all the loop uses, and try to merge their locations.
2224   std::vector<DILocation *> LoopUsesLocs;
2225   for (auto *U : LoopUses)
2226     LoopUsesLocs.push_back(U->getDebugLoc().get());
2227   auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
2228 
2229   // We use the SSAUpdater interface to insert phi nodes as required.
2230   SmallVector<PHINode *, 16> NewPHIs;
2231   SSAUpdater SSA(&NewPHIs);
2232   LoopPromoter Promoter(SomePtr, LoopUses, SSA, ExitBlocks, InsertPts,
2233                         MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment,
2234                         SawUnorderedAtomic, AATags, *SafetyInfo,
2235                         StoreSafety == StoreSafe);
2236 
2237   // Set up the preheader to have a definition of the value.  It is the live-out
2238   // value from the preheader that uses in the loop will use.
2239   LoadInst *PreheaderLoad = nullptr;
2240   if (FoundLoadToPromote || !StoreIsGuanteedToExecute) {
2241     PreheaderLoad =
2242         new LoadInst(AccessTy, SomePtr, SomePtr->getName() + ".promoted",
2243                      Preheader->getTerminator());
2244     if (SawUnorderedAtomic)
2245       PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
2246     PreheaderLoad->setAlignment(Alignment);
2247     PreheaderLoad->setDebugLoc(DebugLoc());
2248     if (AATags)
2249       PreheaderLoad->setAAMetadata(AATags);
2250 
2251     MemoryAccess *PreheaderLoadMemoryAccess = MSSAU.createMemoryAccessInBB(
2252         PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
2253     MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
2254     MSSAU.insertUse(NewMemUse, /*RenameUses=*/true);
2255     SSA.AddAvailableValue(Preheader, PreheaderLoad);
2256   } else {
2257     SSA.AddAvailableValue(Preheader, PoisonValue::get(AccessTy));
2258   }
2259 
2260   if (VerifyMemorySSA)
2261     MSSAU.getMemorySSA()->verifyMemorySSA();
2262   // Rewrite all the loads in the loop and remember all the definitions from
2263   // stores in the loop.
2264   Promoter.run(LoopUses);
2265 
2266   if (VerifyMemorySSA)
2267     MSSAU.getMemorySSA()->verifyMemorySSA();
2268   // If the SSAUpdater didn't use the load in the preheader, just zap it now.
2269   if (PreheaderLoad && PreheaderLoad->use_empty())
2270     eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
2271 
2272   return true;
2273 }
2274 
2275 static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
2276                                 function_ref<void(Instruction *)> Fn) {
2277   for (const BasicBlock *BB : L->blocks())
2278     if (const auto *Accesses = MSSA->getBlockAccesses(BB))
2279       for (const auto &Access : *Accesses)
2280         if (const auto *MUD = dyn_cast<MemoryUseOrDef>(&Access))
2281           Fn(MUD->getMemoryInst());
2282 }
2283 
2284 // The bool indicates whether there might be reads outside the set, in which
2285 // case only loads may be promoted.
2286 static SmallVector<PointersAndHasReadsOutsideSet, 0>
2287 collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
2288   BatchAAResults BatchAA(*AA);
2289   AliasSetTracker AST(BatchAA);
2290 
2291   auto IsPotentiallyPromotable = [L](const Instruction *I) {
2292     if (const auto *SI = dyn_cast<StoreInst>(I))
2293       return L->isLoopInvariant(SI->getPointerOperand());
2294     if (const auto *LI = dyn_cast<LoadInst>(I))
2295       return L->isLoopInvariant(LI->getPointerOperand());
2296     return false;
2297   };
2298 
2299   // Populate AST with potentially promotable accesses.
2300   SmallPtrSet<Value *, 16> AttemptingPromotion;
2301   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2302     if (IsPotentiallyPromotable(I)) {
2303       AttemptingPromotion.insert(I);
2304       AST.add(I);
2305     }
2306   });
2307 
2308   // We're only interested in must-alias sets that contain a mod.
2309   SmallVector<PointerIntPair<const AliasSet *, 1, bool>, 8> Sets;
2310   for (AliasSet &AS : AST)
2311     if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias())
2312       Sets.push_back({&AS, false});
2313 
2314   if (Sets.empty())
2315     return {}; // Nothing to promote...
2316 
2317   // Discard any sets for which there is an aliasing non-promotable access.
2318   foreachMemoryAccess(MSSA, L, [&](Instruction *I) {
2319     if (AttemptingPromotion.contains(I))
2320       return;
2321 
2322     llvm::erase_if(Sets, [&](PointerIntPair<const AliasSet *, 1, bool> &Pair) {
2323       ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA);
2324       // Cannot promote if there are writes outside the set.
2325       if (isModSet(MR))
2326         return true;
2327       if (isRefSet(MR)) {
2328         // Remember reads outside the set.
2329         Pair.setInt(true);
2330         // If this is a mod-only set and there are reads outside the set,
2331         // we will not be able to promote, so bail out early.
2332         return !Pair.getPointer()->isRef();
2333       }
2334       return false;
2335     });
2336   });
2337 
2338   SmallVector<std::pair<SmallSetVector<Value *, 8>, bool>, 0> Result;
2339   for (auto [Set, HasReadsOutsideSet] : Sets) {
2340     SmallSetVector<Value *, 8> PointerMustAliases;
2341     for (const auto &ASI : *Set)
2342       PointerMustAliases.insert(ASI.getValue());
2343     Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet);
2344   }
2345 
2346   return Result;
2347 }
2348 
2349 static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
2350                                      Loop *CurLoop, Instruction &I,
2351                                      SinkAndHoistLICMFlags &Flags,
2352                                      bool InvariantGroup) {
2353   // For hoisting, use the walker to determine safety
2354   if (!Flags.getIsSink()) {
2355     // If hoisting an invariant group, we only need to check that there
2356     // is no store to the loaded pointer between the start of the loop,
2357     // and the load (since all values must be the same).
2358 
2359     // This can be checked in two conditions:
2360     // 1) if the memoryaccess is outside the loop
2361     // 2) the earliest access is at the loop header,
2362     // if the memory loaded is the phi node
2363 
2364     BatchAAResults BAA(MSSA->getAA());
2365     MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
2366     return !MSSA->isLiveOnEntryDef(Source) &&
2367            CurLoop->contains(Source->getBlock()) &&
2368            !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
2369   }
2370 
2371   // For sinking, we'd need to check all Defs below this use. The getClobbering
2372   // call will look on the backedge of the loop, but will check aliasing with
2373   // the instructions on the previous iteration.
2374   // For example:
2375   // for (i ... )
2376   //   load a[i] ( Use (LoE)
2377   //   store a[i] ( 1 = Def (2), with 2 = Phi for the loop.
2378   //   i++;
2379   // The load sees no clobbering inside the loop, as the backedge alias check
2380   // does phi translation, and will check aliasing against store a[i-1].
2381   // However sinking the load outside the loop, below the store is incorrect.
2382 
2383   // For now, only sink if there are no Defs in the loop, and the existing ones
2384   // precede the use and are in the same block.
2385   // FIXME: Increase precision: Safe to sink if Use post dominates the Def;
2386   // needs PostDominatorTreeAnalysis.
2387   // FIXME: More precise: no Defs that alias this Use.
2388   if (Flags.tooManyMemoryAccesses())
2389     return true;
2390   for (auto *BB : CurLoop->getBlocks())
2391     if (pointerInvalidatedByBlock(*BB, *MSSA, *MU))
2392       return true;
2393   // When sinking, the source block may not be part of the loop so check it.
2394   if (!CurLoop->contains(&I))
2395     return pointerInvalidatedByBlock(*I.getParent(), *MSSA, *MU);
2396 
2397   return false;
2398 }
2399 
2400 bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
2401   if (const auto *Accesses = MSSA.getBlockDefs(&BB))
2402     for (const auto &MA : *Accesses)
2403       if (const auto *MD = dyn_cast<MemoryDef>(&MA))
2404         if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU))
2405           return true;
2406   return false;
2407 }
2408 
2409 /// Try to simplify things like (A < INV_1 AND icmp A < INV_2) into (A <
2410 /// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
2411 /// minimun can be computed outside of loop, and X is not a loop-invariant.
2412 static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2413                         MemorySSAUpdater &MSSAU) {
2414   bool Inverse = false;
2415   using namespace PatternMatch;
2416   Value *Cond1, *Cond2;
2417   if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
2418     Inverse = true;
2419   } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
2420     // Do nothing
2421   } else
2422     return false;
2423 
2424   auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
2425                                        Value *&LHS, Value *&RHS) {
2426     if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
2427       return false;
2428     if (!LHS->getType()->isIntegerTy())
2429       return false;
2430     if (!ICmpInst::isRelational(P))
2431       return false;
2432     if (L.isLoopInvariant(LHS)) {
2433       std::swap(LHS, RHS);
2434       P = ICmpInst::getSwappedPredicate(P);
2435     }
2436     if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
2437       return false;
2438     if (Inverse)
2439       P = ICmpInst::getInversePredicate(P);
2440     return true;
2441   };
2442   ICmpInst::Predicate P1, P2;
2443   Value *LHS1, *LHS2, *RHS1, *RHS2;
2444   if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
2445       !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
2446     return false;
2447   if (P1 != P2 || LHS1 != LHS2)
2448     return false;
2449 
2450   // Everything is fine, we can do the transform.
2451   bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
2452   assert(
2453       (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
2454       "Relational predicate is either less (or equal) or greater (or equal)!");
2455   Intrinsic::ID id = ICmpInst::isSigned(P1)
2456                          ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
2457                          : (UseMin ? Intrinsic::umin : Intrinsic::umax);
2458   auto *Preheader = L.getLoopPreheader();
2459   assert(Preheader && "Loop is not in simplify form?");
2460   IRBuilder<> Builder(Preheader->getTerminator());
2461   // We are about to create a new guaranteed use for RHS2 which might not exist
2462   // before (if it was a non-taken input of logical and/or instruction). If it
2463   // was poison, we need to freeze it. Note that no new use for LHS and RHS1 are
2464   // introduced, so they don't need this.
2465   if (isa<SelectInst>(I))
2466     RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
2467   Value *NewRHS = Builder.CreateBinaryIntrinsic(
2468       id, RHS1, RHS2, nullptr, StringRef("invariant.") +
2469                                    (ICmpInst::isSigned(P1) ? "s" : "u") +
2470                                    (UseMin ? "min" : "max"));
2471   Builder.SetInsertPoint(&I);
2472   ICmpInst::Predicate P = P1;
2473   if (Inverse)
2474     P = ICmpInst::getInversePredicate(P);
2475   Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
2476   NewCond->takeName(&I);
2477   I.replaceAllUsesWith(NewCond);
2478   eraseInstruction(I, SafetyInfo, MSSAU);
2479   eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
2480   eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
2481   return true;
2482 }
2483 
2484 /// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
2485 /// this allows hoisting the inner GEP.
2486 static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2487                      MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2488                      DominatorTree *DT) {
2489   auto *GEP = dyn_cast<GetElementPtrInst>(&I);
2490   if (!GEP)
2491     return false;
2492 
2493   auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
2494   if (!Src || !Src->hasOneUse() || !L.contains(Src))
2495     return false;
2496 
2497   Value *SrcPtr = Src->getPointerOperand();
2498   auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
2499   if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
2500     return false;
2501 
2502   // This can only happen if !AllowSpeculation, otherwise this would already be
2503   // handled.
2504   // FIXME: Should we respect AllowSpeculation in these reassociation folds?
2505   // The flag exists to prevent metadata dropping, which is not relevant here.
2506   if (all_of(Src->indices(), LoopInvariant))
2507     return false;
2508 
2509   // The swapped GEPs are inbounds if both original GEPs are inbounds
2510   // and the sign of the offsets is the same. For simplicity, only
2511   // handle both offsets being non-negative.
2512   const DataLayout &DL = GEP->getModule()->getDataLayout();
2513   auto NonNegative = [&](Value *V) {
2514     return isKnownNonNegative(V, DL, 0, AC, GEP, DT);
2515   };
2516   bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
2517                     all_of(Src->indices(), NonNegative) &&
2518                     all_of(GEP->indices(), NonNegative);
2519 
2520   BasicBlock *Preheader = L.getLoopPreheader();
2521   IRBuilder<> Builder(Preheader->getTerminator());
2522   Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
2523                                     SmallVector<Value *>(GEP->indices()),
2524                                     "invariant.gep", IsInBounds);
2525   Builder.SetInsertPoint(GEP);
2526   Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
2527                                     SmallVector<Value *>(Src->indices()), "gep",
2528                                     IsInBounds);
2529   GEP->replaceAllUsesWith(NewGEP);
2530   eraseInstruction(*GEP, SafetyInfo, MSSAU);
2531   eraseInstruction(*Src, SafetyInfo, MSSAU);
2532   return true;
2533 }
2534 
2535 /// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2536 /// C1 and C2 are loop invariants and LV is a loop-variant.
2537 static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2538                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2539                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2540                      AssumptionCache *AC, DominatorTree *DT) {
2541   assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2542   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2543   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2544 
2545   // Try to represent VariantLHS as sum of invariant and variant operands.
2546   using namespace PatternMatch;
2547   Value *VariantOp, *InvariantOp;
2548   if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2549     return false;
2550 
2551   // LHS itself is a loop-variant, try to represent it in the form:
2552   // "VariantOp + InvariantOp". If it is possible, then we can reassociate.
2553   if (L.isLoopInvariant(VariantOp))
2554     std::swap(VariantOp, InvariantOp);
2555   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2556     return false;
2557 
2558   // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2559   // freely move values from left side of inequality to right side (just as in
2560   // normal linear arithmetics). Overflows make things much more complicated, so
2561   // we want to avoid this.
2562   auto &DL = L.getHeader()->getModule()->getDataLayout();
2563   bool ProvedNoOverflowAfterReassociate =
2564       computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
2565                                   DT) == llvm::OverflowResult::NeverOverflows;
2566   if (!ProvedNoOverflowAfterReassociate)
2567     return false;
2568   auto *Preheader = L.getLoopPreheader();
2569   assert(Preheader && "Loop is not in simplify form?");
2570   IRBuilder<> Builder(Preheader->getTerminator());
2571   Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
2572                                       /*HasNUW*/ false, /*HasNSW*/ true);
2573   ICmp.setPredicate(Pred);
2574   ICmp.setOperand(0, VariantOp);
2575   ICmp.setOperand(1, NewCmpOp);
2576   eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2577   return true;
2578 }
2579 
2580 /// Try to reassociate and hoist the following two patterns:
2581 /// LV - C1 < C2 --> LV < C1 + C2,
2582 /// C1 - LV < C2 --> LV > C1 - C2.
2583 static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
2584                      Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2585                      ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2586                      AssumptionCache *AC, DominatorTree *DT) {
2587   assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2588   assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2589   assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2590 
2591   // Try to represent VariantLHS as sum of invariant and variant operands.
2592   using namespace PatternMatch;
2593   Value *VariantOp, *InvariantOp;
2594   if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
2595     return false;
2596 
2597   bool VariantSubtracted = false;
2598   // LHS itself is a loop-variant, try to represent it in the form:
2599   // "VariantOp + InvariantOp". If it is possible, then we can reassociate. If
2600   // the variant operand goes with minus, we use a slightly different scheme.
2601   if (L.isLoopInvariant(VariantOp)) {
2602     std::swap(VariantOp, InvariantOp);
2603     VariantSubtracted = true;
2604     Pred = ICmpInst::getSwappedPredicate(Pred);
2605   }
2606   if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2607     return false;
2608 
2609   // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
2610   // freely move values from left side of inequality to right side (just as in
2611   // normal linear arithmetics). Overflows make things much more complicated, so
2612   // we want to avoid this. Likewise, for "C1 - LV < C2" we need to prove that
2613   // "C1 - C2" does not overflow.
2614   auto &DL = L.getHeader()->getModule()->getDataLayout();
2615   if (VariantSubtracted) {
2616     // C1 - LV < C2 --> LV > C1 - C2
2617     if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
2618                                     DT) != llvm::OverflowResult::NeverOverflows)
2619       return false;
2620   } else {
2621     // LV - C1 < C2 --> LV < C1 + C2
2622     if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
2623                                     DT) != llvm::OverflowResult::NeverOverflows)
2624       return false;
2625   }
2626   auto *Preheader = L.getLoopPreheader();
2627   assert(Preheader && "Loop is not in simplify form?");
2628   IRBuilder<> Builder(Preheader->getTerminator());
2629   Value *NewCmpOp =
2630       VariantSubtracted
2631           ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
2632                               /*HasNUW*/ false, /*HasNSW*/ true)
2633           : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
2634                               /*HasNUW*/ false, /*HasNSW*/ true);
2635   ICmp.setPredicate(Pred);
2636   ICmp.setOperand(0, VariantOp);
2637   ICmp.setOperand(1, NewCmpOp);
2638   eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2639   return true;
2640 }
2641 
2642 /// Reassociate and hoist add/sub expressions.
2643 static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2644                         MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2645                         DominatorTree *DT) {
2646   using namespace PatternMatch;
2647   ICmpInst::Predicate Pred;
2648   Value *LHS, *RHS;
2649   if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
2650     return false;
2651 
2652   // TODO: Support unsigned predicates?
2653   if (!ICmpInst::isSigned(Pred))
2654     return false;
2655 
2656   // Put variant operand to LHS position.
2657   if (L.isLoopInvariant(LHS)) {
2658     std::swap(LHS, RHS);
2659     Pred = ICmpInst::getSwappedPredicate(Pred);
2660   }
2661   // We want to delete the initial operation after reassociation, so only do it
2662   // if it has no other uses.
2663   if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
2664     return false;
2665 
2666   // TODO: We could go with smarter context, taking common dominator of all I's
2667   // users instead of I itself.
2668   if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2669     return true;
2670 
2671   if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2672     return true;
2673 
2674   return false;
2675 }
2676 
2677 static bool hoistArithmetics(Instruction &I, Loop &L,
2678                              ICFLoopSafetyInfo &SafetyInfo,
2679                              MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2680                              DominatorTree *DT) {
2681   // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
2682   // into (x < min(INV1, INV2)), and hoisting the invariant part of this
2683   // expression out of the loop.
2684   if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
2685     ++NumHoisted;
2686     ++NumMinMaxHoisted;
2687     return true;
2688   }
2689 
2690   // Try to hoist GEPs by reassociation.
2691   if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
2692     ++NumHoisted;
2693     ++NumGEPsHoisted;
2694     return true;
2695   }
2696 
2697   // Try to hoist add/sub's by reassociation.
2698   if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2699     ++NumHoisted;
2700     ++NumAddSubHoisted;
2701     return true;
2702   }
2703 
2704   return false;
2705 }
2706 
2707 /// Little predicate that returns true if the specified basic block is in
2708 /// a subloop of the current one, not the current one itself.
2709 ///
2710 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) {
2711   assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop");
2712   return LI->getLoopFor(BB) != CurLoop;
2713 }
2714