xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/InitializePasses.h"
68 #include "llvm/Pass.h"
69 #include "llvm/Support/Casting.h"
70 #include "llvm/Support/CommandLine.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
73 #include "llvm/Support/MathExtras.h"
74 #include "llvm/Support/raw_ostream.h"
75 #include "llvm/Transforms/Scalar.h"
76 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
77 #include "llvm/Transforms/Utils/Local.h"
78 #include "llvm/Transforms/Utils/LoopUtils.h"
79 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
80 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
81 #include <cassert>
82 #include <cstdint>
83 #include <utility>
84 
85 using namespace llvm;
86 using namespace PatternMatch;
87 
88 #define DEBUG_TYPE "indvars"
89 
90 STATISTIC(NumWidened     , "Number of indvars widened");
91 STATISTIC(NumReplaced    , "Number of exit values replaced");
92 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
93 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
94 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
95 
96 // Trip count verification can be enabled by default under NDEBUG if we
97 // implement a strong expression equivalence checker in SCEV. Until then, we
98 // use the verify-indvars flag, which may assert in some cases.
99 static cl::opt<bool> VerifyIndvars(
100     "verify-indvars", cl::Hidden,
101     cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
102              "effect in release builds. (Note: this adds additional SCEV "
103              "queries potentially changing the analysis result)"));
104 
105 static cl::opt<ReplaceExitVal> ReplaceExitValue(
106     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
107     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
108     cl::values(
109         clEnumValN(NeverRepl, "never", "never replace exit value"),
110         clEnumValN(OnlyCheapRepl, "cheap",
111                    "only replace exit value when the cost is cheap"),
112         clEnumValN(
113             UnusedIndVarInLoop, "unusedindvarinloop",
114             "only replace exit value when it is an unused "
115             "induction variable in the loop and has cheap replacement cost"),
116         clEnumValN(NoHardUse, "noharduse",
117                    "only replace exit values when loop def likely dead"),
118         clEnumValN(AlwaysRepl, "always",
119                    "always replace exit value whenever possible")));
120 
121 static cl::opt<bool> UsePostIncrementRanges(
122   "indvars-post-increment-ranges", cl::Hidden,
123   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
124   cl::init(true));
125 
126 static cl::opt<bool>
127 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
128             cl::desc("Disable Linear Function Test Replace optimization"));
129 
130 static cl::opt<bool>
131 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
132                 cl::desc("Predicate conditions in read only loops"));
133 
134 static cl::opt<bool>
135 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
136                 cl::desc("Allow widening of indvars to eliminate s/zext"));
137 
138 namespace {
139 
140 class IndVarSimplify {
141   LoopInfo *LI;
142   ScalarEvolution *SE;
143   DominatorTree *DT;
144   const DataLayout &DL;
145   TargetLibraryInfo *TLI;
146   const TargetTransformInfo *TTI;
147   std::unique_ptr<MemorySSAUpdater> MSSAU;
148 
149   SmallVector<WeakTrackingVH, 16> DeadInsts;
150   bool WidenIndVars;
151 
152   bool handleFloatingPointIV(Loop *L, PHINode *PH);
153   bool rewriteNonIntegerIVs(Loop *L);
154 
155   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
156   /// Try to improve our exit conditions by converting condition from signed
157   /// to unsigned or rotating computation out of the loop.
158   /// (See inline comment about why this is duplicated from simplifyAndExtend)
159   bool canonicalizeExitCondition(Loop *L);
160   /// Try to eliminate loop exits based on analyzeable exit counts
161   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
162   /// Try to form loop invariant tests for loop exits by changing how many
163   /// iterations of the loop run when that is unobservable.
164   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
165 
166   bool rewriteFirstIterationLoopExitValues(Loop *L);
167 
168   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
169                                  const SCEV *ExitCount,
170                                  PHINode *IndVar, SCEVExpander &Rewriter);
171 
172   bool sinkUnusedInvariants(Loop *L);
173 
174 public:
175   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
176                  const DataLayout &DL, TargetLibraryInfo *TLI,
177                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
178       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
179         WidenIndVars(WidenIndVars) {
180     if (MSSA)
181       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
182   }
183 
184   bool run(Loop *L);
185 };
186 
187 } // end anonymous namespace
188 
189 //===----------------------------------------------------------------------===//
190 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
191 //===----------------------------------------------------------------------===//
192 
193 /// Convert APF to an integer, if possible.
194 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
195   bool isExact = false;
196   // See if we can convert this to an int64_t
197   uint64_t UIntVal;
198   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
199                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
200       !isExact)
201     return false;
202   IntVal = UIntVal;
203   return true;
204 }
205 
206 /// If the loop has floating induction variable then insert corresponding
207 /// integer induction variable if possible.
208 /// For example,
209 /// for(double i = 0; i < 10000; ++i)
210 ///   bar(i)
211 /// is converted into
212 /// for(int i = 0; i < 10000; ++i)
213 ///   bar((double)i);
214 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
215   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
216   unsigned BackEdge     = IncomingEdge^1;
217 
218   // Check incoming value.
219   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
220 
221   int64_t InitValue;
222   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
223     return false;
224 
225   // Check IV increment. Reject this PN if increment operation is not
226   // an add or increment value can not be represented by an integer.
227   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
228   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
229 
230   // If this is not an add of the PHI with a constantfp, or if the constant fp
231   // is not an integer, bail out.
232   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
233   int64_t IncValue;
234   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
235       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
236     return false;
237 
238   // Check Incr uses. One user is PN and the other user is an exit condition
239   // used by the conditional terminator.
240   Value::user_iterator IncrUse = Incr->user_begin();
241   Instruction *U1 = cast<Instruction>(*IncrUse++);
242   if (IncrUse == Incr->user_end()) return false;
243   Instruction *U2 = cast<Instruction>(*IncrUse++);
244   if (IncrUse != Incr->user_end()) return false;
245 
246   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
247   // only used by a branch, we can't transform it.
248   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
249   if (!Compare)
250     Compare = dyn_cast<FCmpInst>(U2);
251   if (!Compare || !Compare->hasOneUse() ||
252       !isa<BranchInst>(Compare->user_back()))
253     return false;
254 
255   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
256 
257   // We need to verify that the branch actually controls the iteration count
258   // of the loop.  If not, the new IV can overflow and no one will notice.
259   // The branch block must be in the loop and one of the successors must be out
260   // of the loop.
261   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
262   if (!L->contains(TheBr->getParent()) ||
263       (L->contains(TheBr->getSuccessor(0)) &&
264        L->contains(TheBr->getSuccessor(1))))
265     return false;
266 
267   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
268   // transform it.
269   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
270   int64_t ExitValue;
271   if (ExitValueVal == nullptr ||
272       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
273     return false;
274 
275   // Find new predicate for integer comparison.
276   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
277   switch (Compare->getPredicate()) {
278   default: return false;  // Unknown comparison.
279   case CmpInst::FCMP_OEQ:
280   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
281   case CmpInst::FCMP_ONE:
282   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
283   case CmpInst::FCMP_OGT:
284   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
285   case CmpInst::FCMP_OGE:
286   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
287   case CmpInst::FCMP_OLT:
288   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
289   case CmpInst::FCMP_OLE:
290   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
291   }
292 
293   // We convert the floating point induction variable to a signed i32 value if
294   // we can.  This is only safe if the comparison will not overflow in a way
295   // that won't be trapped by the integer equivalent operations.  Check for this
296   // now.
297   // TODO: We could use i64 if it is native and the range requires it.
298 
299   // The start/stride/exit values must all fit in signed i32.
300   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
301     return false;
302 
303   // If not actually striding (add x, 0.0), avoid touching the code.
304   if (IncValue == 0)
305     return false;
306 
307   // Positive and negative strides have different safety conditions.
308   if (IncValue > 0) {
309     // If we have a positive stride, we require the init to be less than the
310     // exit value.
311     if (InitValue >= ExitValue)
312       return false;
313 
314     uint32_t Range = uint32_t(ExitValue-InitValue);
315     // Check for infinite loop, either:
316     // while (i <= Exit) or until (i > Exit)
317     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
318       if (++Range == 0) return false;  // Range overflows.
319     }
320 
321     unsigned Leftover = Range % uint32_t(IncValue);
322 
323     // If this is an equality comparison, we require that the strided value
324     // exactly land on the exit value, otherwise the IV condition will wrap
325     // around and do things the fp IV wouldn't.
326     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
327         Leftover != 0)
328       return false;
329 
330     // If the stride would wrap around the i32 before exiting, we can't
331     // transform the IV.
332     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
333       return false;
334   } else {
335     // If we have a negative stride, we require the init to be greater than the
336     // exit value.
337     if (InitValue <= ExitValue)
338       return false;
339 
340     uint32_t Range = uint32_t(InitValue-ExitValue);
341     // Check for infinite loop, either:
342     // while (i >= Exit) or until (i < Exit)
343     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
344       if (++Range == 0) return false;  // Range overflows.
345     }
346 
347     unsigned Leftover = Range % uint32_t(-IncValue);
348 
349     // If this is an equality comparison, we require that the strided value
350     // exactly land on the exit value, otherwise the IV condition will wrap
351     // around and do things the fp IV wouldn't.
352     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
353         Leftover != 0)
354       return false;
355 
356     // If the stride would wrap around the i32 before exiting, we can't
357     // transform the IV.
358     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
359       return false;
360   }
361 
362   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
363 
364   // Insert new integer induction variable.
365   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
366   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
367                       PN->getIncomingBlock(IncomingEdge));
368 
369   Value *NewAdd =
370     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
371                               Incr->getName()+".int", Incr);
372   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
373 
374   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
375                                       ConstantInt::get(Int32Ty, ExitValue),
376                                       Compare->getName());
377 
378   // In the following deletions, PN may become dead and may be deleted.
379   // Use a WeakTrackingVH to observe whether this happens.
380   WeakTrackingVH WeakPH = PN;
381 
382   // Delete the old floating point exit comparison.  The branch starts using the
383   // new comparison.
384   NewCompare->takeName(Compare);
385   Compare->replaceAllUsesWith(NewCompare);
386   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
387 
388   // Delete the old floating point increment.
389   Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
390   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
391 
392   // If the FP induction variable still has uses, this is because something else
393   // in the loop uses its value.  In order to canonicalize the induction
394   // variable, we chose to eliminate the IV and rewrite it in terms of an
395   // int->fp cast.
396   //
397   // We give preference to sitofp over uitofp because it is faster on most
398   // platforms.
399   if (WeakPH) {
400     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
401                                  &*PN->getParent()->getFirstInsertionPt());
402     PN->replaceAllUsesWith(Conv);
403     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
404   }
405   return true;
406 }
407 
408 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
409   // First step.  Check to see if there are any floating-point recurrences.
410   // If there are, change them into integer recurrences, permitting analysis by
411   // the SCEV routines.
412   BasicBlock *Header = L->getHeader();
413 
414   SmallVector<WeakTrackingVH, 8> PHIs;
415   for (PHINode &PN : Header->phis())
416     PHIs.push_back(&PN);
417 
418   bool Changed = false;
419   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
420     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
421       Changed |= handleFloatingPointIV(L, PN);
422 
423   // If the loop previously had floating-point IV, ScalarEvolution
424   // may not have been able to compute a trip count. Now that we've done some
425   // re-writing, the trip count may be computable.
426   if (Changed)
427     SE->forgetLoop(L);
428   return Changed;
429 }
430 
431 //===---------------------------------------------------------------------===//
432 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
433 // they will exit at the first iteration.
434 //===---------------------------------------------------------------------===//
435 
436 /// Check to see if this loop has loop invariant conditions which lead to loop
437 /// exits. If so, we know that if the exit path is taken, it is at the first
438 /// loop iteration. This lets us predict exit values of PHI nodes that live in
439 /// loop header.
440 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
441   // Verify the input to the pass is already in LCSSA form.
442   assert(L->isLCSSAForm(*DT));
443 
444   SmallVector<BasicBlock *, 8> ExitBlocks;
445   L->getUniqueExitBlocks(ExitBlocks);
446 
447   bool MadeAnyChanges = false;
448   for (auto *ExitBB : ExitBlocks) {
449     // If there are no more PHI nodes in this exit block, then no more
450     // values defined inside the loop are used on this path.
451     for (PHINode &PN : ExitBB->phis()) {
452       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
453            IncomingValIdx != E; ++IncomingValIdx) {
454         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
455 
456         // Can we prove that the exit must run on the first iteration if it
457         // runs at all?  (i.e. early exits are fine for our purposes, but
458         // traces which lead to this exit being taken on the 2nd iteration
459         // aren't.)  Note that this is about whether the exit branch is
460         // executed, not about whether it is taken.
461         if (!L->getLoopLatch() ||
462             !DT->dominates(IncomingBB, L->getLoopLatch()))
463           continue;
464 
465         // Get condition that leads to the exit path.
466         auto *TermInst = IncomingBB->getTerminator();
467 
468         Value *Cond = nullptr;
469         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
470           // Must be a conditional branch, otherwise the block
471           // should not be in the loop.
472           Cond = BI->getCondition();
473         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
474           Cond = SI->getCondition();
475         else
476           continue;
477 
478         if (!L->isLoopInvariant(Cond))
479           continue;
480 
481         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
482 
483         // Only deal with PHIs in the loop header.
484         if (!ExitVal || ExitVal->getParent() != L->getHeader())
485           continue;
486 
487         // If ExitVal is a PHI on the loop header, then we know its
488         // value along this exit because the exit can only be taken
489         // on the first iteration.
490         auto *LoopPreheader = L->getLoopPreheader();
491         assert(LoopPreheader && "Invalid loop");
492         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
493         if (PreheaderIdx != -1) {
494           assert(ExitVal->getParent() == L->getHeader() &&
495                  "ExitVal must be in loop header");
496           MadeAnyChanges = true;
497           PN.setIncomingValue(IncomingValIdx,
498                               ExitVal->getIncomingValue(PreheaderIdx));
499           SE->forgetValue(&PN);
500         }
501       }
502     }
503   }
504   return MadeAnyChanges;
505 }
506 
507 //===----------------------------------------------------------------------===//
508 //  IV Widening - Extend the width of an IV to cover its widest uses.
509 //===----------------------------------------------------------------------===//
510 
511 /// Update information about the induction variable that is extended by this
512 /// sign or zero extend operation. This is used to determine the final width of
513 /// the IV before actually widening it.
514 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
515                         ScalarEvolution *SE,
516                         const TargetTransformInfo *TTI) {
517   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
518   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
519     return;
520 
521   Type *Ty = Cast->getType();
522   uint64_t Width = SE->getTypeSizeInBits(Ty);
523   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
524     return;
525 
526   // Check that `Cast` actually extends the induction variable (we rely on this
527   // later).  This takes care of cases where `Cast` is extending a truncation of
528   // the narrow induction variable, and thus can end up being narrower than the
529   // "narrow" induction variable.
530   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
531   if (NarrowIVWidth >= Width)
532     return;
533 
534   // Cast is either an sext or zext up to this point.
535   // We should not widen an indvar if arithmetics on the wider indvar are more
536   // expensive than those on the narrower indvar. We check only the cost of ADD
537   // because at least an ADD is required to increment the induction variable. We
538   // could compute more comprehensively the cost of all instructions on the
539   // induction variable when necessary.
540   if (TTI &&
541       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
542           TTI->getArithmeticInstrCost(Instruction::Add,
543                                       Cast->getOperand(0)->getType())) {
544     return;
545   }
546 
547   if (!WI.WidestNativeType ||
548       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
549     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
550     WI.IsSigned = IsSigned;
551     return;
552   }
553 
554   // We extend the IV to satisfy the sign of its user(s), or 'signed'
555   // if there are multiple users with both sign- and zero extensions,
556   // in order not to introduce nondeterministic behaviour based on the
557   // unspecified order of a PHI nodes' users-iterator.
558   WI.IsSigned |= IsSigned;
559 }
560 
561 //===----------------------------------------------------------------------===//
562 //  Live IV Reduction - Minimize IVs live across the loop.
563 //===----------------------------------------------------------------------===//
564 
565 //===----------------------------------------------------------------------===//
566 //  Simplification of IV users based on SCEV evaluation.
567 //===----------------------------------------------------------------------===//
568 
569 namespace {
570 
571 class IndVarSimplifyVisitor : public IVVisitor {
572   ScalarEvolution *SE;
573   const TargetTransformInfo *TTI;
574   PHINode *IVPhi;
575 
576 public:
577   WideIVInfo WI;
578 
579   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
580                         const TargetTransformInfo *TTI,
581                         const DominatorTree *DTree)
582     : SE(SCEV), TTI(TTI), IVPhi(IV) {
583     DT = DTree;
584     WI.NarrowIV = IVPhi;
585   }
586 
587   // Implement the interface used by simplifyUsersOfIV.
588   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
589 };
590 
591 } // end anonymous namespace
592 
593 /// Iteratively perform simplification on a worklist of IV users. Each
594 /// successive simplification may push more users which may themselves be
595 /// candidates for simplification.
596 ///
597 /// Sign/Zero extend elimination is interleaved with IV simplification.
598 bool IndVarSimplify::simplifyAndExtend(Loop *L,
599                                        SCEVExpander &Rewriter,
600                                        LoopInfo *LI) {
601   SmallVector<WideIVInfo, 8> WideIVs;
602 
603   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
604           Intrinsic::getName(Intrinsic::experimental_guard));
605   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
606 
607   SmallVector<PHINode *, 8> LoopPhis;
608   for (PHINode &PN : L->getHeader()->phis())
609     LoopPhis.push_back(&PN);
610 
611   // Each round of simplification iterates through the SimplifyIVUsers worklist
612   // for all current phis, then determines whether any IVs can be
613   // widened. Widening adds new phis to LoopPhis, inducing another round of
614   // simplification on the wide IVs.
615   bool Changed = false;
616   while (!LoopPhis.empty()) {
617     // Evaluate as many IV expressions as possible before widening any IVs. This
618     // forces SCEV to set no-wrap flags before evaluating sign/zero
619     // extension. The first time SCEV attempts to normalize sign/zero extension,
620     // the result becomes final. So for the most predictable results, we delay
621     // evaluation of sign/zero extend evaluation until needed, and avoid running
622     // other SCEV based analysis prior to simplifyAndExtend.
623     do {
624       PHINode *CurrIV = LoopPhis.pop_back_val();
625 
626       // Information about sign/zero extensions of CurrIV.
627       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
628 
629       Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
630                                    &Visitor);
631 
632       if (Visitor.WI.WidestNativeType) {
633         WideIVs.push_back(Visitor.WI);
634       }
635     } while(!LoopPhis.empty());
636 
637     // Continue if we disallowed widening.
638     if (!WidenIndVars)
639       continue;
640 
641     for (; !WideIVs.empty(); WideIVs.pop_back()) {
642       unsigned ElimExt;
643       unsigned Widened;
644       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
645                                           DT, DeadInsts, ElimExt, Widened,
646                                           HasGuards, UsePostIncrementRanges)) {
647         NumElimExt += ElimExt;
648         NumWidened += Widened;
649         Changed = true;
650         LoopPhis.push_back(WidePhi);
651       }
652     }
653   }
654   return Changed;
655 }
656 
657 //===----------------------------------------------------------------------===//
658 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
659 //===----------------------------------------------------------------------===//
660 
661 /// Given an Value which is hoped to be part of an add recurance in the given
662 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
663 /// that this is less general than SCEVs AddRec checking.
664 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
665   Instruction *IncI = dyn_cast<Instruction>(IncV);
666   if (!IncI)
667     return nullptr;
668 
669   switch (IncI->getOpcode()) {
670   case Instruction::Add:
671   case Instruction::Sub:
672     break;
673   case Instruction::GetElementPtr:
674     // An IV counter must preserve its type.
675     if (IncI->getNumOperands() == 2)
676       break;
677     [[fallthrough]];
678   default:
679     return nullptr;
680   }
681 
682   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683   if (Phi && Phi->getParent() == L->getHeader()) {
684     if (L->isLoopInvariant(IncI->getOperand(1)))
685       return Phi;
686     return nullptr;
687   }
688   if (IncI->getOpcode() == Instruction::GetElementPtr)
689     return nullptr;
690 
691   // Allow add/sub to be commuted.
692   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693   if (Phi && Phi->getParent() == L->getHeader()) {
694     if (L->isLoopInvariant(IncI->getOperand(0)))
695       return Phi;
696   }
697   return nullptr;
698 }
699 
700 /// Whether the current loop exit test is based on this value.  Currently this
701 /// is limited to a direct use in the loop condition.
702 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705   // TODO: Allow non-icmp loop test.
706   if (!ICmp)
707     return false;
708 
709   // TODO: Allow indirect use.
710   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711 }
712 
713 /// linearFunctionTestReplace policy. Return true unless we can show that the
714 /// current exit test is already sufficiently canonical.
715 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716   assert(L->getLoopLatch() && "Must be in simplified form");
717 
718   // Avoid converting a constant or loop invariant test back to a runtime
719   // test.  This is critical for when SCEV's cached ExitCount is less precise
720   // than the current IR (such as after we've proven a particular exit is
721   // actually dead and thus the BE count never reaches our ExitCount.)
722   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723   if (L->isLoopInvariant(BI->getCondition()))
724     return false;
725 
726   // Do LFTR to simplify the exit condition to an ICMP.
727   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728   if (!Cond)
729     return true;
730 
731   // Do LFTR to simplify the exit ICMP to EQ/NE
732   ICmpInst::Predicate Pred = Cond->getPredicate();
733   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734     return true;
735 
736   // Look for a loop invariant RHS
737   Value *LHS = Cond->getOperand(0);
738   Value *RHS = Cond->getOperand(1);
739   if (!L->isLoopInvariant(RHS)) {
740     if (!L->isLoopInvariant(LHS))
741       return true;
742     std::swap(LHS, RHS);
743   }
744   // Look for a simple IV counter LHS
745   PHINode *Phi = dyn_cast<PHINode>(LHS);
746   if (!Phi)
747     Phi = getLoopPhiForCounter(LHS, L);
748 
749   if (!Phi)
750     return true;
751 
752   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754   if (Idx < 0)
755     return true;
756 
757   // Do LFTR if the exit condition's IV is *not* a simple counter.
758   Value *IncV = Phi->getIncomingValue(Idx);
759   return Phi != getLoopPhiForCounter(IncV, L);
760 }
761 
762 /// Return true if undefined behavior would provable be executed on the path to
763 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
764 /// anything about whether OnPathTo is actually executed or whether Root is
765 /// actually poison.  This can be used to assess whether a new use of Root can
766 /// be added at a location which is control equivalent with OnPathTo (such as
767 /// immediately before it) without introducing UB which didn't previously
768 /// exist.  Note that a false result conveys no information.
769 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
770                                           Instruction *OnPathTo,
771                                           DominatorTree *DT) {
772   // Basic approach is to assume Root is poison, propagate poison forward
773   // through all users we can easily track, and then check whether any of those
774   // users are provable UB and must execute before out exiting block might
775   // exit.
776 
777   // The set of all recursive users we've visited (which are assumed to all be
778   // poison because of said visit)
779   SmallSet<const Value *, 16> KnownPoison;
780   SmallVector<const Instruction*, 16> Worklist;
781   Worklist.push_back(Root);
782   while (!Worklist.empty()) {
783     const Instruction *I = Worklist.pop_back_val();
784 
785     // If we know this must trigger UB on a path leading our target.
786     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
787       return true;
788 
789     // If we can't analyze propagation through this instruction, just skip it
790     // and transitive users.  Safe as false is a conservative result.
791     if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
792           return KnownPoison.contains(U) && propagatesPoison(U);
793         }))
794       continue;
795 
796     if (KnownPoison.insert(I).second)
797       for (const User *User : I->users())
798         Worklist.push_back(cast<Instruction>(User));
799   }
800 
801   // Might be non-UB, or might have a path we couldn't prove must execute on
802   // way to exiting bb.
803   return false;
804 }
805 
806 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
807 /// down to checking that all operands are constant and listing instructions
808 /// that may hide undef.
809 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
810                                unsigned Depth) {
811   if (isa<Constant>(V))
812     return !isa<UndefValue>(V);
813 
814   if (Depth >= 6)
815     return false;
816 
817   // Conservatively handle non-constant non-instructions. For example, Arguments
818   // may be undef.
819   Instruction *I = dyn_cast<Instruction>(V);
820   if (!I)
821     return false;
822 
823   // Load and return values may be undef.
824   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
825     return false;
826 
827   // Optimistically handle other instructions.
828   for (Value *Op : I->operands()) {
829     if (!Visited.insert(Op).second)
830       continue;
831     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
832       return false;
833   }
834   return true;
835 }
836 
837 /// Return true if the given value is concrete. We must prove that undef can
838 /// never reach it.
839 ///
840 /// TODO: If we decide that this is a good approach to checking for undef, we
841 /// may factor it into a common location.
842 static bool hasConcreteDef(Value *V) {
843   SmallPtrSet<Value*, 8> Visited;
844   Visited.insert(V);
845   return hasConcreteDefImpl(V, Visited, 0);
846 }
847 
848 /// Return true if this IV has any uses other than the (soon to be rewritten)
849 /// loop exit test.
850 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
851   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
852   Value *IncV = Phi->getIncomingValue(LatchIdx);
853 
854   for (User *U : Phi->users())
855     if (U != Cond && U != IncV) return false;
856 
857   for (User *U : IncV->users())
858     if (U != Cond && U != Phi) return false;
859   return true;
860 }
861 
862 /// Return true if the given phi is a "counter" in L.  A counter is an
863 /// add recurance (of integer or pointer type) with an arbitrary start, and a
864 /// step of 1.  Note that L must have exactly one latch.
865 static bool isLoopCounter(PHINode* Phi, Loop *L,
866                           ScalarEvolution *SE) {
867   assert(Phi->getParent() == L->getHeader());
868   assert(L->getLoopLatch());
869 
870   if (!SE->isSCEVable(Phi->getType()))
871     return false;
872 
873   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
874   if (!AR || AR->getLoop() != L || !AR->isAffine())
875     return false;
876 
877   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
878   if (!Step || !Step->isOne())
879     return false;
880 
881   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
882   Value *IncV = Phi->getIncomingValue(LatchIdx);
883   return (getLoopPhiForCounter(IncV, L) == Phi &&
884           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
885 }
886 
887 /// Search the loop header for a loop counter (anadd rec w/step of one)
888 /// suitable for use by LFTR.  If multiple counters are available, select the
889 /// "best" one based profitable heuristics.
890 ///
891 /// BECount may be an i8* pointer type. The pointer difference is already
892 /// valid count without scaling the address stride, so it remains a pointer
893 /// expression as far as SCEV is concerned.
894 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
895                                 const SCEV *BECount,
896                                 ScalarEvolution *SE, DominatorTree *DT) {
897   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
898 
899   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
900 
901   // Loop over all of the PHI nodes, looking for a simple counter.
902   PHINode *BestPhi = nullptr;
903   const SCEV *BestInit = nullptr;
904   BasicBlock *LatchBlock = L->getLoopLatch();
905   assert(LatchBlock && "Must be in simplified form");
906   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
907 
908   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
909     PHINode *Phi = cast<PHINode>(I);
910     if (!isLoopCounter(Phi, L, SE))
911       continue;
912 
913     // Avoid comparing an integer IV against a pointer Limit.
914     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
915       continue;
916 
917     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
918 
919     // AR may be a pointer type, while BECount is an integer type.
920     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
921     // AR may not be a narrower type, or we may never exit.
922     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
923     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
924       continue;
925 
926     // Avoid reusing a potentially undef value to compute other values that may
927     // have originally had a concrete definition.
928     if (!hasConcreteDef(Phi)) {
929       // We explicitly allow unknown phis as long as they are already used by
930       // the loop exit test.  This is legal since performing LFTR could not
931       // increase the number of undef users.
932       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
933       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
934           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
935         continue;
936     }
937 
938     // Avoid introducing undefined behavior due to poison which didn't exist in
939     // the original program.  (Annoyingly, the rules for poison and undef
940     // propagation are distinct, so this does NOT cover the undef case above.)
941     // We have to ensure that we don't introduce UB by introducing a use on an
942     // iteration where said IV produces poison.  Our strategy here differs for
943     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
944     // see code in linearFunctionTestReplace.  For pointers, we restrict
945     // transforms as there is no good way to reinfer inbounds once lost.
946     if (!Phi->getType()->isIntegerTy() &&
947         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
948       continue;
949 
950     const SCEV *Init = AR->getStart();
951 
952     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
953       // Don't force a live loop counter if another IV can be used.
954       if (AlmostDeadIV(Phi, LatchBlock, Cond))
955         continue;
956 
957       // Prefer to count-from-zero. This is a more "canonical" counter form. It
958       // also prefers integer to pointer IVs.
959       if (BestInit->isZero() != Init->isZero()) {
960         if (BestInit->isZero())
961           continue;
962       }
963       // If two IVs both count from zero or both count from nonzero then the
964       // narrower is likely a dead phi that has been widened. Use the wider phi
965       // to allow the other to be eliminated.
966       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
967         continue;
968     }
969     BestPhi = Phi;
970     BestInit = Init;
971   }
972   return BestPhi;
973 }
974 
975 /// Insert an IR expression which computes the value held by the IV IndVar
976 /// (which must be an loop counter w/unit stride) after the backedge of loop L
977 /// is taken ExitCount times.
978 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
979                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
980                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
981   assert(isLoopCounter(IndVar, L, SE));
982   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
983   const SCEV *IVInit = AR->getStart();
984   assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
985 
986   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
987   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
988   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
989   // the existing GEPs whenever possible.
990   if (IndVar->getType()->isPointerTy() &&
991       !ExitCount->getType()->isPointerTy()) {
992     // IVOffset will be the new GEP offset that is interpreted by GEP as a
993     // signed value. ExitCount on the other hand represents the loop trip count,
994     // which is an unsigned value. FindLoopCounter only allows induction
995     // variables that have a positive unit stride of one. This means we don't
996     // have to handle the case of negative offsets (yet) and just need to zero
997     // extend ExitCount.
998     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
999     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
1000     if (UsePostInc)
1001       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
1002 
1003     // Expand the code for the iteration count.
1004     assert(SE->isLoopInvariant(IVOffset, L) &&
1005            "Computed iteration count is not loop invariant!");
1006 
1007     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
1008     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1009     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
1010   } else {
1011     // In any other case, convert both IVInit and ExitCount to integers before
1012     // comparing. This may result in SCEV expansion of pointers, but in practice
1013     // SCEV will fold the pointer arithmetic away as such:
1014     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
1015     //
1016     // Valid Cases: (1) both integers is most common; (2) both may be pointers
1017     // for simple memset-style loops.
1018     //
1019     // IVInit integer and ExitCount pointer would only occur if a canonical IV
1020     // were generated on top of case #2, which is not expected.
1021 
1022     // For unit stride, IVCount = Start + ExitCount with 2's complement
1023     // overflow.
1024 
1025     // For integer IVs, truncate the IV before computing IVInit + BECount,
1026     // unless we know apriori that the limit must be a constant when evaluated
1027     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
1028     // of the IV in the loop over a (potentially) expensive expansion of the
1029     // widened exit count add(zext(add)) expression.
1030     if (SE->getTypeSizeInBits(IVInit->getType())
1031         > SE->getTypeSizeInBits(ExitCount->getType())) {
1032       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
1033         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
1034       else
1035         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
1036     }
1037 
1038     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
1039 
1040     if (UsePostInc)
1041       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
1042 
1043     // Expand the code for the iteration count.
1044     assert(SE->isLoopInvariant(IVLimit, L) &&
1045            "Computed iteration count is not loop invariant!");
1046     // Ensure that we generate the same type as IndVar, or a smaller integer
1047     // type. In the presence of null pointer values, we have an integer type
1048     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
1049     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
1050       IndVar->getType() : ExitCount->getType();
1051     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1052     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
1053   }
1054 }
1055 
1056 /// This method rewrites the exit condition of the loop to be a canonical !=
1057 /// comparison against the incremented loop induction variable.  This pass is
1058 /// able to rewrite the exit tests of any loop where the SCEV analysis can
1059 /// determine a loop-invariant trip count of the loop, which is actually a much
1060 /// broader range than just linear tests.
1061 bool IndVarSimplify::
1062 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
1063                           const SCEV *ExitCount,
1064                           PHINode *IndVar, SCEVExpander &Rewriter) {
1065   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
1066   assert(isLoopCounter(IndVar, L, SE));
1067   Instruction * const IncVar =
1068     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
1069 
1070   // Initialize CmpIndVar to the preincremented IV.
1071   Value *CmpIndVar = IndVar;
1072   bool UsePostInc = false;
1073 
1074   // If the exiting block is the same as the backedge block, we prefer to
1075   // compare against the post-incremented value, otherwise we must compare
1076   // against the preincremented value.
1077   if (ExitingBB == L->getLoopLatch()) {
1078     // For pointer IVs, we chose to not strip inbounds which requires us not
1079     // to add a potentially UB introducing use.  We need to either a) show
1080     // the loop test we're modifying is already in post-inc form, or b) show
1081     // that adding a use must not introduce UB.
1082     bool SafeToPostInc =
1083         IndVar->getType()->isIntegerTy() ||
1084         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
1085         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
1086     if (SafeToPostInc) {
1087       UsePostInc = true;
1088       CmpIndVar = IncVar;
1089     }
1090   }
1091 
1092   // It may be necessary to drop nowrap flags on the incrementing instruction
1093   // if either LFTR moves from a pre-inc check to a post-inc check (in which
1094   // case the increment might have previously been poison on the last iteration
1095   // only) or if LFTR switches to a different IV that was previously dynamically
1096   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
1097   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
1098   // check), because the pre-inc addrec flags may be adopted from the original
1099   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
1100   // TODO: This handling is inaccurate for one case: If we switch to a
1101   // dynamically dead IV that wraps on the first loop iteration only, which is
1102   // not covered by the post-inc addrec. (If the new IV was not dynamically
1103   // dead, it could not be poison on the first iteration in the first place.)
1104   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
1105     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
1106     if (BO->hasNoUnsignedWrap())
1107       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
1108     if (BO->hasNoSignedWrap())
1109       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
1110   }
1111 
1112   Value *ExitCnt = genLoopLimit(
1113       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1114   assert(ExitCnt->getType()->isPointerTy() ==
1115              IndVar->getType()->isPointerTy() &&
1116          "genLoopLimit missed a cast");
1117 
1118   // Insert a new icmp_ne or icmp_eq instruction before the branch.
1119   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1120   ICmpInst::Predicate P;
1121   if (L->contains(BI->getSuccessor(0)))
1122     P = ICmpInst::ICMP_NE;
1123   else
1124     P = ICmpInst::ICMP_EQ;
1125 
1126   IRBuilder<> Builder(BI);
1127 
1128   // The new loop exit condition should reuse the debug location of the
1129   // original loop exit condition.
1130   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1131     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1132 
1133   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1134   // avoid the expensive expansion of the limit expression in the wider type,
1135   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1136   // since we know (from the exit count bitwidth), that we can't self-wrap in
1137   // the narrower type.
1138   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1139   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1140   if (CmpIndVarSize > ExitCntSize) {
1141     assert(!CmpIndVar->getType()->isPointerTy() &&
1142            !ExitCnt->getType()->isPointerTy());
1143 
1144     // Before resorting to actually inserting the truncate, use the same
1145     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1146     // the other side of the comparison instead.  We still evaluate the limit
1147     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1148     // a truncate within in.
1149     bool Extended = false;
1150     const SCEV *IV = SE->getSCEV(CmpIndVar);
1151     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
1152                                                   ExitCnt->getType());
1153     const SCEV *ZExtTrunc =
1154       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1155 
1156     if (ZExtTrunc == IV) {
1157       Extended = true;
1158       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1159                                    "wide.trip.count");
1160     } else {
1161       const SCEV *SExtTrunc =
1162         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1163       if (SExtTrunc == IV) {
1164         Extended = true;
1165         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1166                                      "wide.trip.count");
1167       }
1168     }
1169 
1170     if (Extended) {
1171       bool Discard;
1172       L->makeLoopInvariant(ExitCnt, Discard);
1173     } else
1174       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1175                                       "lftr.wideiv");
1176   }
1177   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1178                     << "      LHS:" << *CmpIndVar << '\n'
1179                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1180                     << "\n"
1181                     << "      RHS:\t" << *ExitCnt << "\n"
1182                     << "ExitCount:\t" << *ExitCount << "\n"
1183                     << "  was: " << *BI->getCondition() << "\n");
1184 
1185   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1186   Value *OrigCond = BI->getCondition();
1187   // It's tempting to use replaceAllUsesWith here to fully replace the old
1188   // comparison, but that's not immediately safe, since users of the old
1189   // comparison may not be dominated by the new comparison. Instead, just
1190   // update the branch to use the new comparison; in the common case this
1191   // will make old comparison dead.
1192   BI->setCondition(Cond);
1193   DeadInsts.emplace_back(OrigCond);
1194 
1195   ++NumLFTR;
1196   return true;
1197 }
1198 
1199 //===----------------------------------------------------------------------===//
1200 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1201 //===----------------------------------------------------------------------===//
1202 
1203 /// If there's a single exit block, sink any loop-invariant values that
1204 /// were defined in the preheader but not used inside the loop into the
1205 /// exit block to reduce register pressure in the loop.
1206 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1207   BasicBlock *ExitBlock = L->getExitBlock();
1208   if (!ExitBlock) return false;
1209 
1210   BasicBlock *Preheader = L->getLoopPreheader();
1211   if (!Preheader) return false;
1212 
1213   bool MadeAnyChanges = false;
1214   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1215   BasicBlock::iterator I(Preheader->getTerminator());
1216   while (I != Preheader->begin()) {
1217     --I;
1218     // New instructions were inserted at the end of the preheader.
1219     if (isa<PHINode>(I))
1220       break;
1221 
1222     // Don't move instructions which might have side effects, since the side
1223     // effects need to complete before instructions inside the loop.  Also don't
1224     // move instructions which might read memory, since the loop may modify
1225     // memory. Note that it's okay if the instruction might have undefined
1226     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1227     // block.
1228     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1229       continue;
1230 
1231     // Skip debug info intrinsics.
1232     if (isa<DbgInfoIntrinsic>(I))
1233       continue;
1234 
1235     // Skip eh pad instructions.
1236     if (I->isEHPad())
1237       continue;
1238 
1239     // Don't sink alloca: we never want to sink static alloca's out of the
1240     // entry block, and correctly sinking dynamic alloca's requires
1241     // checks for stacksave/stackrestore intrinsics.
1242     // FIXME: Refactor this check somehow?
1243     if (isa<AllocaInst>(I))
1244       continue;
1245 
1246     // Determine if there is a use in or before the loop (direct or
1247     // otherwise).
1248     bool UsedInLoop = false;
1249     for (Use &U : I->uses()) {
1250       Instruction *User = cast<Instruction>(U.getUser());
1251       BasicBlock *UseBB = User->getParent();
1252       if (PHINode *P = dyn_cast<PHINode>(User)) {
1253         unsigned i =
1254           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1255         UseBB = P->getIncomingBlock(i);
1256       }
1257       if (UseBB == Preheader || L->contains(UseBB)) {
1258         UsedInLoop = true;
1259         break;
1260       }
1261     }
1262 
1263     // If there is, the def must remain in the preheader.
1264     if (UsedInLoop)
1265       continue;
1266 
1267     // Otherwise, sink it to the exit block.
1268     Instruction *ToMove = &*I;
1269     bool Done = false;
1270 
1271     if (I != Preheader->begin()) {
1272       // Skip debug info intrinsics.
1273       do {
1274         --I;
1275       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1276 
1277       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1278         Done = true;
1279     } else {
1280       Done = true;
1281     }
1282 
1283     MadeAnyChanges = true;
1284     ToMove->moveBefore(*ExitBlock, InsertPt);
1285     SE->forgetValue(ToMove);
1286     if (Done) break;
1287     InsertPt = ToMove->getIterator();
1288   }
1289 
1290   return MadeAnyChanges;
1291 }
1292 
1293 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1294                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1295   auto *OldCond = BI->getCondition();
1296   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1297                     << " with " << *NewCond << "\n");
1298   BI->setCondition(NewCond);
1299   if (OldCond->use_empty())
1300     DeadInsts.emplace_back(OldCond);
1301 }
1302 
1303 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1304                                       bool IsTaken) {
1305   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1306   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1307   auto *OldCond = BI->getCondition();
1308   return ConstantInt::get(OldCond->getType(),
1309                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1310 }
1311 
1312 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1313                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1314   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1315   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1316   replaceExitCond(BI, NewCond, DeadInsts);
1317 }
1318 
1319 static void replaceLoopPHINodesWithPreheaderValues(
1320     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1321     ScalarEvolution &SE) {
1322   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1323   auto *LoopPreheader = L->getLoopPreheader();
1324   auto *LoopHeader = L->getHeader();
1325   SmallVector<Instruction *> Worklist;
1326   for (auto &PN : LoopHeader->phis()) {
1327     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1328     for (User *U : PN.users())
1329       Worklist.push_back(cast<Instruction>(U));
1330     SE.forgetValue(&PN);
1331     PN.replaceAllUsesWith(PreheaderIncoming);
1332     DeadInsts.emplace_back(&PN);
1333   }
1334 
1335   // Replacing with the preheader value will often allow IV users to simplify
1336   // (especially if the preheader value is a constant).
1337   SmallPtrSet<Instruction *, 16> Visited;
1338   while (!Worklist.empty()) {
1339     auto *I = cast<Instruction>(Worklist.pop_back_val());
1340     if (!Visited.insert(I).second)
1341       continue;
1342 
1343     // Don't simplify instructions outside the loop.
1344     if (!L->contains(I))
1345       continue;
1346 
1347     Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
1348     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1349       for (User *U : I->users())
1350         Worklist.push_back(cast<Instruction>(U));
1351       I->replaceAllUsesWith(Res);
1352       DeadInsts.emplace_back(I);
1353     }
1354   }
1355 }
1356 
1357 static Value *
1358 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1359                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1360                     SCEVExpander &Rewriter) {
1361   ICmpInst::Predicate InvariantPred = LIP.Pred;
1362   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1363   Rewriter.setInsertPoint(BI);
1364   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1365   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1366   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1367   if (ExitIfTrue)
1368     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1369   IRBuilder<> Builder(BI);
1370   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1371                             BI->getCondition()->getName());
1372 }
1373 
1374 static std::optional<Value *>
1375 createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1376                   const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1377                   ScalarEvolution *SE, SCEVExpander &Rewriter) {
1378   ICmpInst::Predicate Pred = ICmp->getPredicate();
1379   Value *LHS = ICmp->getOperand(0);
1380   Value *RHS = ICmp->getOperand(1);
1381 
1382   // 'LHS pred RHS' should now mean that we stay in loop.
1383   auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1384   if (Inverted)
1385     Pred = CmpInst::getInversePredicate(Pred);
1386 
1387   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1388   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1389   // Can we prove it to be trivially true or false?
1390   if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1391     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1392 
1393   auto *ARTy = LHSS->getType();
1394   auto *MaxIterTy = MaxIter->getType();
1395   // If possible, adjust types.
1396   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1397     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1398   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1399     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1400     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1401     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1402       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1403   }
1404 
1405   if (SkipLastIter) {
1406     // Semantically skip last iter is "subtract 1, do not bother about unsigned
1407     // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1408     // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1409     // So we manually construct umin(a - 1, b - 1).
1410     SmallVector<const SCEV *, 4> Elements;
1411     if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1412       for (auto *Op : UMin->operands())
1413         Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1414       MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1415     } else
1416       MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1417   }
1418 
1419   // Check if there is a loop-invariant predicate equivalent to our check.
1420   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1421                                                                L, BI, MaxIter);
1422   if (!LIP)
1423     return std::nullopt;
1424 
1425   // Can we prove it to be trivially true?
1426   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1427     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1428   else
1429     return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1430 }
1431 
1432 static bool optimizeLoopExitWithUnknownExitCount(
1433     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1434     bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1435     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1436   assert(
1437       (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1438       "Not a loop exit!");
1439 
1440   // For branch that stays in loop by TRUE condition, go through AND. For branch
1441   // that stays in loop by FALSE condition, go through OR. Both gives the
1442   // similar logic: "stay in loop iff all conditions are true(false)".
1443   bool Inverted = L->contains(BI->getSuccessor(1));
1444   SmallVector<ICmpInst *, 4> LeafConditions;
1445   SmallVector<Value *, 4> Worklist;
1446   SmallPtrSet<Value *, 4> Visited;
1447   Value *OldCond = BI->getCondition();
1448   Visited.insert(OldCond);
1449   Worklist.push_back(OldCond);
1450 
1451   auto GoThrough = [&](Value *V) {
1452     Value *LHS = nullptr, *RHS = nullptr;
1453     if (Inverted) {
1454       if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1455         return false;
1456     } else {
1457       if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1458         return false;
1459     }
1460     if (Visited.insert(LHS).second)
1461       Worklist.push_back(LHS);
1462     if (Visited.insert(RHS).second)
1463       Worklist.push_back(RHS);
1464     return true;
1465   };
1466 
1467   do {
1468     Value *Curr = Worklist.pop_back_val();
1469     // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1470     // those with one use, to avoid instruction duplication.
1471     if (Curr->hasOneUse())
1472       if (!GoThrough(Curr))
1473         if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1474           LeafConditions.push_back(ICmp);
1475   } while (!Worklist.empty());
1476 
1477   // If the current basic block has the same exit count as the whole loop, and
1478   // it consists of multiple icmp's, try to collect all icmp's that give exact
1479   // same exit count. For all other icmp's, we could use one less iteration,
1480   // because their value on the last iteration doesn't really matter.
1481   SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1482   if (!SkipLastIter && LeafConditions.size() > 1 &&
1483       SE->getExitCount(L, ExitingBB,
1484                        ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1485           MaxIter)
1486     for (auto *ICmp : LeafConditions) {
1487       auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1488                                              /*ControlsExit*/ false);
1489       auto *ExitMax = EL.SymbolicMaxNotTaken;
1490       if (isa<SCEVCouldNotCompute>(ExitMax))
1491         continue;
1492       // They could be of different types (specifically this happens after
1493       // IV widening).
1494       auto *WiderType =
1495           SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1496       auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1497       auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1498       if (WideExitMax == WideMaxIter)
1499         ICmpsFailingOnLastIter.insert(ICmp);
1500     }
1501 
1502   bool Changed = false;
1503   for (auto *OldCond : LeafConditions) {
1504     // Skip last iteration for this icmp under one of two conditions:
1505     // - We do it for all conditions;
1506     // - There is another ICmp that would fail on last iter, so this one doesn't
1507     // really matter.
1508     bool OptimisticSkipLastIter = SkipLastIter;
1509     if (!OptimisticSkipLastIter) {
1510       if (ICmpsFailingOnLastIter.size() > 1)
1511         OptimisticSkipLastIter = true;
1512       else if (ICmpsFailingOnLastIter.size() == 1)
1513         OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1514     }
1515     if (auto Replaced =
1516             createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1517                               OptimisticSkipLastIter, SE, Rewriter)) {
1518       Changed = true;
1519       auto *NewCond = *Replaced;
1520       if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1521         NCI->setName(OldCond->getName() + ".first_iter");
1522         NCI->moveBefore(cast<Instruction>(OldCond));
1523       }
1524       LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1525                         << " with " << *NewCond << "\n");
1526       assert(OldCond->hasOneUse() && "Must be!");
1527       OldCond->replaceAllUsesWith(NewCond);
1528       DeadInsts.push_back(OldCond);
1529       // Make sure we no longer consider this condition as failing on last
1530       // iteration.
1531       ICmpsFailingOnLastIter.erase(OldCond);
1532     }
1533   }
1534   return Changed;
1535 }
1536 
1537 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1538   // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1539   // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1540   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1541   // it already has flags.  The alternative to this would be to extending the
1542   // set of "interesting" IV users to include the icmp, but doing that
1543   // regresses results in practice by querying SCEVs before trip counts which
1544   // rely on them which results in SCEV caching sub-optimal answers.  The
1545   // concern about caching sub-optimal results is why we only query SCEVs of
1546   // the loop invariant RHS here.
1547   SmallVector<BasicBlock*, 16> ExitingBlocks;
1548   L->getExitingBlocks(ExitingBlocks);
1549   bool Changed = false;
1550   for (auto *ExitingBB : ExitingBlocks) {
1551     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1552     if (!BI)
1553       continue;
1554     assert(BI->isConditional() && "exit branch must be conditional");
1555 
1556     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1557     if (!ICmp || !ICmp->hasOneUse())
1558       continue;
1559 
1560     auto *LHS = ICmp->getOperand(0);
1561     auto *RHS = ICmp->getOperand(1);
1562     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1563     // poisoning cache with sub-optimal results.  For the must-execute case,
1564     // this is a neccessary precondition for correctness.
1565     if (!L->isLoopInvariant(RHS)) {
1566       if (!L->isLoopInvariant(LHS))
1567         continue;
1568       // Same logic applies for the inverse case
1569       std::swap(LHS, RHS);
1570     }
1571 
1572     // Match (icmp signed-cond zext, RHS)
1573     Value *LHSOp = nullptr;
1574     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1575       continue;
1576 
1577     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1578     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1579     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1580     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1581     FullCR = FullCR.zeroExtend(OuterBitWidth);
1582     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1583     if (FullCR.contains(RHSCR)) {
1584       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1585       // replace the signed condition with the unsigned version.
1586       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1587       Changed = true;
1588       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1589       // have not changed exit counts, or the values produced by the compare.
1590       continue;
1591     }
1592   }
1593 
1594   // Now that we've canonicalized the condition to match the extend,
1595   // see if we can rotate the extend out of the loop.
1596   for (auto *ExitingBB : ExitingBlocks) {
1597     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1598     if (!BI)
1599       continue;
1600     assert(BI->isConditional() && "exit branch must be conditional");
1601 
1602     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1603     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1604       continue;
1605 
1606     bool Swapped = false;
1607     auto *LHS = ICmp->getOperand(0);
1608     auto *RHS = ICmp->getOperand(1);
1609     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1610       // Nothing to rotate
1611       continue;
1612     if (L->isLoopInvariant(LHS)) {
1613       // Same logic applies for the inverse case until we actually pick
1614       // which operand of the compare to update.
1615       Swapped = true;
1616       std::swap(LHS, RHS);
1617     }
1618     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1619 
1620     // Match (icmp unsigned-cond zext, RHS)
1621     // TODO: Extend to handle corresponding sext/signed-cmp case
1622     // TODO: Extend to other invertible functions
1623     Value *LHSOp = nullptr;
1624     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1625       continue;
1626 
1627     // In general, we only rotate if we can do so without increasing the number
1628     // of instructions.  The exception is when we have an zext(add-rec).  The
1629     // reason for allowing this exception is that we know we need to get rid
1630     // of the zext for SCEV to be able to compute a trip count for said loops;
1631     // we consider the new trip count valuable enough to increase instruction
1632     // count by one.
1633     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1634       continue;
1635 
1636     // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1637     // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1638     // when zext is loop varying and RHS is loop invariant.  This converts
1639     // loop varying work to loop-invariant work.
1640     auto doRotateTransform = [&]() {
1641       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1642       auto *NewRHS =
1643         CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
1644                          L->getLoopPreheader()->getTerminator());
1645       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1646       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1647       if (LHS->use_empty())
1648         DeadInsts.push_back(LHS);
1649     };
1650 
1651 
1652     const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
1653     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1654     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1655     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1656     FullCR = FullCR.zeroExtend(OuterBitWidth);
1657     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1658     if (FullCR.contains(RHSCR)) {
1659       doRotateTransform();
1660       Changed = true;
1661       // Note, we are leaving SCEV in an unfortunately imprecise case here
1662       // as rotation tends to reveal information about trip counts not
1663       // previously visible.
1664       continue;
1665     }
1666   }
1667 
1668   return Changed;
1669 }
1670 
1671 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1672   SmallVector<BasicBlock*, 16> ExitingBlocks;
1673   L->getExitingBlocks(ExitingBlocks);
1674 
1675   // Remove all exits which aren't both rewriteable and execute on every
1676   // iteration.
1677   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1678     // If our exitting block exits multiple loops, we can only rewrite the
1679     // innermost one.  Otherwise, we're changing how many times the innermost
1680     // loop runs before it exits.
1681     if (LI->getLoopFor(ExitingBB) != L)
1682       return true;
1683 
1684     // Can't rewrite non-branch yet.
1685     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1686     if (!BI)
1687       return true;
1688 
1689     // Likewise, the loop latch must be dominated by the exiting BB.
1690     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1691       return true;
1692 
1693     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1694       // If already constant, nothing to do. However, if this is an
1695       // unconditional exit, we can still replace header phis with their
1696       // preheader value.
1697       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1698         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1699       return true;
1700     }
1701 
1702     return false;
1703   });
1704 
1705   if (ExitingBlocks.empty())
1706     return false;
1707 
1708   // Get a symbolic upper bound on the loop backedge taken count.
1709   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1710   if (isa<SCEVCouldNotCompute>(MaxBECount))
1711     return false;
1712 
1713   // Visit our exit blocks in order of dominance. We know from the fact that
1714   // all exits must dominate the latch, so there is a total dominance order
1715   // between them.
1716   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1717                // std::sort sorts in ascending order, so we want the inverse of
1718                // the normal dominance relation.
1719                if (A == B) return false;
1720                if (DT->properlyDominates(A, B))
1721                  return true;
1722                else {
1723                  assert(DT->properlyDominates(B, A) &&
1724                         "expected total dominance order!");
1725                  return false;
1726                }
1727   });
1728 #ifdef ASSERT
1729   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1730     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1731   }
1732 #endif
1733 
1734   bool Changed = false;
1735   bool SkipLastIter = false;
1736   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1737   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1738     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1739       return;
1740     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1741       CurrMaxExit = MaxExitCount;
1742     else
1743       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1744     // If the loop has more than 1 iteration, all further checks will be
1745     // executed 1 iteration less.
1746     if (CurrMaxExit == MaxBECount)
1747       SkipLastIter = true;
1748   };
1749   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1750   for (BasicBlock *ExitingBB : ExitingBlocks) {
1751     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1752     const SCEV *MaxExitCount = SE->getExitCount(
1753         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1754     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1755       // Okay, we do not know the exit count here. Can we at least prove that it
1756       // will remain the same within iteration space?
1757       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1758       auto OptimizeCond = [&](bool SkipLastIter) {
1759         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1760                                                     MaxBECount, SkipLastIter,
1761                                                     SE, Rewriter, DeadInsts);
1762       };
1763 
1764       // TODO: We might have proved that we can skip the last iteration for
1765       // this check. In this case, we only want to check the condition on the
1766       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1767       // corner case:
1768       //
1769       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1770       //
1771       // If we could not prove that len != 0, then we also could not prove that
1772       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1773       // OptimizeCond will likely not prove anything for it, even if it could
1774       // prove the same fact for len.
1775       //
1776       // As a temporary solution, we query both last and pre-last iterations in
1777       // hope that we will be able to prove triviality for at least one of
1778       // them. We can stop querying MaxBECount for this case once SCEV
1779       // understands that (MaxBECount - 1) will not overflow here.
1780       if (OptimizeCond(false))
1781         Changed = true;
1782       else if (SkipLastIter && OptimizeCond(true))
1783         Changed = true;
1784       UpdateSkipLastIter(MaxExitCount);
1785       continue;
1786     }
1787 
1788     UpdateSkipLastIter(ExactExitCount);
1789 
1790     // If we know we'd exit on the first iteration, rewrite the exit to
1791     // reflect this.  This does not imply the loop must exit through this
1792     // exit; there may be an earlier one taken on the first iteration.
1793     // We know that the backedge can't be taken, so we replace all
1794     // the header PHIs with values coming from the preheader.
1795     if (ExactExitCount->isZero()) {
1796       foldExit(L, ExitingBB, true, DeadInsts);
1797       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1798       Changed = true;
1799       continue;
1800     }
1801 
1802     assert(ExactExitCount->getType()->isIntegerTy() &&
1803            MaxBECount->getType()->isIntegerTy() &&
1804            "Exit counts must be integers");
1805 
1806     Type *WiderType =
1807         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1808     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1809     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1810     assert(MaxBECount->getType() == ExactExitCount->getType());
1811 
1812     // Can we prove that some other exit must be taken strictly before this
1813     // one?
1814     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1815                                      ExactExitCount)) {
1816       foldExit(L, ExitingBB, false, DeadInsts);
1817       Changed = true;
1818       continue;
1819     }
1820 
1821     // As we run, keep track of which exit counts we've encountered.  If we
1822     // find a duplicate, we've found an exit which would have exited on the
1823     // exiting iteration, but (from the visit order) strictly follows another
1824     // which does the same and is thus dead.
1825     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1826       foldExit(L, ExitingBB, false, DeadInsts);
1827       Changed = true;
1828       continue;
1829     }
1830 
1831     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1832     // here.  If we kept track of the min of dominanting exits so far, we could
1833     // discharge exits with EC >= MDEC. This is less powerful than the existing
1834     // transform (since later exits aren't considered), but potentially more
1835     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1836     // or a >u b.  Such a case is not currently known.
1837   }
1838   return Changed;
1839 }
1840 
1841 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1842   SmallVector<BasicBlock*, 16> ExitingBlocks;
1843   L->getExitingBlocks(ExitingBlocks);
1844 
1845   // Finally, see if we can rewrite our exit conditions into a loop invariant
1846   // form. If we have a read-only loop, and we can tell that we must exit down
1847   // a path which does not need any of the values computed within the loop, we
1848   // can rewrite the loop to exit on the first iteration.  Note that this
1849   // doesn't either a) tell us the loop exits on the first iteration (unless
1850   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1851   // This transformation looks a lot like a restricted form of dead loop
1852   // elimination, but restricted to read-only loops and without neccesssarily
1853   // needing to kill the loop entirely.
1854   if (!LoopPredication)
1855     return false;
1856 
1857   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1858   // through *explicit* control flow.  We have to eliminate the possibility of
1859   // implicit exits (see below) before we know it's truly exact.
1860   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1861   if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1862     return false;
1863 
1864   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1865   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1866 
1867   auto BadExit = [&](BasicBlock *ExitingBB) {
1868     // If our exiting block exits multiple loops, we can only rewrite the
1869     // innermost one.  Otherwise, we're changing how many times the innermost
1870     // loop runs before it exits.
1871     if (LI->getLoopFor(ExitingBB) != L)
1872       return true;
1873 
1874     // Can't rewrite non-branch yet.
1875     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1876     if (!BI)
1877       return true;
1878 
1879     // If already constant, nothing to do.
1880     if (isa<Constant>(BI->getCondition()))
1881       return true;
1882 
1883     // If the exit block has phis, we need to be able to compute the values
1884     // within the loop which contains them.  This assumes trivially lcssa phis
1885     // have already been removed; TODO: generalize
1886     BasicBlock *ExitBlock =
1887     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1888     if (!ExitBlock->phis().empty())
1889       return true;
1890 
1891     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1892     if (isa<SCEVCouldNotCompute>(ExitCount) ||
1893         !Rewriter.isSafeToExpand(ExitCount))
1894       return true;
1895 
1896     assert(SE->isLoopInvariant(ExitCount, L) &&
1897            "Exit count must be loop invariant");
1898     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1899     return false;
1900   };
1901 
1902   // If we have any exits which can't be predicated themselves, than we can't
1903   // predicate any exit which isn't guaranteed to execute before it.  Consider
1904   // two exits (a) and (b) which would both exit on the same iteration.  If we
1905   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1906   // we could convert a loop from exiting through (a) to one exiting through
1907   // (b).  Note that this problem exists only for exits with the same exit
1908   // count, and we could be more aggressive when exit counts are known inequal.
1909   llvm::sort(ExitingBlocks,
1910             [&](BasicBlock *A, BasicBlock *B) {
1911               // std::sort sorts in ascending order, so we want the inverse of
1912               // the normal dominance relation, plus a tie breaker for blocks
1913               // unordered by dominance.
1914               if (DT->properlyDominates(A, B)) return true;
1915               if (DT->properlyDominates(B, A)) return false;
1916               return A->getName() < B->getName();
1917             });
1918   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1919   // exits before the backedge).  If they aren't, reasoning about reachability
1920   // is complicated and we choose not to for now.
1921   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1922     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1923       return false;
1924 
1925   // Given our sorted total order, we know that exit[j] must be evaluated
1926   // after all exit[i] such j > i.
1927   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1928     if (BadExit(ExitingBlocks[i])) {
1929       ExitingBlocks.resize(i);
1930       break;
1931     }
1932 
1933   if (ExitingBlocks.empty())
1934     return false;
1935 
1936   // We rely on not being able to reach an exiting block on a later iteration
1937   // then it's statically compute exit count.  The implementaton of
1938   // getExitCount currently has this invariant, but assert it here so that
1939   // breakage is obvious if this ever changes..
1940   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1941         return DT->dominates(ExitingBB, L->getLoopLatch());
1942       }));
1943 
1944   // At this point, ExitingBlocks consists of only those blocks which are
1945   // predicatable.  Given that, we know we have at least one exit we can
1946   // predicate if the loop is doesn't have side effects and doesn't have any
1947   // implicit exits (because then our exact BTC isn't actually exact).
1948   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1949   // suggestions on how to improve this?  I can obviously bail out for outer
1950   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1951   // is that enough for *all* side effects?
1952   for (BasicBlock *BB : L->blocks())
1953     for (auto &I : *BB)
1954       // TODO:isGuaranteedToTransfer
1955       if (I.mayHaveSideEffects())
1956         return false;
1957 
1958   bool Changed = false;
1959   // Finally, do the actual predication for all predicatable blocks.  A couple
1960   // of notes here:
1961   // 1) We don't bother to constant fold dominated exits with identical exit
1962   //    counts; that's simply a form of CSE/equality propagation and we leave
1963   //    it for dedicated passes.
1964   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1965   //    legality constraints and we leave that to dedicated logic.  We want to
1966   //    predicate even if we can't insert a loop invariant expression as
1967   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1968   //    varying check.
1969   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1970   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1971   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1972   for (BasicBlock *ExitingBB : ExitingBlocks) {
1973     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1974 
1975     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1976     Value *NewCond;
1977     if (ExitCount == ExactBTC) {
1978       NewCond = L->contains(BI->getSuccessor(0)) ?
1979         B.getFalse() : B.getTrue();
1980     } else {
1981       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1982       if (!ExactBTCV)
1983         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1984       Value *RHS = ExactBTCV;
1985       if (ECV->getType() != RHS->getType()) {
1986         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1987         ECV = B.CreateZExt(ECV, WiderTy);
1988         RHS = B.CreateZExt(RHS, WiderTy);
1989       }
1990       auto Pred = L->contains(BI->getSuccessor(0)) ?
1991         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1992       NewCond = B.CreateICmp(Pred, ECV, RHS);
1993     }
1994     Value *OldCond = BI->getCondition();
1995     BI->setCondition(NewCond);
1996     if (OldCond->use_empty())
1997       DeadInsts.emplace_back(OldCond);
1998     Changed = true;
1999   }
2000 
2001   return Changed;
2002 }
2003 
2004 //===----------------------------------------------------------------------===//
2005 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2006 //===----------------------------------------------------------------------===//
2007 
2008 bool IndVarSimplify::run(Loop *L) {
2009   // We need (and expect!) the incoming loop to be in LCSSA.
2010   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2011          "LCSSA required to run indvars!");
2012 
2013   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2014   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2015   //    canonicalization can be a pessimization without LSR to "clean up"
2016   //    afterwards.
2017   //  - We depend on having a preheader; in particular,
2018   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2019   //    and we're in trouble if we can't find the induction variable even when
2020   //    we've manually inserted one.
2021   //  - LFTR relies on having a single backedge.
2022   if (!L->isLoopSimplifyForm())
2023     return false;
2024 
2025 #ifndef NDEBUG
2026   // Used below for a consistency check only
2027   // Note: Since the result returned by ScalarEvolution may depend on the order
2028   // in which previous results are added to its cache, the call to
2029   // getBackedgeTakenCount() may change following SCEV queries.
2030   const SCEV *BackedgeTakenCount;
2031   if (VerifyIndvars)
2032     BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2033 #endif
2034 
2035   bool Changed = false;
2036   // If there are any floating-point recurrences, attempt to
2037   // transform them to use integer recurrences.
2038   Changed |= rewriteNonIntegerIVs(L);
2039 
2040   // Create a rewriter object which we'll use to transform the code with.
2041   SCEVExpander Rewriter(*SE, DL, "indvars");
2042 #ifndef NDEBUG
2043   Rewriter.setDebugType(DEBUG_TYPE);
2044 #endif
2045 
2046   // Eliminate redundant IV users.
2047   //
2048   // Simplification works best when run before other consumers of SCEV. We
2049   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2050   // other expressions involving loop IVs have been evaluated. This helps SCEV
2051   // set no-wrap flags before normalizing sign/zero extension.
2052   Rewriter.disableCanonicalMode();
2053   Changed |= simplifyAndExtend(L, Rewriter, LI);
2054 
2055   // Check to see if we can compute the final value of any expressions
2056   // that are recurrent in the loop, and substitute the exit values from the
2057   // loop into any instructions outside of the loop that use the final values
2058   // of the current expressions.
2059   if (ReplaceExitValue != NeverRepl) {
2060     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
2061                                              ReplaceExitValue, DeadInsts)) {
2062       NumReplaced += Rewrites;
2063       Changed = true;
2064     }
2065   }
2066 
2067   // Eliminate redundant IV cycles.
2068   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
2069 
2070   // Try to convert exit conditions to unsigned and rotate computation
2071   // out of the loop.  Note: Handles invalidation internally if needed.
2072   Changed |= canonicalizeExitCondition(L);
2073 
2074   // Try to eliminate loop exits based on analyzeable exit counts
2075   if (optimizeLoopExits(L, Rewriter))  {
2076     Changed = true;
2077     // Given we've changed exit counts, notify SCEV
2078     // Some nested loops may share same folded exit basic block,
2079     // thus we need to notify top most loop.
2080     SE->forgetTopmostLoop(L);
2081   }
2082 
2083   // Try to form loop invariant tests for loop exits by changing how many
2084   // iterations of the loop run when that is unobservable.
2085   if (predicateLoopExits(L, Rewriter)) {
2086     Changed = true;
2087     // Given we've changed exit counts, notify SCEV
2088     SE->forgetLoop(L);
2089   }
2090 
2091   // If we have a trip count expression, rewrite the loop's exit condition
2092   // using it.
2093   if (!DisableLFTR) {
2094     BasicBlock *PreHeader = L->getLoopPreheader();
2095 
2096     SmallVector<BasicBlock*, 16> ExitingBlocks;
2097     L->getExitingBlocks(ExitingBlocks);
2098     for (BasicBlock *ExitingBB : ExitingBlocks) {
2099       // Can't rewrite non-branch yet.
2100       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2101         continue;
2102 
2103       // If our exitting block exits multiple loops, we can only rewrite the
2104       // innermost one.  Otherwise, we're changing how many times the innermost
2105       // loop runs before it exits.
2106       if (LI->getLoopFor(ExitingBB) != L)
2107         continue;
2108 
2109       if (!needsLFTR(L, ExitingBB))
2110         continue;
2111 
2112       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2113       if (isa<SCEVCouldNotCompute>(ExitCount))
2114         continue;
2115 
2116       // This was handled above, but as we form SCEVs, we can sometimes refine
2117       // existing ones; this allows exit counts to be folded to zero which
2118       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2119       // until stable to handle cases like this better.
2120       if (ExitCount->isZero())
2121         continue;
2122 
2123       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2124       if (!IndVar)
2125         continue;
2126 
2127       // Avoid high cost expansions.  Note: This heuristic is questionable in
2128       // that our definition of "high cost" is not exactly principled.
2129       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2130                                        TTI, PreHeader->getTerminator()))
2131         continue;
2132 
2133       // Check preconditions for proper SCEVExpander operation. SCEV does not
2134       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2135       // any pass that uses the SCEVExpander must do it. This does not work
2136       // well for loop passes because SCEVExpander makes assumptions about
2137       // all loops, while LoopPassManager only forces the current loop to be
2138       // simplified.
2139       //
2140       // FIXME: SCEV expansion has no way to bail out, so the caller must
2141       // explicitly check any assumptions made by SCEV. Brittle.
2142       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2143       if (!AR || AR->getLoop()->getLoopPreheader())
2144         Changed |= linearFunctionTestReplace(L, ExitingBB,
2145                                              ExitCount, IndVar,
2146                                              Rewriter);
2147     }
2148   }
2149   // Clear the rewriter cache, because values that are in the rewriter's cache
2150   // can be deleted in the loop below, causing the AssertingVH in the cache to
2151   // trigger.
2152   Rewriter.clear();
2153 
2154   // Now that we're done iterating through lists, clean up any instructions
2155   // which are now dead.
2156   while (!DeadInsts.empty()) {
2157     Value *V = DeadInsts.pop_back_val();
2158 
2159     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2160       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2161     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2162       Changed |=
2163           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2164   }
2165 
2166   // The Rewriter may not be used from this point on.
2167 
2168   // Loop-invariant instructions in the preheader that aren't used in the
2169   // loop may be sunk below the loop to reduce register pressure.
2170   Changed |= sinkUnusedInvariants(L);
2171 
2172   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2173   // trip count and therefore can further simplify exit values in addition to
2174   // rewriteLoopExitValues.
2175   Changed |= rewriteFirstIterationLoopExitValues(L);
2176 
2177   // Clean up dead instructions.
2178   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2179 
2180   // Check a post-condition.
2181   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2182          "Indvars did not preserve LCSSA!");
2183 
2184   // Verify that LFTR, and any other change have not interfered with SCEV's
2185   // ability to compute trip count.  We may have *changed* the exit count, but
2186   // only by reducing it.
2187 #ifndef NDEBUG
2188   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2189     SE->forgetLoop(L);
2190     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2191     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2192         SE->getTypeSizeInBits(NewBECount->getType()))
2193       NewBECount = SE->getTruncateOrNoop(NewBECount,
2194                                          BackedgeTakenCount->getType());
2195     else
2196       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2197                                                  NewBECount->getType());
2198     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
2199                                  NewBECount) && "indvars must preserve SCEV");
2200   }
2201   if (VerifyMemorySSA && MSSAU)
2202     MSSAU->getMemorySSA()->verifyMemorySSA();
2203 #endif
2204 
2205   return Changed;
2206 }
2207 
2208 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2209                                           LoopStandardAnalysisResults &AR,
2210                                           LPMUpdater &) {
2211   Function *F = L.getHeader()->getParent();
2212   const DataLayout &DL = F->getParent()->getDataLayout();
2213 
2214   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2215                      WidenIndVars && AllowIVWidening);
2216   if (!IVS.run(&L))
2217     return PreservedAnalyses::all();
2218 
2219   auto PA = getLoopPassPreservedAnalyses();
2220   PA.preserveSet<CFGAnalyses>();
2221   if (AR.MSSA)
2222     PA.preserve<MemorySSAAnalysis>();
2223   return PA;
2224 }
2225 
2226 namespace {
2227 
2228 struct IndVarSimplifyLegacyPass : public LoopPass {
2229   static char ID; // Pass identification, replacement for typeid
2230 
2231   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2232     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2233   }
2234 
2235   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2236     if (skipLoop(L))
2237       return false;
2238 
2239     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2240     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2241     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2242     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2243     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
2244     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2245     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2246     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2247     auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
2248     MemorySSA *MSSA = nullptr;
2249     if (MSSAAnalysis)
2250       MSSA = &MSSAAnalysis->getMSSA();
2251 
2252     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
2253     return IVS.run(L);
2254   }
2255 
2256   void getAnalysisUsage(AnalysisUsage &AU) const override {
2257     AU.setPreservesCFG();
2258     AU.addPreserved<MemorySSAWrapperPass>();
2259     getLoopAnalysisUsage(AU);
2260   }
2261 };
2262 
2263 } // end anonymous namespace
2264 
2265 char IndVarSimplifyLegacyPass::ID = 0;
2266 
2267 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2268                       "Induction Variable Simplification", false, false)
2269 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2270 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2271                     "Induction Variable Simplification", false, false)
2272 
2273 Pass *llvm::createIndVarSimplifyPass() {
2274   return new IndVarSimplifyLegacyPass();
2275 }
2276