xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 770cf0a5f02dc8983a89c6568d741fbc25baa999)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
42 #include "llvm/Analysis/TargetLibraryInfo.h"
43 #include "llvm/Analysis/TargetTransformInfo.h"
44 #include "llvm/Analysis/ValueTracking.h"
45 #include "llvm/IR/BasicBlock.h"
46 #include "llvm/IR/Constant.h"
47 #include "llvm/IR/ConstantRange.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DerivedTypes.h"
51 #include "llvm/IR/Dominators.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/IRBuilder.h"
54 #include "llvm/IR/InstrTypes.h"
55 #include "llvm/IR/Instruction.h"
56 #include "llvm/IR/Instructions.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/PassManager.h"
59 #include "llvm/IR/PatternMatch.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/Use.h"
62 #include "llvm/IR/User.h"
63 #include "llvm/IR/Value.h"
64 #include "llvm/IR/ValueHandle.h"
65 #include "llvm/Support/Casting.h"
66 #include "llvm/Support/CommandLine.h"
67 #include "llvm/Support/Debug.h"
68 #include "llvm/Support/MathExtras.h"
69 #include "llvm/Support/raw_ostream.h"
70 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
71 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
72 #include "llvm/Transforms/Utils/Local.h"
73 #include "llvm/Transforms/Utils/LoopUtils.h"
74 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
75 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
76 #include <cassert>
77 #include <cstdint>
78 #include <utility>
79 
80 using namespace llvm;
81 using namespace PatternMatch;
82 using namespace SCEVPatternMatch;
83 
84 #define DEBUG_TYPE "indvars"
85 
// Pass statistics, reported with -stats.
STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Policy knob controlling when uses of an IV's exit value outside the loop
// are rewritten to a loop-invariant computation (see ReplaceExitVal).
static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

// Passed through to createWideIV() in simplifyAndExtend().
static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

// Escape hatches for the individual subtransforms.
static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));
124 
125 namespace {
126 
/// Driver for the induction-variable simplification transform on a single
/// loop.  Holds the analyses the transform consumes plus the worklist of
/// instructions that become trivially dead during rewriting.
class IndVarSimplify {
  // Required analyses, owned by the caller.
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  // Created only when the caller supplies MemorySSA; used to keep MemorySSA
  // up to date while deleting instructions.
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  // Instructions made dead by the subtransforms, tracked via weak handles so
  // double-deletion is safe.
  SmallVector<WeakTrackingVH, 16> DeadInsts;
  // Whether IV widening is enabled for this invocation (see AllowIVWidening).
  bool WidenIndVars;

  // Set when simplification exposes work for SimpleLoopUnswitch; exposed to
  // the caller via runUnswitching().
  bool RunUnswitching = false;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting condition from signed
  /// to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzeable exit counts
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  /// Run all subtransforms on loop \p L; returns true if the IR changed.
  bool run(Loop *L);

  /// Whether the caller should follow up with loop unswitching.
  bool runUnswitching() const { return RunUnswitching; }
};
177 
178 } // end anonymous namespace
179 
180 //===----------------------------------------------------------------------===//
181 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
182 //===----------------------------------------------------------------------===//
183 
184 /// Convert APF to an integer, if possible.
185 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
186   bool isExact = false;
187   // See if we can convert this to an int64_t
188   uint64_t UIntVal;
189   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
190                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
191       !isExact)
192     return false;
193   IntVal = UIntVal;
194   return true;
195 }
196 
/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
199 /// For example,
200 /// for(double i = 0; i < 10000; ++i)
201 ///   bar(i)
202 /// is converted into
203 /// for(int i = 0; i < 10000; ++i)
204 ///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  // A loop-header phi has exactly two incoming edges.  L->contains() on the
  // first incoming block yields 1 iff edge 0 is the backedge, so this picks
  // out the preheader edge; XOR with 1 gives the other (backedge) index.
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.  Ordered and unordered fp
  // predicates map to the same signed integer predicate; the values involved
  // are integral constants, so NaN handling is irrelevant here.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    // Inclusive bounds cover one extra value, so widen the range by one and
    // reject if that wraps.
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    // Same one-extra-iteration adjustment as the positive-stride case above.
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI =
      PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
  NewPHI->addIncoming(ConstantInt::getSigned(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));
  NewPHI->setDebugLoc(PN->getDebugLoc());

  Instruction *NewAdd = BinaryOperator::CreateAdd(
      NewPHI, ConstantInt::getSigned(Int32Ty, IncValue),
      Incr->getName() + ".int", Incr->getIterator());
  NewAdd->setDebugLoc(Incr->getDebugLoc());
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(
      TheBr->getIterator(), NewPred, NewAdd,
      ConstantInt::getSigned(Int32Ty, ExitValue), Compare->getName());
  NewCompare->setDebugLoc(Compare->getDebugLoc());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                       PN->getParent()->getFirstInsertionPt());
    Conv->setDebugLoc(PN->getDebugLoc());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}
403 
404 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
405   // First step.  Check to see if there are any floating-point recurrences.
406   // If there are, change them into integer recurrences, permitting analysis by
407   // the SCEV routines.
408   BasicBlock *Header = L->getHeader();
409 
410   SmallVector<WeakTrackingVH, 8> PHIs(llvm::make_pointer_range(Header->phis()));
411 
412   bool Changed = false;
413   for (WeakTrackingVH &PHI : PHIs)
414     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
415       Changed |= handleFloatingPointIV(L, PN);
416 
417   // If the loop previously had floating-point IV, ScalarEvolution
418   // may not have been able to compute a trip count. Now that we've done some
419   // re-writing, the trip count may be computable.
420   if (Changed)
421     SE->forgetLoop(L);
422   return Changed;
423 }
424 
425 //===---------------------------------------------------------------------===//
426 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
427 // they will exit at the first iteration.
428 //===---------------------------------------------------------------------===//
429 
430 /// Check to see if this loop has loop invariant conditions which lead to loop
431 /// exits. If so, we know that if the exit path is taken, it is at the first
432 /// loop iteration. This lets us predict exit values of PHI nodes that live in
433 /// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        // A loop-variant condition could pick a different successor on a
        // later iteration, so we can only reason about invariant ones.
        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.  On the first iteration the header phi
        // holds its preheader incoming value.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          // Invalidate SCEV's cached info for the rewritten phi.
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}
500 
501 //===----------------------------------------------------------------------===//
502 //  IV Widening - Extend the width of an IV to cover its widest uses.
503 //===----------------------------------------------------------------------===//
504 
505 /// Update information about the induction variable that is extended by this
506 /// sign or zero extend operation. This is used to determine the final width of
507 /// the IV before actually widening it.
/// Inspect one sext/zext user of the narrow IV and fold it into \p WI, the
/// running summary (widest legal type seen so far, and whether a signed
/// extension is required).  Non-extension casts are ignored.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  // Only widen to a type the target supports natively.
  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  // First qualifying extension, or a wider one than recorded so far: the
  // sign of this cast overrides any previous choice.
  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI nodes' users-iterator.
  WI.IsSigned |= IsSigned;
}
554 
555 //===----------------------------------------------------------------------===//
556 //  Live IV Reduction - Minimize IVs live across the loop.
557 //===----------------------------------------------------------------------===//
558 
559 //===----------------------------------------------------------------------===//
560 //  Simplification of IV users based on SCEV evaluation.
561 //===----------------------------------------------------------------------===//
562 
563 namespace {
564 
/// IVVisitor adaptor used by simplifyUsersOfIV: records, via visitIVCast(),
/// the widening opportunities (sext/zext users) discovered for one IV phi.
class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  // The narrow induction variable whose cast users are being summarized.
  PHINode *IVPhi;

public:
  // Accumulated widening info for IVPhi; consumed by the caller after the
  // simplification walk.
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};
584 
585 } // end anonymous namespace
586 
587 /// Iteratively perform simplification on a worklist of IV users. Each
588 /// successive simplification may push more users which may themselves be
589 /// candidates for simplification.
590 ///
591 /// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  // IVs whose extensions make them candidates for widening this round.
  SmallVector<WideIVInfo, 8> WideIVs;

  // Widening must be more conservative when guard intrinsics are present.
  auto *GuardDecl = Intrinsic::getDeclarationIfExists(
      L->getBlocks()[0]->getModule(), Intrinsic::experimental_guard);
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis(
      llvm::make_pointer_range(L->getHeader()->phis()));

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extend evaluation until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      // C: whether IR changed; U: whether an unswitching candidate appeared.
      const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
                                             Rewriter, &Visitor);

      Changed |= C;
      RunUnswitching |= U;
      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while(!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    // Widen each candidate; a successful widening queues the new wide phi for
    // another simplification round.
    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}
651 
652 //===----------------------------------------------------------------------===//
653 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
654 //===----------------------------------------------------------------------===//
655 
656 /// Given an Value which is hoped to be part of an add recurance in the given
657 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
658 /// that this is less general than SCEVs AddRec checking.
659 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
660   Instruction *IncI = dyn_cast<Instruction>(IncV);
661   if (!IncI)
662     return nullptr;
663 
664   switch (IncI->getOpcode()) {
665   case Instruction::Add:
666   case Instruction::Sub:
667     break;
668   case Instruction::GetElementPtr:
669     // An IV counter must preserve its type.
670     if (IncI->getNumOperands() == 2)
671       break;
672     [[fallthrough]];
673   default:
674     return nullptr;
675   }
676 
677   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
678   if (Phi && Phi->getParent() == L->getHeader()) {
679     if (L->isLoopInvariant(IncI->getOperand(1)))
680       return Phi;
681     return nullptr;
682   }
683   if (IncI->getOpcode() == Instruction::GetElementPtr)
684     return nullptr;
685 
686   // Allow add/sub to be commuted.
687   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
688   if (Phi && Phi->getParent() == L->getHeader()) {
689     if (L->isLoopInvariant(IncI->getOperand(0)))
690       return Phi;
691   }
692   return nullptr;
693 }
694 
695 /// Whether the current loop exit test is based on this value.  Currently this
696 /// is limited to a direct use in the loop condition.
697 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
698   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
699   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
700   // TODO: Allow non-icmp loop test.
701   if (!ICmp)
702     return false;
703 
704   // TODO: Allow indirect use.
705   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
706 }
707 
708 /// linearFunctionTestReplace policy. Return true unless we can show that the
709 /// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS; canonicalize so the variant side is LHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS: either the phi itself or its increment.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}
756 
757 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
758 /// down to checking that all operands are constant and listing instructions
759 /// that may hide undef.
760 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
761                                unsigned Depth) {
762   if (isa<Constant>(V))
763     return !isa<UndefValue>(V);
764 
765   if (Depth >= 6)
766     return false;
767 
768   // Conservatively handle non-constant non-instructions. For example, Arguments
769   // may be undef.
770   Instruction *I = dyn_cast<Instruction>(V);
771   if (!I)
772     return false;
773 
774   // Load and return values may be undef.
775   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
776     return false;
777 
778   // Optimistically handle other instructions.
779   for (Value *Op : I->operands()) {
780     if (!Visited.insert(Op).second)
781       continue;
782     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
783       return false;
784   }
785   return true;
786 }
787 
788 /// Return true if the given value is concrete. We must prove that undef can
789 /// never reach it.
790 ///
791 /// TODO: If we decide that this is a good approach to checking for undef, we
792 /// may factor it into a common location.
793 static bool hasConcreteDef(Value *V) {
794   SmallPtrSet<Value*, 8> Visited;
795   Visited.insert(V);
796   return hasConcreteDefImpl(V, Visited, 0);
797 }
798 
/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  // SCEV must be able to analyze the phi's type at all.
  if (!SE->isSCEVable(Phi->getType()))
    return false;

  // The phi must be an affine add recurrence in L with a unit step.
  const SCEV *S = SE->getSCEV(Phi);
  if (!match(S, m_scev_AffineAddRec(m_SCEV(), m_scev_One(), m_SpecificLoop(L))))
    return false;

  // The IR must also look like a simple counter: the value flowing in from the
  // latch must map back to this phi via getLoopPhiForCounter (i.e. it is this
  // phi's own increment), and the increment must itself be an add recurrence.
  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}
819 
/// Search the loop header for a loop counter (an add rec w/step of one)
/// suitable for use by LFTR.  If multiple counters are available, select the
/// "best" one based on profitability heuristics.
823 ///
824 /// BECount may be an i8* pointer type. The pointer difference is already
825 /// valid count without scaling the address stride, so it remains a pointer
826 /// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  // The current exit condition; used below to check whether candidate IVs are
  // otherwise (almost) dead.
  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getDataLayout();

  // Header phis are contiguous at the top of the block; stop at the first
  // non-phi.
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (isAlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}
903 
/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // For integer IVs, truncate the IV before computing the limit unless we
  // know apriori that the limit must be a constant when evaluated in the
  // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
  // IV in the loop over a (potentially) expensive expansion of the widened
  // exit count add(zext(add)) expression.
  if (IndVar->getType()->isIntegerTy() &&
      SE->getTypeSizeInBits(AR->getType()) >
      SE->getTypeSizeInBits(ExitCount->getType())) {
    const SCEV *IVInit = AR->getStart();
    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
  }

  // Evaluate the (possibly post-incremented) recurrence at the ExitCount'th
  // backedge taken count to get the limit value, then expand it to IR before
  // the exiting branch.
  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
  assert(SE->isLoopInvariant(IVLimit, L) &&
         "Computed iteration count is not loop invariant!");
  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                ExitingBB->getTerminator());
}
935 
936 /// This method rewrites the exit condition of the loop to be a canonical !=
937 /// comparison against the incremented loop induction variable.  This pass is
938 /// able to rewrite the exit tests of any loop where the SCEV analysis can
939 /// determine a loop-invariant trip count of the loop, which is actually a much
940 /// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  // If the 'true' edge stays in the loop, we keep looping while the IV
  // differs from the limit (NE); otherwise the 'true' edge exits, so the
  // loop-exit test is equality (EQ).
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within in.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    // zext(trunc(IV)) == IV means the IV's value fits the narrow type
    // unsigned, so zero-extending the limit preserves the comparison.
    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}
1077 
1078 //===----------------------------------------------------------------------===//
1079 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1080 //===----------------------------------------------------------------------===//
1081 
1082 /// If there's a single exit block, sink any loop-invariant values that
1083 /// were defined in the preheader but not used inside the loop into the
1084 /// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // Only handle the single-exit-block case; otherwise a sunk instruction
  // would need to be duplicated or placed where it may not dominate all uses.
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  // Walk the preheader bottom-up so that an instruction is visited after its
  // users (within the preheader), letting chains of dead-in-loop values sink.
  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {

    // Skip BB Terminator.
    if (Preheader->getTerminator() == &I)
      continue;

    // New instructions were inserted at the end of the preheader.
    // PHIs sit at the start of the block, so reaching one while iterating
    // backwards means we are done.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I.mayHaveSideEffects() || I.mayReadFromMemory())
      continue;

    // Skip debug or pseudo instructions.
    if (I.isDebugOrPseudoInst())
      continue;

    // Skip eh pad instructions.
    if (I.isEHPad())
      continue;

    // Don't sink alloca: we never want to sink static alloca's out of the
    // entry block, and correctly sinking dynamic alloca's requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(&I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I.uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      // A use in a phi counts as a use in the incoming block, not the block
      // containing the phi.
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    I.moveBefore(ExitBlock->getFirstInsertionPt());
    SE->forgetValue(&I);
    MadeAnyChanges = true;
  }

  return MadeAnyChanges;
}
1156 
1157 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1158                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1159   auto *OldCond = BI->getCondition();
1160   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1161                     << " with " << *NewCond << "\n");
1162   BI->setCondition(NewCond);
1163   if (OldCond->use_empty())
1164     DeadInsts.emplace_back(OldCond);
1165 }
1166 
1167 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1168                                       bool IsTaken) {
1169   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1170   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1171   auto *OldCond = BI->getCondition();
1172   return ConstantInt::get(OldCond->getType(),
1173                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1174 }
1175 
1176 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1177                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1178   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1179   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1180   replaceExitCond(BI, NewCond, DeadInsts);
1181 }
1182 
1183 static void replaceLoopPHINodesWithPreheaderValues(
1184     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1185     ScalarEvolution &SE) {
1186   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1187   auto *LoopPreheader = L->getLoopPreheader();
1188   auto *LoopHeader = L->getHeader();
1189   SmallVector<Instruction *> Worklist;
1190   for (auto &PN : LoopHeader->phis()) {
1191     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1192     for (User *U : PN.users())
1193       Worklist.push_back(cast<Instruction>(U));
1194     SE.forgetValue(&PN);
1195     PN.replaceAllUsesWith(PreheaderIncoming);
1196     DeadInsts.emplace_back(&PN);
1197   }
1198 
1199   // Replacing with the preheader value will often allow IV users to simplify
1200   // (especially if the preheader value is a constant).
1201   SmallPtrSet<Instruction *, 16> Visited;
1202   while (!Worklist.empty()) {
1203     auto *I = cast<Instruction>(Worklist.pop_back_val());
1204     if (!Visited.insert(I).second)
1205       continue;
1206 
1207     // Don't simplify instructions outside the loop.
1208     if (!L->contains(I))
1209       continue;
1210 
1211     Value *Res = simplifyInstruction(I, I->getDataLayout());
1212     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1213       for (User *U : I->users())
1214         Worklist.push_back(cast<Instruction>(U));
1215       I->replaceAllUsesWith(Res);
1216       DeadInsts.emplace_back(I);
1217     }
1218   }
1219 }
1220 
1221 static Value *
1222 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1223                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1224                     SCEVExpander &Rewriter) {
1225   ICmpInst::Predicate InvariantPred = LIP.Pred;
1226   BasicBlock *Preheader = L->getLoopPreheader();
1227   assert(Preheader && "Preheader doesn't exist");
1228   Rewriter.setInsertPoint(Preheader->getTerminator());
1229   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1230   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1231   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1232   if (ExitIfTrue)
1233     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1234   IRBuilder<> Builder(Preheader->getTerminator());
1235   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1236   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1237                             BI->getCondition()->getName());
1238 }
1239 
static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  CmpPredicate Pred = ICmp->getCmpPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = ICmpInst::getInverseCmpPredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.  Widening MaxIter is always safe; narrowing is
  // only done when MaxIter provably fits in the narrower type.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    const SCEV *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically skip last iter is "subtract 1, do not bother about unsigned
    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (const SCEV *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
1297 
static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For branch that stays in loop by TRUE condition, go through AND. For branch
  // that stays in loop by FALSE condition, go through OR. Both gives the
  // similar logic: "stay in loop iff all conditions are true(false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  // Descend through one level of logical and/or, queueing both operands.
  // Returns false if V is not the expected and/or form.
  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give exact
  // same exit count. For all other icmp's, we could use one less iteration,
  // because their value on the last iteration doesn't really matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      const SCEV *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      const SCEV *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      const SCEV *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on last iter, so this one doesn't
    // really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on last
      // iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}
1401 
// Canonicalize the exit conditions of the loop in two phases:
//  1) Convert signed compares of a zext against a loop-invariant RHS into
//     their unsigned equivalents when SCEV can prove the RHS always fits in
//     the zero-extended range of the narrow type.
//  2) "Rotate" an unsigned compare of zext(X) against a loop-invariant RHS
//     into a compare of X against trunc(RHS) hoisted to the preheader,
//     moving the loop-varying extension out of the loop.
bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
  // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative to this would be to extend the
  // set of "interesting" IV users to include the icmp, but doing that
  // regresses results in practice by querying SCEVs before trip counts which
  // rely on them which results in SCEV caching sub-optimal answers.  The
  // concern about caching sub-optimal results is why we only query SCEVs of
  // the loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning cache with sub-optimal results.  For the must-execute case,
    // this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    // If the set of all values the zext can produce (the full unsigned range
    // of the narrow type, zero extended) contains the unsigned range of the
    // RHS, both operands are non-negative in the wide type and the signed
    // predicate is equivalent to its unsigned counterpart.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
    // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS = CastInst::Create(
          Instruction::Trunc, RHS, LHSOp->getType(), "",
          L->getLoopPreheader()->getTerminator()->getIterator());
      // NewRHS is an operation that has been hoisted out of the loop, and
      // therefore should have a dropped location.
      NewRHS->setDebugLoc(DebugLoc::getDropped());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      // Samesign flag cannot be preserved after narrowing the compare.
      ICmp->setSameSign(false);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    // The rotation is legal only when trunc(RHS) round-trips, i.e. RHS's
    // unsigned range (refined by the loop's entry guards) fits in the
    // zero-extended range of the narrow type.
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}
1537 
1538 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1539   SmallVector<BasicBlock*, 16> ExitingBlocks;
1540   L->getExitingBlocks(ExitingBlocks);
1541 
1542   // Remove all exits which aren't both rewriteable and execute on every
1543   // iteration.
1544   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1545     // If our exitting block exits multiple loops, we can only rewrite the
1546     // innermost one.  Otherwise, we're changing how many times the innermost
1547     // loop runs before it exits.
1548     if (LI->getLoopFor(ExitingBB) != L)
1549       return true;
1550 
1551     // Can't rewrite non-branch yet.
1552     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1553     if (!BI)
1554       return true;
1555 
1556     // Likewise, the loop latch must be dominated by the exiting BB.
1557     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1558       return true;
1559 
1560     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1561       // If already constant, nothing to do. However, if this is an
1562       // unconditional exit, we can still replace header phis with their
1563       // preheader value.
1564       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1565         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1566       return true;
1567     }
1568 
1569     return false;
1570   });
1571 
1572   if (ExitingBlocks.empty())
1573     return false;
1574 
1575   // Get a symbolic upper bound on the loop backedge taken count.
1576   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1577   if (isa<SCEVCouldNotCompute>(MaxBECount))
1578     return false;
1579 
1580   // Visit our exit blocks in order of dominance. We know from the fact that
1581   // all exits must dominate the latch, so there is a total dominance order
1582   // between them.
1583   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1584                // std::sort sorts in ascending order, so we want the inverse of
1585                // the normal dominance relation.
1586                if (A == B) return false;
1587                if (DT->properlyDominates(A, B))
1588                  return true;
1589                else {
1590                  assert(DT->properlyDominates(B, A) &&
1591                         "expected total dominance order!");
1592                  return false;
1593                }
1594   });
1595 #ifdef ASSERT
1596   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1597     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1598   }
1599 #endif
1600 
1601   bool Changed = false;
1602   bool SkipLastIter = false;
1603   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1604   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1605     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1606       return;
1607     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1608       CurrMaxExit = MaxExitCount;
1609     else
1610       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1611     // If the loop has more than 1 iteration, all further checks will be
1612     // executed 1 iteration less.
1613     if (CurrMaxExit == MaxBECount)
1614       SkipLastIter = true;
1615   };
1616   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1617   for (BasicBlock *ExitingBB : ExitingBlocks) {
1618     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1619     const SCEV *MaxExitCount = SE->getExitCount(
1620         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1621     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1622       // Okay, we do not know the exit count here. Can we at least prove that it
1623       // will remain the same within iteration space?
1624       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1625       auto OptimizeCond = [&](bool SkipLastIter) {
1626         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1627                                                     MaxBECount, SkipLastIter,
1628                                                     SE, Rewriter, DeadInsts);
1629       };
1630 
1631       // TODO: We might have proved that we can skip the last iteration for
1632       // this check. In this case, we only want to check the condition on the
1633       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1634       // corner case:
1635       //
1636       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1637       //
1638       // If we could not prove that len != 0, then we also could not prove that
1639       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1640       // OptimizeCond will likely not prove anything for it, even if it could
1641       // prove the same fact for len.
1642       //
1643       // As a temporary solution, we query both last and pre-last iterations in
1644       // hope that we will be able to prove triviality for at least one of
1645       // them. We can stop querying MaxBECount for this case once SCEV
1646       // understands that (MaxBECount - 1) will not overflow here.
1647       if (OptimizeCond(false))
1648         Changed = true;
1649       else if (SkipLastIter && OptimizeCond(true))
1650         Changed = true;
1651       UpdateSkipLastIter(MaxExitCount);
1652       continue;
1653     }
1654 
1655     UpdateSkipLastIter(ExactExitCount);
1656 
1657     // If we know we'd exit on the first iteration, rewrite the exit to
1658     // reflect this.  This does not imply the loop must exit through this
1659     // exit; there may be an earlier one taken on the first iteration.
1660     // We know that the backedge can't be taken, so we replace all
1661     // the header PHIs with values coming from the preheader.
1662     if (ExactExitCount->isZero()) {
1663       foldExit(L, ExitingBB, true, DeadInsts);
1664       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1665       Changed = true;
1666       continue;
1667     }
1668 
1669     assert(ExactExitCount->getType()->isIntegerTy() &&
1670            MaxBECount->getType()->isIntegerTy() &&
1671            "Exit counts must be integers");
1672 
1673     Type *WiderType =
1674         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1675     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1676     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1677     assert(MaxBECount->getType() == ExactExitCount->getType());
1678 
1679     // Can we prove that some other exit must be taken strictly before this
1680     // one?
1681     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1682                                      ExactExitCount)) {
1683       foldExit(L, ExitingBB, false, DeadInsts);
1684       Changed = true;
1685       continue;
1686     }
1687 
1688     // As we run, keep track of which exit counts we've encountered.  If we
1689     // find a duplicate, we've found an exit which would have exited on the
1690     // exiting iteration, but (from the visit order) strictly follows another
1691     // which does the same and is thus dead.
1692     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1693       foldExit(L, ExitingBB, false, DeadInsts);
1694       Changed = true;
1695       continue;
1696     }
1697 
1698     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1699     // here.  If we kept track of the min of dominanting exits so far, we could
1700     // discharge exits with EC >= MDEC. This is less powerful than the existing
1701     // transform (since later exits aren't considered), but potentially more
1702     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1703     // or a >u b.  Such a case is not currently known.
1704   }
1705   return Changed;
1706 }
1707 
/// Rewrite exit conditions of a read-only loop into loop-invariant form by
/// comparing each exit's exit count against the exact backedge-taken count.
/// Requires: the loop has no side effects, every exit dominates the latch
/// (so exits form a linear chain), and every predicated exit has a
/// computable, safely-expandable exit count.  Returns true if any exit
/// branch condition was rewritten.
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicateable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  // Returns true if this exit cannot be predicated (wrong loop level,
  // non-branch terminator, already-constant condition, phis in the exit
  // block, or an unknown/unexpandable exit count).
  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // Make sure all exits dominate the latch. This means there is a linear chain
  // of exits. We check this before sorting so we have a total order.
  BasicBlock *Latch = L->getLoopLatch();
  for (BasicBlock *ExitingBB : ExitingBlocks)
    if (!DT->dominates(ExitingBB, Latch))
      return false;

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // llvm::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B)
      return false;
    if (DT->properlyDominates(A, B))
      return true;
    if (DT->properlyDominates(B, A))
      return false;
    llvm_unreachable("Should have total dominance order");
  });

  // Make sure our exit blocks are really a total order (i.e. a linear chain of
  // exits before the backedge).
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    assert(DT->dominates(ExitingBlocks[i - 1], ExitingBlocks[i]) &&
           "Not sorted by dominance");

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO:isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      // This exit's count equals the BTC; it is always taken on the final
      // iteration, so the in-loop condition folds to "exit".
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      // Otherwise the exit is taken exactly when its (loop-invariant) exit
      // count equals the exact backedge-taken count.
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
    RunUnswitching = true;
  }

  return Changed;
}
1872 
1873 //===----------------------------------------------------------------------===//
1874 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1875 //===----------------------------------------------------------------------===//
1876 
// Top-level driver: runs the IV-simplification sub-passes over one loop in a
// carefully chosen order (simplify/extend, exit-value rewriting, congruent-IV
// elimination, exit canonicalization/optimization/predication, LFTR, then
// dead-code cleanup).  Returns true if any IR was changed.
bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter))  {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    // Some nested loops may share same folded exit basic block,
    // thus we need to notify top most loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      if (!Rewriter.isSafeToExpand(ExitCount))
        continue;

      Changed |= linearFunctionTestReplace(L, ExitingBB,
                                           ExitCount, IndVar,
                                           Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();

  return Changed;
}
2039 
2040 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2041                                           LoopStandardAnalysisResults &AR,
2042                                           LPMUpdater &) {
2043   Function *F = L.getHeader()->getParent();
2044   const DataLayout &DL = F->getDataLayout();
2045 
2046   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2047                      WidenIndVars && AllowIVWidening);
2048   if (!IVS.run(&L))
2049     return PreservedAnalyses::all();
2050 
2051   auto PA = getLoopPassPreservedAnalyses();
2052   PA.preserveSet<CFGAnalyses>();
2053   if (IVS.runUnswitching()) {
2054     AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2055     PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2056   }
2057 
2058   if (AR.MSSA)
2059     PA.preserve<MemorySSAAnalysis>();
2060   return PA;
2061 }
2062