xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision f5b7695d2d5abd735064870ad43f4b9c723940c1)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallPtrSet.h"
35 #include "llvm/ADT/SmallSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/ScalarEvolution.h"
42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/Analysis/ValueTracking.h"
47 #include "llvm/IR/BasicBlock.h"
48 #include "llvm/IR/Constant.h"
49 #include "llvm/IR/ConstantRange.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Dominators.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/InstrTypes.h"
57 #include "llvm/IR/Instruction.h"
58 #include "llvm/IR/Instructions.h"
59 #include "llvm/IR/IntrinsicInst.h"
60 #include "llvm/IR/Intrinsics.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PassManager.h"
64 #include "llvm/IR/PatternMatch.h"
65 #include "llvm/IR/Type.h"
66 #include "llvm/IR/Use.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/IR/Value.h"
69 #include "llvm/IR/ValueHandle.h"
70 #include "llvm/InitializePasses.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/CommandLine.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/ErrorHandling.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/Scalar.h"
80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
82 #include "llvm/Transforms/Utils/Local.h"
83 #include "llvm/Transforms/Utils/LoopUtils.h"
84 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
85 #include <cassert>
86 #include <cstdint>
87 #include <utility>
88 
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "indvars"
92 
93 STATISTIC(NumWidened     , "Number of indvars widened");
94 STATISTIC(NumReplaced    , "Number of exit values replaced");
95 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
96 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
97 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
98 
99 // Trip count verification can be enabled by default under NDEBUG if we
100 // implement a strong expression equivalence checker in SCEV. Until then, we
101 // use the verify-indvars flag, which may assert in some cases.
102 static cl::opt<bool> VerifyIndvars(
103   "verify-indvars", cl::Hidden,
104   cl::desc("Verify the ScalarEvolution result after running indvars"));
105 
106 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
107 
108 static cl::opt<ReplaceExitVal> ReplaceExitValue(
109     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
110     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
111     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
112                clEnumValN(OnlyCheapRepl, "cheap",
113                           "only replace exit value when the cost is cheap"),
114                clEnumValN(NoHardUse, "noharduse",
115                           "only replace exit values when loop def likely dead"),
116                clEnumValN(AlwaysRepl, "always",
117                           "always replace exit value whenever possible")));
118 
119 static cl::opt<bool> UsePostIncrementRanges(
120   "indvars-post-increment-ranges", cl::Hidden,
121   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
122   cl::init(true));
123 
124 static cl::opt<bool>
125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
126             cl::desc("Disable Linear Function Test Replace optimization"));
127 
128 static cl::opt<bool>
129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
130                 cl::desc("Predicate conditions in read only loops"));
131 
132 namespace {
133 
134 struct RewritePhi;
135 
136 class IndVarSimplify {
137   LoopInfo *LI;
138   ScalarEvolution *SE;
139   DominatorTree *DT;
140   const DataLayout &DL;
141   TargetLibraryInfo *TLI;
142   const TargetTransformInfo *TTI;
143 
144   SmallVector<WeakTrackingVH, 16> DeadInsts;
145 
146   bool isValidRewrite(Value *FromVal, Value *ToVal);
147 
148   bool handleFloatingPointIV(Loop *L, PHINode *PH);
149   bool rewriteNonIntegerIVs(Loop *L);
150 
151   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
152   /// Try to eliminate loop exits based on analyzeable exit counts
153   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
154   /// Try to form loop invariant tests for loop exits by changing how many
155   /// iterations of the loop run when that is unobservable.
156   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
157 
158   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
159   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
160   bool rewriteFirstIterationLoopExitValues(Loop *L);
161   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
162 
163   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
164                                  const SCEV *ExitCount,
165                                  PHINode *IndVar, SCEVExpander &Rewriter);
166 
167   bool sinkUnusedInvariants(Loop *L);
168 
169 public:
170   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
171                  const DataLayout &DL, TargetLibraryInfo *TLI,
172                  TargetTransformInfo *TTI)
173       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
174 
175   bool run(Loop *L);
176 };
177 
178 } // end anonymous namespace
179 
180 /// Return true if the SCEV expansion generated by the rewriter can replace the
181 /// original value. SCEV guarantees that it produces the same value, but the way
182 /// it is produced may be illegal IR.  Ideally, this function will only be
183 /// called for verification.
184 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
185   // If an SCEV expression subsumed multiple pointers, its expansion could
186   // reassociate the GEP changing the base pointer. This is illegal because the
187   // final address produced by a GEP chain must be inbounds relative to its
188   // underlying object. Otherwise basic alias analysis, among other things,
189   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
190   // producing an expression involving multiple pointers. Until then, we must
191   // bail out here.
192   //
193   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
194   // because it understands lcssa phis while SCEV does not.
195   Value *FromPtr = FromVal;
196   Value *ToPtr = ToVal;
197   if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
198     FromPtr = GEP->getPointerOperand();
199   }
200   if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
201     ToPtr = GEP->getPointerOperand();
202   }
203   if (FromPtr != FromVal || ToPtr != ToVal) {
204     // Quickly check the common case
205     if (FromPtr == ToPtr)
206       return true;
207 
208     // SCEV may have rewritten an expression that produces the GEP's pointer
209     // operand. That's ok as long as the pointer operand has the same base
210     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
211     // base of a recurrence. This handles the case in which SCEV expansion
212     // converts a pointer type recurrence into a nonrecurrent pointer base
213     // indexed by an integer recurrence.
214 
215     // If the GEP base pointer is a vector of pointers, abort.
216     if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
217       return false;
218 
219     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
220     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
221     if (FromBase == ToBase)
222       return true;
223 
224     LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
225                       << " != " << *ToBase << "\n");
226 
227     return false;
228   }
229   return true;
230 }
231 
232 /// Determine the insertion point for this user. By default, insert immediately
233 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
234 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
235 /// common dominator for the incoming blocks. A nullptr can be returned if no
236 /// viable location is found: it may happen if User is a PHI and Def only comes
237 /// to this PHI from unreachable blocks.
238 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
239                                           DominatorTree *DT, LoopInfo *LI) {
240   PHINode *PHI = dyn_cast<PHINode>(User);
241   if (!PHI)
242     return User;
243 
244   Instruction *InsertPt = nullptr;
245   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
246     if (PHI->getIncomingValue(i) != Def)
247       continue;
248 
249     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
250 
251     if (!DT->isReachableFromEntry(InsertBB))
252       continue;
253 
254     if (!InsertPt) {
255       InsertPt = InsertBB->getTerminator();
256       continue;
257     }
258     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
259     InsertPt = InsertBB->getTerminator();
260   }
261 
262   // If we have skipped all inputs, it means that Def only comes to Phi from
263   // unreachable blocks.
264   if (!InsertPt)
265     return nullptr;
266 
267   auto *DefI = dyn_cast<Instruction>(Def);
268   if (!DefI)
269     return InsertPt;
270 
271   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
272 
273   auto *L = LI->getLoopFor(DefI->getParent());
274   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
275 
276   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
277     if (LI->getLoopFor(DTN->getBlock()) == L)
278       return DTN->getBlock()->getTerminator();
279 
280   llvm_unreachable("DefI dominates InsertPt!");
281 }
282 
283 //===----------------------------------------------------------------------===//
284 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
285 //===----------------------------------------------------------------------===//
286 
287 /// Convert APF to an integer, if possible.
288 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
289   bool isExact = false;
290   // See if we can convert this to an int64_t
291   uint64_t UIntVal;
292   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
293                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
294       !isExact)
295     return false;
296   IntVal = UIntVal;
297   return true;
298 }
299 
300 /// If the loop has floating induction variable then insert corresponding
301 /// integer induction variable if possible.
302 /// For example,
303 /// for(double i = 0; i < 10000; ++i)
304 ///   bar(i)
305 /// is converted into
306 /// for(int i = 0; i < 10000; ++i)
307 ///   bar((double)i);
308 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
309   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
310   unsigned BackEdge     = IncomingEdge^1;
311 
312   // Check incoming value.
313   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
314 
315   int64_t InitValue;
316   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
317     return false;
318 
319   // Check IV increment. Reject this PN if increment operation is not
320   // an add or increment value can not be represented by an integer.
321   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
322   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
323 
324   // If this is not an add of the PHI with a constantfp, or if the constant fp
325   // is not an integer, bail out.
326   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
327   int64_t IncValue;
328   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
329       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
330     return false;
331 
332   // Check Incr uses. One user is PN and the other user is an exit condition
333   // used by the conditional terminator.
334   Value::user_iterator IncrUse = Incr->user_begin();
335   Instruction *U1 = cast<Instruction>(*IncrUse++);
336   if (IncrUse == Incr->user_end()) return false;
337   Instruction *U2 = cast<Instruction>(*IncrUse++);
338   if (IncrUse != Incr->user_end()) return false;
339 
340   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
341   // only used by a branch, we can't transform it.
342   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
343   if (!Compare)
344     Compare = dyn_cast<FCmpInst>(U2);
345   if (!Compare || !Compare->hasOneUse() ||
346       !isa<BranchInst>(Compare->user_back()))
347     return false;
348 
349   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
350 
351   // We need to verify that the branch actually controls the iteration count
352   // of the loop.  If not, the new IV can overflow and no one will notice.
353   // The branch block must be in the loop and one of the successors must be out
354   // of the loop.
355   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
356   if (!L->contains(TheBr->getParent()) ||
357       (L->contains(TheBr->getSuccessor(0)) &&
358        L->contains(TheBr->getSuccessor(1))))
359     return false;
360 
361   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
362   // transform it.
363   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
364   int64_t ExitValue;
365   if (ExitValueVal == nullptr ||
366       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
367     return false;
368 
369   // Find new predicate for integer comparison.
370   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
371   switch (Compare->getPredicate()) {
372   default: return false;  // Unknown comparison.
373   case CmpInst::FCMP_OEQ:
374   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
375   case CmpInst::FCMP_ONE:
376   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
377   case CmpInst::FCMP_OGT:
378   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
379   case CmpInst::FCMP_OGE:
380   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
381   case CmpInst::FCMP_OLT:
382   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
383   case CmpInst::FCMP_OLE:
384   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
385   }
386 
387   // We convert the floating point induction variable to a signed i32 value if
388   // we can.  This is only safe if the comparison will not overflow in a way
389   // that won't be trapped by the integer equivalent operations.  Check for this
390   // now.
391   // TODO: We could use i64 if it is native and the range requires it.
392 
393   // The start/stride/exit values must all fit in signed i32.
394   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
395     return false;
396 
397   // If not actually striding (add x, 0.0), avoid touching the code.
398   if (IncValue == 0)
399     return false;
400 
401   // Positive and negative strides have different safety conditions.
402   if (IncValue > 0) {
403     // If we have a positive stride, we require the init to be less than the
404     // exit value.
405     if (InitValue >= ExitValue)
406       return false;
407 
408     uint32_t Range = uint32_t(ExitValue-InitValue);
409     // Check for infinite loop, either:
410     // while (i <= Exit) or until (i > Exit)
411     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
412       if (++Range == 0) return false;  // Range overflows.
413     }
414 
415     unsigned Leftover = Range % uint32_t(IncValue);
416 
417     // If this is an equality comparison, we require that the strided value
418     // exactly land on the exit value, otherwise the IV condition will wrap
419     // around and do things the fp IV wouldn't.
420     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
421         Leftover != 0)
422       return false;
423 
424     // If the stride would wrap around the i32 before exiting, we can't
425     // transform the IV.
426     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
427       return false;
428   } else {
429     // If we have a negative stride, we require the init to be greater than the
430     // exit value.
431     if (InitValue <= ExitValue)
432       return false;
433 
434     uint32_t Range = uint32_t(InitValue-ExitValue);
435     // Check for infinite loop, either:
436     // while (i >= Exit) or until (i < Exit)
437     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
438       if (++Range == 0) return false;  // Range overflows.
439     }
440 
441     unsigned Leftover = Range % uint32_t(-IncValue);
442 
443     // If this is an equality comparison, we require that the strided value
444     // exactly land on the exit value, otherwise the IV condition will wrap
445     // around and do things the fp IV wouldn't.
446     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
447         Leftover != 0)
448       return false;
449 
450     // If the stride would wrap around the i32 before exiting, we can't
451     // transform the IV.
452     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
453       return false;
454   }
455 
456   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
457 
458   // Insert new integer induction variable.
459   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
460   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
461                       PN->getIncomingBlock(IncomingEdge));
462 
463   Value *NewAdd =
464     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
465                               Incr->getName()+".int", Incr);
466   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
467 
468   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
469                                       ConstantInt::get(Int32Ty, ExitValue),
470                                       Compare->getName());
471 
472   // In the following deletions, PN may become dead and may be deleted.
473   // Use a WeakTrackingVH to observe whether this happens.
474   WeakTrackingVH WeakPH = PN;
475 
476   // Delete the old floating point exit comparison.  The branch starts using the
477   // new comparison.
478   NewCompare->takeName(Compare);
479   Compare->replaceAllUsesWith(NewCompare);
480   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
481 
482   // Delete the old floating point increment.
483   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
484   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
485 
486   // If the FP induction variable still has uses, this is because something else
487   // in the loop uses its value.  In order to canonicalize the induction
488   // variable, we chose to eliminate the IV and rewrite it in terms of an
489   // int->fp cast.
490   //
491   // We give preference to sitofp over uitofp because it is faster on most
492   // platforms.
493   if (WeakPH) {
494     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
495                                  &*PN->getParent()->getFirstInsertionPt());
496     PN->replaceAllUsesWith(Conv);
497     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
498   }
499   return true;
500 }
501 
502 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
503   // First step.  Check to see if there are any floating-point recurrences.
504   // If there are, change them into integer recurrences, permitting analysis by
505   // the SCEV routines.
506   BasicBlock *Header = L->getHeader();
507 
508   SmallVector<WeakTrackingVH, 8> PHIs;
509   for (PHINode &PN : Header->phis())
510     PHIs.push_back(&PN);
511 
512   bool Changed = false;
513   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
514     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
515       Changed |= handleFloatingPointIV(L, PN);
516 
517   // If the loop previously had floating-point IV, ScalarEvolution
518   // may not have been able to compute a trip count. Now that we've done some
519   // re-writing, the trip count may be computable.
520   if (Changed)
521     SE->forgetLoop(L);
522   return Changed;
523 }
524 
525 namespace {
526 
527 // Collect information about PHI nodes which can be transformed in
528 // rewriteLoopExitValues.
529 struct RewritePhi {
530   PHINode *PN;
531 
532   // Ith incoming value.
533   unsigned Ith;
534 
535   // Exit value after expansion.
536   Value *Val;
537 
538   // High Cost when expansion.
539   bool HighCost;
540 
541   RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
542       : PN(P), Ith(I), Val(V), HighCost(H) {}
543 };
544 
545 } // end anonymous namespace
546 
547 //===----------------------------------------------------------------------===//
548 // rewriteLoopExitValues - Optimize IV users outside the loop.
549 // As a side effect, reduces the amount of IV processing within the loop.
550 //===----------------------------------------------------------------------===//
551 
552 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
553   SmallPtrSet<const Instruction *, 8> Visited;
554   SmallVector<const Instruction *, 8> WorkList;
555   Visited.insert(I);
556   WorkList.push_back(I);
557   while (!WorkList.empty()) {
558     const Instruction *Curr = WorkList.pop_back_val();
559     // This use is outside the loop, nothing to do.
560     if (!L->contains(Curr))
561       continue;
562     // Do we assume it is a "hard" use which will not be eliminated easily?
563     if (Curr->mayHaveSideEffects())
564       return true;
565     // Otherwise, add all its users to worklist.
566     for (auto U : Curr->users()) {
567       auto *UI = cast<Instruction>(U);
568       if (Visited.insert(UI).second)
569         WorkList.push_back(UI);
570     }
571   }
572   return false;
573 }
574 
575 /// Check to see if this loop has a computable loop-invariant execution count.
576 /// If so, this means that we can compute the final value of any expressions
577 /// that are recurrent in the loop, and substitute the exit values from the loop
578 /// into any instructions outside of the loop that use the final values of the
579 /// current expressions.
580 ///
581 /// This is mostly redundant with the regular IndVarSimplify activities that
582 /// happen later, except that it's more powerful in some cases, because it's
583 /// able to brute-force evaluate arbitrary instructions as long as they have
584 /// constant operands at the beginning of the loop.
585 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
586   // Check a pre-condition.
587   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
588          "Indvars did not preserve LCSSA!");
589 
590   SmallVector<BasicBlock*, 8> ExitBlocks;
591   L->getUniqueExitBlocks(ExitBlocks);
592 
593   SmallVector<RewritePhi, 8> RewritePhiSet;
594   // Find all values that are computed inside the loop, but used outside of it.
595   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
596   // the exit blocks of the loop to find them.
597   for (BasicBlock *ExitBB : ExitBlocks) {
598     // If there are no PHI nodes in this exit block, then no values defined
599     // inside the loop are used on this path, skip it.
600     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
601     if (!PN) continue;
602 
603     unsigned NumPreds = PN->getNumIncomingValues();
604 
605     // Iterate over all of the PHI nodes.
606     BasicBlock::iterator BBI = ExitBB->begin();
607     while ((PN = dyn_cast<PHINode>(BBI++))) {
608       if (PN->use_empty())
609         continue; // dead use, don't replace it
610 
611       if (!SE->isSCEVable(PN->getType()))
612         continue;
613 
614       // It's necessary to tell ScalarEvolution about this explicitly so that
615       // it can walk the def-use list and forget all SCEVs, as it may not be
616       // watching the PHI itself. Once the new exit value is in place, there
617       // may not be a def-use connection between the loop and every instruction
618       // which got a SCEVAddRecExpr for that loop.
619       SE->forgetValue(PN);
620 
621       // Iterate over all of the values in all the PHI nodes.
622       for (unsigned i = 0; i != NumPreds; ++i) {
623         // If the value being merged in is not integer or is not defined
624         // in the loop, skip it.
625         Value *InVal = PN->getIncomingValue(i);
626         if (!isa<Instruction>(InVal))
627           continue;
628 
629         // If this pred is for a subloop, not L itself, skip it.
630         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
631           continue; // The Block is in a subloop, skip it.
632 
633         // Check that InVal is defined in the loop.
634         Instruction *Inst = cast<Instruction>(InVal);
635         if (!L->contains(Inst))
636           continue;
637 
638         // Okay, this instruction has a user outside of the current loop
639         // and varies predictably *inside* the loop.  Evaluate the value it
640         // contains when the loop exits, if possible.  We prefer to start with
641         // expressions which are true for all exits (so as to maximize
642         // expression reuse by the SCEVExpander), but resort to per-exit
643         // evaluation if that fails.
644         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
645         if (isa<SCEVCouldNotCompute>(ExitValue) ||
646             !SE->isLoopInvariant(ExitValue, L) ||
647             !isSafeToExpand(ExitValue, *SE)) {
648           // TODO: This should probably be sunk into SCEV in some way; maybe a
649           // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
650           // most SCEV expressions and other recurrence types (e.g. shift
651           // recurrences).  Is there existing code we can reuse?
652           const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
653           if (isa<SCEVCouldNotCompute>(ExitCount))
654             continue;
655           if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
656             if (AddRec->getLoop() == L)
657               ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
658           if (isa<SCEVCouldNotCompute>(ExitValue) ||
659               !SE->isLoopInvariant(ExitValue, L) ||
660               !isSafeToExpand(ExitValue, *SE))
661             continue;
662         }
663 
664         // Computing the value outside of the loop brings no benefit if it is
665         // definitely used inside the loop in a way which can not be optimized
666         // away.  Avoid doing so unless we know we have a value which computes
667         // the ExitValue already.  TODO: This should be merged into SCEV
668         // expander to leverage its knowledge of existing expressions.
669         if (ReplaceExitValue != AlwaysRepl &&
670             !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
671             hasHardUserWithinLoop(L, Inst))
672           continue;
673 
674         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
675         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
676 
677         LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
678                           << '\n'
679                           << "  LoopVal = " << *Inst << "\n");
680 
681         if (!isValidRewrite(Inst, ExitVal)) {
682           DeadInsts.push_back(ExitVal);
683           continue;
684         }
685 
686 #ifndef NDEBUG
687         // If we reuse an instruction from a loop which is neither L nor one of
688         // its containing loops, we end up breaking LCSSA form for this loop by
689         // creating a new use of its instruction.
690         if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
691           if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
692             if (EVL != L)
693               assert(EVL->contains(L) && "LCSSA breach detected!");
694 #endif
695 
696         // Collect all the candidate PHINodes to be rewritten.
697         RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
698       }
699     }
700   }
701 
702   bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
703 
704   bool Changed = false;
705   // Transformation.
706   for (const RewritePhi &Phi : RewritePhiSet) {
707     PHINode *PN = Phi.PN;
708     Value *ExitVal = Phi.Val;
709 
710     // Only do the rewrite when the ExitValue can be expanded cheaply.
711     // If LoopCanBeDel is true, rewrite exit value aggressively.
712     if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
713       DeadInsts.push_back(ExitVal);
714       continue;
715     }
716 
717     Changed = true;
718     ++NumReplaced;
719     Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
720     PN->setIncomingValue(Phi.Ith, ExitVal);
721 
722     // If this instruction is dead now, delete it. Don't do it now to avoid
723     // invalidating iterators.
724     if (isInstructionTriviallyDead(Inst, TLI))
725       DeadInsts.push_back(Inst);
726 
727     // Replace PN with ExitVal if that is legal and does not break LCSSA.
728     if (PN->getNumIncomingValues() == 1 &&
729         LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
730       PN->replaceAllUsesWith(ExitVal);
731       PN->eraseFromParent();
732     }
733   }
734 
735   // The insertion point instruction may have been deleted; clear it out
736   // so that the rewriter doesn't trip over it later.
737   Rewriter.clearInsertPoint();
738   return Changed;
739 }
740 
741 //===---------------------------------------------------------------------===//
742 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
743 // they will exit at the first iteration.
744 //===---------------------------------------------------------------------===//
745 
746 /// Check to see if this loop has loop invariant conditions which lead to loop
747 /// exits. If so, we know that if the exit path is taken, it is at the first
748 /// loop iteration. This lets us predict exit values of PHI nodes that live in
749 /// loop header.
750 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
751   // Verify the input to the pass is already in LCSSA form.
752   assert(L->isLCSSAForm(*DT));
753 
754   SmallVector<BasicBlock *, 8> ExitBlocks;
755   L->getUniqueExitBlocks(ExitBlocks);
756 
757   bool MadeAnyChanges = false;
758   for (auto *ExitBB : ExitBlocks) {
759     // If there are no more PHI nodes in this exit block, then no more
760     // values defined inside the loop are used on this path.
761     for (PHINode &PN : ExitBB->phis()) {
762       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
763            IncomingValIdx != E; ++IncomingValIdx) {
764         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
765 
766         // Can we prove that the exit must run on the first iteration if it
767         // runs at all?  (i.e. early exits are fine for our purposes, but
768         // traces which lead to this exit being taken on the 2nd iteration
769         // aren't.)  Note that this is about whether the exit branch is
770         // executed, not about whether it is taken.
771         if (!L->getLoopLatch() ||
772             !DT->dominates(IncomingBB, L->getLoopLatch()))
773           continue;
774 
775         // Get condition that leads to the exit path.
776         auto *TermInst = IncomingBB->getTerminator();
777 
778         Value *Cond = nullptr;
779         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
780           // Must be a conditional branch, otherwise the block
781           // should not be in the loop.
782           Cond = BI->getCondition();
783         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
784           Cond = SI->getCondition();
785         else
786           continue;
787 
788         if (!L->isLoopInvariant(Cond))
789           continue;
790 
791         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
792 
793         // Only deal with PHIs in the loop header.
794         if (!ExitVal || ExitVal->getParent() != L->getHeader())
795           continue;
796 
797         // If ExitVal is a PHI on the loop header, then we know its
798         // value along this exit because the exit can only be taken
799         // on the first iteration.
800         auto *LoopPreheader = L->getLoopPreheader();
801         assert(LoopPreheader && "Invalid loop");
802         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
803         if (PreheaderIdx != -1) {
804           assert(ExitVal->getParent() == L->getHeader() &&
805                  "ExitVal must be in loop header");
806           MadeAnyChanges = true;
807           PN.setIncomingValue(IncomingValIdx,
808                               ExitVal->getIncomingValue(PreheaderIdx));
809         }
810       }
811     }
812   }
813   return MadeAnyChanges;
814 }
815 
816 /// Check whether it is possible to delete the loop after rewriting exit
817 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
818 /// aggressively.
819 bool IndVarSimplify::canLoopBeDeleted(
820     Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
821   BasicBlock *Preheader = L->getLoopPreheader();
822   // If there is no preheader, the loop will not be deleted.
823   if (!Preheader)
824     return false;
825 
826   // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
827   // We obviate multiple ExitingBlocks case for simplicity.
828   // TODO: If we see testcase with multiple ExitingBlocks can be deleted
829   // after exit value rewriting, we can enhance the logic here.
830   SmallVector<BasicBlock *, 4> ExitingBlocks;
831   L->getExitingBlocks(ExitingBlocks);
832   SmallVector<BasicBlock *, 8> ExitBlocks;
833   L->getUniqueExitBlocks(ExitBlocks);
834   if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
835     return false;
836 
837   BasicBlock *ExitBlock = ExitBlocks[0];
838   BasicBlock::iterator BI = ExitBlock->begin();
839   while (PHINode *P = dyn_cast<PHINode>(BI)) {
840     Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
841 
842     // If the Incoming value of P is found in RewritePhiSet, we know it
843     // could be rewritten to use a loop invariant value in transformation
844     // phase later. Skip it in the loop invariant check below.
845     bool found = false;
846     for (const RewritePhi &Phi : RewritePhiSet) {
847       unsigned i = Phi.Ith;
848       if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
849         found = true;
850         break;
851       }
852     }
853 
854     Instruction *I;
855     if (!found && (I = dyn_cast<Instruction>(Incoming)))
856       if (!L->hasLoopInvariantOperands(I))
857         return false;
858 
859     ++BI;
860   }
861 
862   for (auto *BB : L->blocks())
863     if (llvm::any_of(*BB, [](Instruction &I) {
864           return I.mayHaveSideEffects();
865         }))
866       return false;
867 
868   return true;
869 }
870 
871 //===----------------------------------------------------------------------===//
872 //  IV Widening - Extend the width of an IV to cover its widest uses.
873 //===----------------------------------------------------------------------===//
874 
875 namespace {
876 
877 // Collect information about induction variables that are used by sign/zero
878 // extend operations. This information is recorded by CollectExtend and provides
879 // the input to WidenIV.
880 struct WideIVInfo {
881   PHINode *NarrowIV = nullptr;
882 
883   // Widest integer type created [sz]ext
884   Type *WidestNativeType = nullptr;
885 
886   // Was a sext user seen before a zext?
887   bool IsSigned = false;
888 };
889 
890 } // end anonymous namespace
891 
892 /// Update information about the induction variable that is extended by this
893 /// sign or zero extend operation. This is used to determine the final width of
894 /// the IV before actually widening it.
895 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
896                         const TargetTransformInfo *TTI) {
897   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
898   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
899     return;
900 
901   Type *Ty = Cast->getType();
902   uint64_t Width = SE->getTypeSizeInBits(Ty);
903   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
904     return;
905 
906   // Check that `Cast` actually extends the induction variable (we rely on this
907   // later).  This takes care of cases where `Cast` is extending a truncation of
908   // the narrow induction variable, and thus can end up being narrower than the
909   // "narrow" induction variable.
910   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
911   if (NarrowIVWidth >= Width)
912     return;
913 
914   // Cast is either an sext or zext up to this point.
915   // We should not widen an indvar if arithmetics on the wider indvar are more
916   // expensive than those on the narrower indvar. We check only the cost of ADD
917   // because at least an ADD is required to increment the induction variable. We
918   // could compute more comprehensively the cost of all instructions on the
919   // induction variable when necessary.
920   if (TTI &&
921       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
922           TTI->getArithmeticInstrCost(Instruction::Add,
923                                       Cast->getOperand(0)->getType())) {
924     return;
925   }
926 
927   if (!WI.WidestNativeType) {
928     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
929     WI.IsSigned = IsSigned;
930     return;
931   }
932 
933   // We extend the IV to satisfy the sign of its first user, arbitrarily.
934   if (WI.IsSigned != IsSigned)
935     return;
936 
937   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
938     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
939 }
940 
941 namespace {
942 
943 /// Record a link in the Narrow IV def-use chain along with the WideIV that
944 /// computes the same value as the Narrow IV def.  This avoids caching Use*
945 /// pointers.
946 struct NarrowIVDefUse {
947   Instruction *NarrowDef = nullptr;
948   Instruction *NarrowUse = nullptr;
949   Instruction *WideDef = nullptr;
950 
951   // True if the narrow def is never negative.  Tracking this information lets
952   // us use a sign extension instead of a zero extension or vice versa, when
953   // profitable and legal.
954   bool NeverNegative = false;
955 
956   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
957                  bool NeverNegative)
958       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
959         NeverNegative(NeverNegative) {}
960 };
961 
962 /// The goal of this transform is to remove sign and zero extends without
963 /// creating any new induction variables. To do this, it creates a new phi of
964 /// the wider type and redirects all users, either removing extends or inserting
965 /// truncs whenever we stop propagating the type.
966 class WidenIV {
967   // Parameters
968   PHINode *OrigPhi;
969   Type *WideType;
970 
971   // Context
972   LoopInfo        *LI;
973   Loop            *L;
974   ScalarEvolution *SE;
975   DominatorTree   *DT;
976 
977   // Does the module have any calls to the llvm.experimental.guard intrinsic
978   // at all? If not we can avoid scanning instructions looking for guards.
979   bool HasGuards;
980 
981   // Result
982   PHINode *WidePhi = nullptr;
983   Instruction *WideInc = nullptr;
984   const SCEV *WideIncExpr = nullptr;
985   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
986 
987   SmallPtrSet<Instruction *,16> Widened;
988   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
989 
990   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
991 
992   // A map tracking the kind of extension used to widen each narrow IV
993   // and narrow IV user.
994   // Key: pointer to a narrow IV or IV user.
995   // Value: the kind of extension used to widen this Instruction.
996   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
997 
998   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
999 
1000   // A map with control-dependent ranges for post increment IV uses. The key is
1001   // a pair of IV def and a use of this def denoting the context. The value is
1002   // a ConstantRange representing possible values of the def at the given
1003   // context.
1004   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
1005 
1006   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
1007                                               Instruction *UseI) {
1008     DefUserPair Key(Def, UseI);
1009     auto It = PostIncRangeInfos.find(Key);
1010     return It == PostIncRangeInfos.end()
1011                ? Optional<ConstantRange>(None)
1012                : Optional<ConstantRange>(It->second);
1013   }
1014 
1015   void calculatePostIncRanges(PHINode *OrigPhi);
1016   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
1017 
1018   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
1019     DefUserPair Key(Def, UseI);
1020     auto It = PostIncRangeInfos.find(Key);
1021     if (It == PostIncRangeInfos.end())
1022       PostIncRangeInfos.insert({Key, R});
1023     else
1024       It->second = R.intersectWith(It->second);
1025   }
1026 
1027 public:
1028   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1029           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1030           bool HasGuards)
1031       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1032         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1033         HasGuards(HasGuards), DeadInsts(DI) {
1034     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1035     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1036   }
1037 
1038   PHINode *createWideIV(SCEVExpander &Rewriter);
1039 
1040 protected:
1041   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1042                           Instruction *Use);
1043 
1044   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1045   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1046                                      const SCEVAddRecExpr *WideAR);
1047   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1048 
1049   ExtendKind getExtendKind(Instruction *I);
1050 
1051   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1052 
1053   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1054 
1055   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1056 
1057   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1058                               unsigned OpCode) const;
1059 
1060   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1061 
1062   bool widenLoopCompare(NarrowIVDefUse DU);
1063   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1064   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1065 
1066   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1067 };
1068 
1069 } // end anonymous namespace
1070 
1071 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1072                                  bool IsSigned, Instruction *Use) {
1073   // Set the debug location and conservative insertion point.
1074   IRBuilder<> Builder(Use);
1075   // Hoist the insertion point into loop preheaders as far as possible.
1076   for (const Loop *L = LI->getLoopFor(Use->getParent());
1077        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1078        L = L->getParentLoop())
1079     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1080 
1081   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1082                     Builder.CreateZExt(NarrowOper, WideType);
1083 }
1084 
1085 /// Instantiate a wide operation to replace a narrow operation. This only needs
1086 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1087 /// 0 for any operation we decide not to clone.
1088 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1089                                   const SCEVAddRecExpr *WideAR) {
1090   unsigned Opcode = DU.NarrowUse->getOpcode();
1091   switch (Opcode) {
1092   default:
1093     return nullptr;
1094   case Instruction::Add:
1095   case Instruction::Mul:
1096   case Instruction::UDiv:
1097   case Instruction::Sub:
1098     return cloneArithmeticIVUser(DU, WideAR);
1099 
1100   case Instruction::And:
1101   case Instruction::Or:
1102   case Instruction::Xor:
1103   case Instruction::Shl:
1104   case Instruction::LShr:
1105   case Instruction::AShr:
1106     return cloneBitwiseIVUser(DU);
1107   }
1108 }
1109 
1110 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1111   Instruction *NarrowUse = DU.NarrowUse;
1112   Instruction *NarrowDef = DU.NarrowDef;
1113   Instruction *WideDef = DU.WideDef;
1114 
1115   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1116 
1117   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1118   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1119   // invariant and will be folded or hoisted. If it actually comes from a
1120   // widened IV, it should be removed during a future call to widenIVUse.
1121   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1122   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1123                    ? WideDef
1124                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1125                                       IsSigned, NarrowUse);
1126   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1127                    ? WideDef
1128                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1129                                       IsSigned, NarrowUse);
1130 
1131   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1132   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1133                                         NarrowBO->getName());
1134   IRBuilder<> Builder(NarrowUse);
1135   Builder.Insert(WideBO);
1136   WideBO->copyIRFlags(NarrowBO);
1137   return WideBO;
1138 }
1139 
1140 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1141                                             const SCEVAddRecExpr *WideAR) {
1142   Instruction *NarrowUse = DU.NarrowUse;
1143   Instruction *NarrowDef = DU.NarrowDef;
1144   Instruction *WideDef = DU.WideDef;
1145 
1146   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1147 
1148   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1149 
1150   // We're trying to find X such that
1151   //
1152   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1153   //
1154   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1155   // and check using SCEV if any of them are correct.
1156 
1157   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1158   // correct solution to X.
1159   auto GuessNonIVOperand = [&](bool SignExt) {
1160     const SCEV *WideLHS;
1161     const SCEV *WideRHS;
1162 
1163     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1164       if (SignExt)
1165         return SE->getSignExtendExpr(S, Ty);
1166       return SE->getZeroExtendExpr(S, Ty);
1167     };
1168 
1169     if (IVOpIdx == 0) {
1170       WideLHS = SE->getSCEV(WideDef);
1171       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1172       WideRHS = GetExtend(NarrowRHS, WideType);
1173     } else {
1174       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1175       WideLHS = GetExtend(NarrowLHS, WideType);
1176       WideRHS = SE->getSCEV(WideDef);
1177     }
1178 
1179     // WideUse is "WideDef `op.wide` X" as described in the comment.
1180     const SCEV *WideUse = nullptr;
1181 
1182     switch (NarrowUse->getOpcode()) {
1183     default:
1184       llvm_unreachable("No other possibility!");
1185 
1186     case Instruction::Add:
1187       WideUse = SE->getAddExpr(WideLHS, WideRHS);
1188       break;
1189 
1190     case Instruction::Mul:
1191       WideUse = SE->getMulExpr(WideLHS, WideRHS);
1192       break;
1193 
1194     case Instruction::UDiv:
1195       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1196       break;
1197 
1198     case Instruction::Sub:
1199       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1200       break;
1201     }
1202 
1203     return WideUse == WideAR;
1204   };
1205 
1206   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1207   if (!GuessNonIVOperand(SignExtend)) {
1208     SignExtend = !SignExtend;
1209     if (!GuessNonIVOperand(SignExtend))
1210       return nullptr;
1211   }
1212 
1213   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1214                    ? WideDef
1215                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1216                                       SignExtend, NarrowUse);
1217   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1218                    ? WideDef
1219                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1220                                       SignExtend, NarrowUse);
1221 
1222   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1223   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1224                                         NarrowBO->getName());
1225 
1226   IRBuilder<> Builder(NarrowUse);
1227   Builder.Insert(WideBO);
1228   WideBO->copyIRFlags(NarrowBO);
1229   return WideBO;
1230 }
1231 
1232 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1233   auto It = ExtendKindMap.find(I);
1234   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1235   return It->second;
1236 }
1237 
1238 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1239                                      unsigned OpCode) const {
1240   if (OpCode == Instruction::Add)
1241     return SE->getAddExpr(LHS, RHS);
1242   if (OpCode == Instruction::Sub)
1243     return SE->getMinusSCEV(LHS, RHS);
1244   if (OpCode == Instruction::Mul)
1245     return SE->getMulExpr(LHS, RHS);
1246 
1247   llvm_unreachable("Unsupported opcode.");
1248 }
1249 
1250 /// No-wrap operations can transfer sign extension of their result to their
1251 /// operands. Generate the SCEV value for the widened operation without
1252 /// actually modifying the IR yet. If the expression after extending the
1253 /// operands is an AddRec for this loop, return the AddRec and the kind of
1254 /// extension used.
1255 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1256   // Handle the common case of add<nsw/nuw>
1257   const unsigned OpCode = DU.NarrowUse->getOpcode();
1258   // Only Add/Sub/Mul instructions supported yet.
1259   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1260       OpCode != Instruction::Mul)
1261     return {nullptr, Unknown};
1262 
1263   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1264   // if extending the other will lead to a recurrence.
1265   const unsigned ExtendOperIdx =
1266       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1267   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1268 
1269   const SCEV *ExtendOperExpr = nullptr;
1270   const OverflowingBinaryOperator *OBO =
1271     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1272   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1273   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1274     ExtendOperExpr = SE->getSignExtendExpr(
1275       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1276   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1277     ExtendOperExpr = SE->getZeroExtendExpr(
1278       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1279   else
1280     return {nullptr, Unknown};
1281 
1282   // When creating this SCEV expr, don't apply the current operations NSW or NUW
1283   // flags. This instruction may be guarded by control flow that the no-wrap
1284   // behavior depends on. Non-control-equivalent instructions can be mapped to
1285   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1286   // semantics to those operations.
1287   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1288   const SCEV *rhs = ExtendOperExpr;
1289 
1290   // Let's swap operands to the initial order for the case of non-commutative
1291   // operations, like SUB. See PR21014.
1292   if (ExtendOperIdx == 0)
1293     std::swap(lhs, rhs);
1294   const SCEVAddRecExpr *AddRec =
1295       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1296 
1297   if (!AddRec || AddRec->getLoop() != L)
1298     return {nullptr, Unknown};
1299 
1300   return {AddRec, ExtKind};
1301 }
1302 
1303 /// Is this instruction potentially interesting for further simplification after
1304 /// widening it's type? In other words, can the extend be safely hoisted out of
1305 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1306 /// so, return the extended recurrence and the kind of extension used. Otherwise
1307 /// return {nullptr, Unknown}.
1308 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1309   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1310     return {nullptr, Unknown};
1311 
1312   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1313   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1314       SE->getTypeSizeInBits(WideType)) {
1315     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1316     // index. So don't follow this use.
1317     return {nullptr, Unknown};
1318   }
1319 
1320   const SCEV *WideExpr;
1321   ExtendKind ExtKind;
1322   if (DU.NeverNegative) {
1323     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1324     if (isa<SCEVAddRecExpr>(WideExpr))
1325       ExtKind = SignExtended;
1326     else {
1327       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1328       ExtKind = ZeroExtended;
1329     }
1330   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1331     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1332     ExtKind = SignExtended;
1333   } else {
1334     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1335     ExtKind = ZeroExtended;
1336   }
1337   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1338   if (!AddRec || AddRec->getLoop() != L)
1339     return {nullptr, Unknown};
1340   return {AddRec, ExtKind};
1341 }
1342 
1343 /// This IV user cannot be widened. Replace this use of the original narrow IV
1344 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1345 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1346   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1347   if (!InsertPt)
1348     return;
1349   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1350                     << *DU.NarrowUse << "\n");
1351   IRBuilder<> Builder(InsertPt);
1352   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1353   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1354 }
1355 
1356 /// If the narrow use is a compare instruction, then widen the compare
1357 //  (and possibly the other operand).  The extend operation is hoisted into the
1358 // loop preheader as far as possible.
1359 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1360   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1361   if (!Cmp)
1362     return false;
1363 
1364   // We can legally widen the comparison in the following two cases:
1365   //
1366   //  - The signedness of the IV extension and comparison match
1367   //
1368   //  - The narrow IV is always positive (and thus its sign extension is equal
1369   //    to its zero extension).  For instance, let's say we're zero extending
1370   //    %narrow for the following use
1371   //
1372   //      icmp slt i32 %narrow, %val   ... (A)
1373   //
1374   //    and %narrow is always positive.  Then
1375   //
1376   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1377   //          == icmp slt i32 zext(%narrow), sext(%val)
1378   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1379   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1380     return false;
1381 
1382   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1383   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1384   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1385   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1386 
1387   // Widen the compare instruction.
1388   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1389   if (!InsertPt)
1390     return false;
1391   IRBuilder<> Builder(InsertPt);
1392   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1393 
1394   // Widen the other operand of the compare, if necessary.
1395   if (CastWidth < IVWidth) {
1396     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1397     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1398   }
1399   return true;
1400 }
1401 
1402 /// If the narrow use is an instruction whose two operands are the defining
1403 /// instruction of DU and a load instruction, then we have the following:
1404 /// if the load is hoisted outside the loop, then we do not reach this function
1405 /// as scalar evolution analysis works fine in widenIVUse with variables
1406 /// hoisted outside the loop and efficient code is subsequently generated by
1407 /// not emitting truncate instructions. But when the load is not hoisted
1408 /// (whether due to limitation in alias analysis or due to a true legality),
1409 /// then scalar evolution can not proceed with loop variant values and
1410 /// inefficient code is generated. This function handles the non-hoisted load
1411 /// special case by making the optimization generate the same type of code for
1412 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1413 /// instruction). This special case is important especially when the induction
1414 /// variables are affecting addressing mode in code generation.
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1416   Instruction *NarrowUse = DU.NarrowUse;
1417   Instruction *NarrowDef = DU.NarrowDef;
1418   Instruction *WideDef = DU.WideDef;
1419 
1420   // Handle the common case of add<nsw/nuw>
1421   const unsigned OpCode = NarrowUse->getOpcode();
1422   // Only Add/Sub/Mul instructions are supported.
1423   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1424       OpCode != Instruction::Mul)
1425     return false;
1426 
1427   // The operand that is not defined by NarrowDef of DU. Let's call it the
1428   // other operand.
1429   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1430   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1431          "bad DU");
1432 
1433   const SCEV *ExtendOperExpr = nullptr;
1434   const OverflowingBinaryOperator *OBO =
1435     cast<OverflowingBinaryOperator>(NarrowUse);
1436   ExtendKind ExtKind = getExtendKind(NarrowDef);
1437   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1438     ExtendOperExpr = SE->getSignExtendExpr(
1439       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1440   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1441     ExtendOperExpr = SE->getZeroExtendExpr(
1442       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1443   else
1444     return false;
1445 
1446   // We are interested in the other operand being a load instruction.
1447   // But, we should look into relaxing this restriction later on.
1448   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1449   if (I && I->getOpcode() != Instruction::Load)
1450     return false;
1451 
1452   // Verifying that Defining operand is an AddRec
1453   const SCEV *Op1 = SE->getSCEV(WideDef);
1454   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1455   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1456     return false;
1457   // Verifying that other operand is an Extend.
1458   if (ExtKind == SignExtended) {
1459     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1460       return false;
1461   } else {
1462     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1463       return false;
1464   }
1465 
1466   if (ExtKind == SignExtended) {
1467     for (Use &U : NarrowUse->uses()) {
1468       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1469       if (!User || User->getType() != WideType)
1470         return false;
1471     }
1472   } else { // ExtKind == ZeroExtended
1473     for (Use &U : NarrowUse->uses()) {
1474       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1475       if (!User || User->getType() != WideType)
1476         return false;
1477     }
1478   }
1479 
1480   return true;
1481 }
1482 
1483 /// Special Case for widening with variant Loads (see
1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1486   Instruction *NarrowUse = DU.NarrowUse;
1487   Instruction *NarrowDef = DU.NarrowDef;
1488   Instruction *WideDef = DU.WideDef;
1489 
1490   ExtendKind ExtKind = getExtendKind(NarrowDef);
1491 
1492   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1493 
1494   // Generating a widening use instruction.
1495   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1496                    ? WideDef
1497                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1498                                       ExtKind, NarrowUse);
1499   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1500                    ? WideDef
1501                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1502                                       ExtKind, NarrowUse);
1503 
1504   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1505   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1506                                         NarrowBO->getName());
1507   IRBuilder<> Builder(NarrowUse);
1508   Builder.Insert(WideBO);
1509   WideBO->copyIRFlags(NarrowBO);
1510 
1511   if (ExtKind == SignExtended)
1512     ExtendKindMap[NarrowUse] = SignExtended;
1513   else
1514     ExtendKindMap[NarrowUse] = ZeroExtended;
1515 
1516   // Update the Use.
1517   if (ExtKind == SignExtended) {
1518     for (Use &U : NarrowUse->uses()) {
1519       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1520       if (User && User->getType() == WideType) {
1521         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1522                           << *WideBO << "\n");
1523         ++NumElimExt;
1524         User->replaceAllUsesWith(WideBO);
1525         DeadInsts.emplace_back(User);
1526       }
1527     }
1528   } else { // ExtKind == ZeroExtended
1529     for (Use &U : NarrowUse->uses()) {
1530       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1531       if (User && User->getType() == WideType) {
1532         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1533                           << *WideBO << "\n");
1534         ++NumElimExt;
1535         User->replaceAllUsesWith(WideBO);
1536         DeadInsts.emplace_back(User);
1537       }
1538     }
1539   }
1540 }
1541 
1542 /// Determine whether an individual user of the narrow IV can be widened. If so,
1543 /// return the wide clone of the user.
1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1545   assert(ExtendKindMap.count(DU.NarrowDef) &&
1546          "Should already know the kind of extension used to widen NarrowDef");
1547 
1548   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1549   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1550     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1551       // For LCSSA phis, sink the truncate outside the loop.
1552       // After SimplifyCFG most loop exit targets have a single predecessor.
1553       // Otherwise fall back to a truncate within the loop.
1554       if (UsePhi->getNumOperands() != 1)
1555         truncateIVUse(DU, DT, LI);
1556       else {
1557         // Widening the PHI requires us to insert a trunc.  The logical place
1558         // for this trunc is in the same BB as the PHI.  This is not possible if
1559         // the BB is terminated by a catchswitch.
1560         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1561           return nullptr;
1562 
1563         PHINode *WidePhi =
1564           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1565                           UsePhi);
1566         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1567         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1568         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1569         UsePhi->replaceAllUsesWith(Trunc);
1570         DeadInsts.emplace_back(UsePhi);
1571         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1572                           << *WidePhi << "\n");
1573       }
1574       return nullptr;
1575     }
1576   }
1577 
1578   // This narrow use can be widened by a sext if it's non-negative or its narrow
1579   // def was widended by a sext. Same for zext.
1580   auto canWidenBySExt = [&]() {
1581     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1582   };
1583   auto canWidenByZExt = [&]() {
1584     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1585   };
1586 
1587   // Our raison d'etre! Eliminate sign and zero extension.
1588   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590     Value *NewDef = DU.WideDef;
1591     if (DU.NarrowUse->getType() != WideType) {
1592       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594       if (CastWidth < IVWidth) {
1595         // The cast isn't as wide as the IV, so insert a Trunc.
1596         IRBuilder<> Builder(DU.NarrowUse);
1597         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1598       }
1599       else {
1600         // A wider extend was hidden behind a narrower one. This may induce
1601         // another round of IV widening in which the intermediate IV becomes
1602         // dead. It should be very rare.
1603         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604                           << " not wide enough to subsume " << *DU.NarrowUse
1605                           << "\n");
1606         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607         NewDef = DU.NarrowUse;
1608       }
1609     }
1610     if (NewDef != DU.NarrowUse) {
1611       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612                         << " replaced by " << *DU.WideDef << "\n");
1613       ++NumElimExt;
1614       DU.NarrowUse->replaceAllUsesWith(NewDef);
1615       DeadInsts.emplace_back(DU.NarrowUse);
1616     }
1617     // Now that the extend is gone, we want to expose it's uses for potential
1618     // further simplification. We don't need to directly inform SimplifyIVUsers
1619     // of the new users, because their parent IV will be processed later as a
1620     // new loop phi. If we preserved IVUsers analysis, we would also want to
1621     // push the uses of WideDef here.
1622 
1623     // No further widening is needed. The deceased [sz]ext had done it for us.
1624     return nullptr;
1625   }
1626 
1627   // Does this user itself evaluate to a recurrence after widening?
1628   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629   if (!WideAddRec.first)
1630     WideAddRec = getWideRecurrence(DU);
1631 
1632   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633   if (!WideAddRec.first) {
1634     // If use is a loop condition, try to promote the condition instead of
1635     // truncating the IV first.
1636     if (widenLoopCompare(DU))
1637       return nullptr;
1638 
1639     // We are here about to generate a truncate instruction that may hurt
1640     // performance because the scalar evolution expression computed earlier
1641     // in WideAddRec.first does not indicate a polynomial induction expression.
1642     // In that case, look at the operands of the use instruction to determine
1643     // if we can still widen the use instead of truncating its operand.
1644     if (widenWithVariantLoadUse(DU)) {
1645       widenWithVariantLoadUseCodegen(DU);
1646       return nullptr;
1647     }
1648 
1649     // This user does not evaluate to a recurrence after widening, so don't
1650     // follow it. Instead insert a Trunc to kill off the original use,
1651     // eventually isolating the original narrow IV so it can be removed.
1652     truncateIVUse(DU, DT, LI);
1653     return nullptr;
1654   }
1655   // Assume block terminators cannot evaluate to a recurrence. We can't to
1656   // insert a Trunc after a terminator if there happens to be a critical edge.
1657   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658          "SCEV is not expected to evaluate a block terminator");
1659 
1660   // Reuse the IV increment that SCEVExpander created as long as it dominates
1661   // NarrowUse.
1662   Instruction *WideUse = nullptr;
1663   if (WideAddRec.first == WideIncExpr &&
1664       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1665     WideUse = WideInc;
1666   else {
1667     WideUse = cloneIVUser(DU, WideAddRec.first);
1668     if (!WideUse)
1669       return nullptr;
1670   }
1671   // Evaluation of WideAddRec ensured that the narrow expression could be
1672   // extended outside the loop without overflow. This suggests that the wide use
1673   // evaluates to the same expression as the extended narrow use, but doesn't
1674   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1675   // where it fails, we simply throw away the newly created wide use.
1676   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1677     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1678                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1679                       << "\n");
1680     DeadInsts.emplace_back(WideUse);
1681     return nullptr;
1682   }
1683 
1684   // if we reached this point then we are going to replace
1685   // DU.NarrowUse with WideUse. Reattach DbgValue then.
1686   replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1687 
1688   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1689   // Returning WideUse pushes it on the worklist.
1690   return WideUse;
1691 }
1692 
1693 /// Add eligible users of NarrowDef to NarrowIVUsers.
1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1695   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1696   bool NonNegativeDef =
1697       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1698                            SE->getConstant(NarrowSCEV->getType(), 0));
1699   for (User *U : NarrowDef->users()) {
1700     Instruction *NarrowUser = cast<Instruction>(U);
1701 
1702     // Handle data flow merges and bizarre phi cycles.
1703     if (!Widened.insert(NarrowUser).second)
1704       continue;
1705 
1706     bool NonNegativeUse = false;
1707     if (!NonNegativeDef) {
1708       // We might have a control-dependent range information for this context.
1709       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1710         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1711     }
1712 
1713     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1714                                NonNegativeDef || NonNegativeUse);
1715   }
1716 }
1717 
1718 /// Process a single induction variable. First use the SCEVExpander to create a
1719 /// wide induction variable that evaluates to the same recurrence as the
1720 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1721 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1722 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1723 ///
1724 /// It would be simpler to delete uses as they are processed, but we must avoid
1725 /// invalidating SCEV expressions.
1726 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1727   // Is this phi an induction variable?
1728   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1729   if (!AddRec)
1730     return nullptr;
1731 
1732   // Widen the induction variable expression.
1733   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1734                                ? SE->getSignExtendExpr(AddRec, WideType)
1735                                : SE->getZeroExtendExpr(AddRec, WideType);
1736 
1737   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1738          "Expect the new IV expression to preserve its type");
1739 
1740   // Can the IV be extended outside the loop without overflow?
1741   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1742   if (!AddRec || AddRec->getLoop() != L)
1743     return nullptr;
1744 
1745   // An AddRec must have loop-invariant operands. Since this AddRec is
1746   // materialized by a loop header phi, the expression cannot have any post-loop
1747   // operands, so they must dominate the loop header.
1748   assert(
1749       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1750       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1751       "Loop header phi recurrence inputs do not dominate the loop");
1752 
1753   // Iterate over IV uses (including transitive ones) looking for IV increments
1754   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1755   // the increment calculate control-dependent range information basing on
1756   // dominating conditions inside of the loop (e.g. a range check inside of the
1757   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1758   //
1759   // Control-dependent range information is later used to prove that a narrow
1760   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1761   // this on demand because when pushNarrowIVUsers needs this information some
1762   // of the dominating conditions might be already widened.
1763   if (UsePostIncrementRanges)
1764     calculatePostIncRanges(OrigPhi);
1765 
1766   // The rewriter provides a value for the desired IV expression. This may
1767   // either find an existing phi or materialize a new one. Either way, we
1768   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1769   // of the phi-SCC dominates the loop entry.
1770   Instruction *InsertPt = &L->getHeader()->front();
1771   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1772 
1773   // Remembering the WideIV increment generated by SCEVExpander allows
1774   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1775   // employ a general reuse mechanism because the call above is the only call to
1776   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1777   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1778     WideInc =
1779       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1780     WideIncExpr = SE->getSCEV(WideInc);
1781     // Propagate the debug location associated with the original loop increment
1782     // to the new (widened) increment.
1783     auto *OrigInc =
1784       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1785     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1786   }
1787 
1788   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1789   ++NumWidened;
1790 
1791   // Traverse the def-use chain using a worklist starting at the original IV.
1792   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1793 
1794   Widened.insert(OrigPhi);
1795   pushNarrowIVUsers(OrigPhi, WidePhi);
1796 
1797   while (!NarrowIVUsers.empty()) {
1798     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1799 
1800     // Process a def-use edge. This may replace the use, so don't hold a
1801     // use_iterator across it.
1802     Instruction *WideUse = widenIVUse(DU, Rewriter);
1803 
1804     // Follow all def-use edges from the previous narrow use.
1805     if (WideUse)
1806       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1807 
1808     // widenIVUse may have removed the def-use edge.
1809     if (DU.NarrowDef->use_empty())
1810       DeadInsts.emplace_back(DU.NarrowDef);
1811   }
1812 
1813   // Attach any debug information to the new PHI.
1814   replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1815 
1816   return WidePhi;
1817 }
1818 
1819 /// Calculates control-dependent range for the given def at the given context
1820 /// by looking at dominating conditions inside of the loop
1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1822                                     Instruction *NarrowUser) {
1823   using namespace llvm::PatternMatch;
1824 
1825   Value *NarrowDefLHS;
1826   const APInt *NarrowDefRHS;
1827   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1828                                  m_APInt(NarrowDefRHS))) ||
1829       !NarrowDefRHS->isNonNegative())
1830     return;
1831 
1832   auto UpdateRangeFromCondition = [&] (Value *Condition,
1833                                        bool TrueDest) {
1834     CmpInst::Predicate Pred;
1835     Value *CmpRHS;
1836     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1837                                  m_Value(CmpRHS))))
1838       return;
1839 
1840     CmpInst::Predicate P =
1841             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1842 
1843     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1844     auto CmpConstrainedLHSRange =
1845             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1846     auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1847         *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1848 
1849     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1850   };
1851 
1852   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1853     if (!HasGuards)
1854       return;
1855 
1856     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1857                                      Ctx->getParent()->rend())) {
1858       Value *C = nullptr;
1859       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1860         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1861     }
1862   };
1863 
1864   UpdateRangeFromGuards(NarrowUser);
1865 
1866   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1867   // If NarrowUserBB is statically unreachable asking dominator queries may
1868   // yield surprising results. (e.g. the block may not have a dom tree node)
1869   if (!DT->isReachableFromEntry(NarrowUserBB))
1870     return;
1871 
1872   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1873        L->contains(DTB->getBlock());
1874        DTB = DTB->getIDom()) {
1875     auto *BB = DTB->getBlock();
1876     auto *TI = BB->getTerminator();
1877     UpdateRangeFromGuards(TI);
1878 
1879     auto *BI = dyn_cast<BranchInst>(TI);
1880     if (!BI || !BI->isConditional())
1881       continue;
1882 
1883     auto *TrueSuccessor = BI->getSuccessor(0);
1884     auto *FalseSuccessor = BI->getSuccessor(1);
1885 
1886     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1887       return BBE.isSingleEdge() &&
1888              DT->dominates(BBE, NarrowUser->getParent());
1889     };
1890 
1891     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1892       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1893 
1894     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1895       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1896   }
1897 }
1898 
1899 /// Calculates PostIncRangeInfos map for the given IV
1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1901   SmallPtrSet<Instruction *, 16> Visited;
1902   SmallVector<Instruction *, 6> Worklist;
1903   Worklist.push_back(OrigPhi);
1904   Visited.insert(OrigPhi);
1905 
1906   while (!Worklist.empty()) {
1907     Instruction *NarrowDef = Worklist.pop_back_val();
1908 
1909     for (Use &U : NarrowDef->uses()) {
1910       auto *NarrowUser = cast<Instruction>(U.getUser());
1911 
1912       // Don't go looking outside the current loop.
1913       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1914       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1915         continue;
1916 
1917       if (!Visited.insert(NarrowUser).second)
1918         continue;
1919 
1920       Worklist.push_back(NarrowUser);
1921 
1922       calculatePostIncRange(NarrowDef, NarrowUser);
1923     }
1924   }
1925 }
1926 
1927 //===----------------------------------------------------------------------===//
1928 //  Live IV Reduction - Minimize IVs live across the loop.
1929 //===----------------------------------------------------------------------===//
1930 
1931 //===----------------------------------------------------------------------===//
1932 //  Simplification of IV users based on SCEV evaluation.
1933 //===----------------------------------------------------------------------===//
1934 
1935 namespace {
1936 
1937 class IndVarSimplifyVisitor : public IVVisitor {
1938   ScalarEvolution *SE;
1939   const TargetTransformInfo *TTI;
1940   PHINode *IVPhi;
1941 
1942 public:
1943   WideIVInfo WI;
1944 
1945   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1946                         const TargetTransformInfo *TTI,
1947                         const DominatorTree *DTree)
1948     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1949     DT = DTree;
1950     WI.NarrowIV = IVPhi;
1951   }
1952 
1953   // Implement the interface used by simplifyUsersOfIV.
1954   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1955 };
1956 
1957 } // end anonymous namespace
1958 
1959 /// Iteratively perform simplification on a worklist of IV users. Each
1960 /// successive simplification may push more users which may themselves be
1961 /// candidates for simplification.
1962 ///
1963 /// Sign/Zero extend elimination is interleaved with IV simplification.
1964 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1965                                        SCEVExpander &Rewriter,
1966                                        LoopInfo *LI) {
1967   SmallVector<WideIVInfo, 8> WideIVs;
1968 
1969   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1970           Intrinsic::getName(Intrinsic::experimental_guard));
1971   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1972 
1973   SmallVector<PHINode*, 8> LoopPhis;
1974   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1975     LoopPhis.push_back(cast<PHINode>(I));
1976   }
1977   // Each round of simplification iterates through the SimplifyIVUsers worklist
1978   // for all current phis, then determines whether any IVs can be
1979   // widened. Widening adds new phis to LoopPhis, inducing another round of
1980   // simplification on the wide IVs.
1981   bool Changed = false;
1982   while (!LoopPhis.empty()) {
1983     // Evaluate as many IV expressions as possible before widening any IVs. This
1984     // forces SCEV to set no-wrap flags before evaluating sign/zero
1985     // extension. The first time SCEV attempts to normalize sign/zero extension,
1986     // the result becomes final. So for the most predictable results, we delay
1987     // evaluation of sign/zero extend evaluation until needed, and avoid running
1988     // other SCEV based analysis prior to simplifyAndExtend.
1989     do {
1990       PHINode *CurrIV = LoopPhis.pop_back_val();
1991 
1992       // Information about sign/zero extensions of CurrIV.
1993       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1994 
1995       Changed |=
1996           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1997 
1998       if (Visitor.WI.WidestNativeType) {
1999         WideIVs.push_back(Visitor.WI);
2000       }
2001     } while(!LoopPhis.empty());
2002 
2003     for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006         Changed = true;
2007         LoopPhis.push_back(WidePhi);
2008       }
2009     }
2010   }
2011   return Changed;
2012 }
2013 
2014 //===----------------------------------------------------------------------===//
2015 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2017 
2018 /// Given an Value which is hoped to be part of an add recurance in the given
2019 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2020 /// that this is less general than SCEVs AddRec checking.
2021 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2022   Instruction *IncI = dyn_cast<Instruction>(IncV);
2023   if (!IncI)
2024     return nullptr;
2025 
2026   switch (IncI->getOpcode()) {
2027   case Instruction::Add:
2028   case Instruction::Sub:
2029     break;
2030   case Instruction::GetElementPtr:
2031     // An IV counter must preserve its type.
2032     if (IncI->getNumOperands() == 2)
2033       break;
2034     LLVM_FALLTHROUGH;
2035   default:
2036     return nullptr;
2037   }
2038 
2039   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040   if (Phi && Phi->getParent() == L->getHeader()) {
2041     if (L->isLoopInvariant(IncI->getOperand(1)))
2042       return Phi;
2043     return nullptr;
2044   }
2045   if (IncI->getOpcode() == Instruction::GetElementPtr)
2046     return nullptr;
2047 
2048   // Allow add/sub to be commuted.
2049   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050   if (Phi && Phi->getParent() == L->getHeader()) {
2051     if (L->isLoopInvariant(IncI->getOperand(0)))
2052       return Phi;
2053   }
2054   return nullptr;
2055 }
2056 
2057 /// Whether the current loop exit test is based on this value.  Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062   // TODO: Allow non-icmp loop test.
2063   if (!ICmp)
2064     return false;
2065 
2066   // TODO: Allow indirect use.
2067   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2068 }
2069 
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073   assert(L->getLoopLatch() && "Must be in simplified form");
2074 
2075   // Avoid converting a constant or loop invariant test back to a runtime
2076   // test.  This is critical for when SCEV's cached ExitCount is less precise
2077   // than the current IR (such as after we've proven a particular exit is
2078   // actually dead and thus the BE count never reaches our ExitCount.)
2079   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080   if (L->isLoopInvariant(BI->getCondition()))
2081     return false;
2082 
2083   // Do LFTR to simplify the exit condition to an ICMP.
2084   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085   if (!Cond)
2086     return true;
2087 
2088   // Do LFTR to simplify the exit ICMP to EQ/NE
2089   ICmpInst::Predicate Pred = Cond->getPredicate();
2090   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091     return true;
2092 
2093   // Look for a loop invariant RHS
2094   Value *LHS = Cond->getOperand(0);
2095   Value *RHS = Cond->getOperand(1);
2096   if (!L->isLoopInvariant(RHS)) {
2097     if (!L->isLoopInvariant(LHS))
2098       return true;
2099     std::swap(LHS, RHS);
2100   }
2101   // Look for a simple IV counter LHS
2102   PHINode *Phi = dyn_cast<PHINode>(LHS);
2103   if (!Phi)
2104     Phi = getLoopPhiForCounter(LHS, L);
2105 
2106   if (!Phi)
2107     return true;
2108 
2109   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111   if (Idx < 0)
2112     return true;
2113 
2114   // Do LFTR if the exit condition's IV is *not* a simple counter.
2115   Value *IncV = Phi->getIncomingValue(Idx);
2116   return Phi != getLoopPhiForCounter(IncV, L);
2117 }
2118 
2119 /// Return true if undefined behavior would provable be executed on the path to
2120 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison.  This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist.  Note that a false result conveys no information.
2126 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2127                                           Instruction *OnPathTo,
2128                                           DominatorTree *DT) {
2129   // Basic approach is to assume Root is poison, propagate poison forward
2130   // through all users we can easily track, and then check whether any of those
2131   // users are provable UB and must execute before out exiting block might
2132   // exit.
2133 
2134   // The set of all recursive users we've visited (which are assumed to all be
2135   // poison because of said visit)
2136   SmallSet<const Value *, 16> KnownPoison;
2137   SmallVector<const Instruction*, 16> Worklist;
2138   Worklist.push_back(Root);
2139   while (!Worklist.empty()) {
2140     const Instruction *I = Worklist.pop_back_val();
2141 
2142     // If we know this must trigger UB on a path leading our target.
2143     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144       return true;
2145 
2146     // If we can't analyze propagation through this instruction, just skip it
2147     // and transitive users.  Safe as false is a conservative result.
2148     if (!propagatesFullPoison(I) && I != Root)
2149       continue;
2150 
2151     if (KnownPoison.insert(I).second)
2152       for (const User *User : I->users())
2153         Worklist.push_back(cast<Instruction>(User));
2154   }
2155 
2156   // Might be non-UB, or might have a path we couldn't prove must execute on
2157   // way to exiting bb.
2158   return false;
2159 }
2160 
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2164 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2165                                unsigned Depth) {
2166   if (isa<Constant>(V))
2167     return !isa<UndefValue>(V);
2168 
2169   if (Depth >= 6)
2170     return false;
2171 
2172   // Conservatively handle non-constant non-instructions. For example, Arguments
2173   // may be undef.
2174   Instruction *I = dyn_cast<Instruction>(V);
2175   if (!I)
2176     return false;
2177 
2178   // Load and return values may be undef.
2179   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180     return false;
2181 
2182   // Optimistically handle other instructions.
2183   for (Value *Op : I->operands()) {
2184     if (!Visited.insert(Op).second)
2185       continue;
2186     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187       return false;
2188   }
2189   return true;
2190 }
2191 
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2194 ///
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
2197 static bool hasConcreteDef(Value *V) {
2198   SmallPtrSet<Value*, 8> Visited;
2199   Visited.insert(V);
2200   return hasConcreteDefImpl(V, Visited, 0);
2201 }
2202 
2203 /// Return true if this IV has any uses other than the (soon to be rewritten)
2204 /// loop exit test.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207   Value *IncV = Phi->getIncomingValue(LatchIdx);
2208 
2209   for (User *U : Phi->users())
2210     if (U != Cond && U != IncV) return false;
2211 
2212   for (User *U : IncV->users())
2213     if (U != Cond && U != Phi) return false;
2214   return true;
2215 }
2216 
2217 /// Return true if the given phi is a "counter" in L.  A counter is an
2218 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1.  Note that L must have exactly one latch.
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221                           ScalarEvolution *SE) {
2222   assert(Phi->getParent() == L->getHeader());
2223   assert(L->getLoopLatch());
2224 
2225   if (!SE->isSCEVable(Phi->getType()))
2226     return false;
2227 
2228   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229   if (!AR || AR->getLoop() != L || !AR->isAffine())
2230     return false;
2231 
2232   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233   if (!Step || !Step->isOne())
2234     return false;
2235 
2236   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237   Value *IncV = Phi->getIncomingValue(LatchIdx);
2238   return (getLoopPhiForCounter(IncV, L) == Phi);
2239 }
2240 
2241 /// Search the loop header for a loop counter (anadd rec w/step of one)
2242 /// suitable for use by LFTR.  If multiple counters are available, select the
2243 /// "best" one based profitable heuristics.
2244 ///
2245 /// BECount may be an i8* pointer type. The pointer difference is already
2246 /// valid count without scaling the address stride, so it remains a pointer
2247 /// expression as far as SCEV is concerned.
2248 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2249                                 const SCEV *BECount,
2250                                 ScalarEvolution *SE, DominatorTree *DT) {
2251   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2252 
2253   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2254 
2255   // Loop over all of the PHI nodes, looking for a simple counter.
2256   PHINode *BestPhi = nullptr;
2257   const SCEV *BestInit = nullptr;
2258   BasicBlock *LatchBlock = L->getLoopLatch();
2259   assert(LatchBlock && "Must be in simplified form");
2260   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2261 
2262   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2263     PHINode *Phi = cast<PHINode>(I);
2264     if (!isLoopCounter(Phi, L, SE))
2265       continue;
2266 
2267     // Avoid comparing an integer IV against a pointer Limit.
2268     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2269       continue;
2270 
2271     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2272 
2273     // AR may be a pointer type, while BECount is an integer type.
2274     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2275     // AR may not be a narrower type, or we may never exit.
2276     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2277     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2278       continue;
2279 
2280     // Avoid reusing a potentially undef value to compute other values that may
2281     // have originally had a concrete definition.
2282     if (!hasConcreteDef(Phi)) {
2283       // We explicitly allow unknown phis as long as they are already used by
2284       // the loop exit test.  This is legal since performing LFTR could not
2285       // increase the number of undef users.
2286       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2287       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2288           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2289         continue;
2290     }
2291 
2292     // Avoid introducing undefined behavior due to poison which didn't exist in
2293     // the original program.  (Annoyingly, the rules for poison and undef
2294     // propagation are distinct, so this does NOT cover the undef case above.)
2295     // We have to ensure that we don't introduce UB by introducing a use on an
2296     // iteration where said IV produces poison.  Our strategy here differs for
2297     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2298     // see code in linearFunctionTestReplace.  For pointers, we restrict
2299     // transforms as there is no good way to reinfer inbounds once lost.
2300     if (!Phi->getType()->isIntegerTy() &&
2301         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2302       continue;
2303 
2304     const SCEV *Init = AR->getStart();
2305 
2306     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2307       // Don't force a live loop counter if another IV can be used.
2308       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2309         continue;
2310 
2311       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2312       // also prefers integer to pointer IVs.
2313       if (BestInit->isZero() != Init->isZero()) {
2314         if (BestInit->isZero())
2315           continue;
2316       }
2317       // If two IVs both count from zero or both count from nonzero then the
2318       // narrower is likely a dead phi that has been widened. Use the wider phi
2319       // to allow the other to be eliminated.
2320       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2321         continue;
2322     }
2323     BestPhi = Phi;
2324     BestInit = Init;
2325   }
2326   return BestPhi;
2327 }
2328 
2329 /// Insert an IR expression which computes the value held by the IV IndVar
2330 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2331 /// is taken ExitCount times.
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2333                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2334                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2335   assert(isLoopCounter(IndVar, L, SE));
2336   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2337   const SCEV *IVInit = AR->getStart();
2338 
2339   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2340   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2341   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2342   // the existing GEPs whenever possible.
2343   if (IndVar->getType()->isPointerTy() &&
2344       !ExitCount->getType()->isPointerTy()) {
2345     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2346     // signed value. ExitCount on the other hand represents the loop trip count,
2347     // which is an unsigned value. FindLoopCounter only allows induction
2348     // variables that have a positive unit stride of one. This means we don't
2349     // have to handle the case of negative offsets (yet) and just need to zero
2350     // extend ExitCount.
2351     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2352     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2353     if (UsePostInc)
2354       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2355 
2356     // Expand the code for the iteration count.
2357     assert(SE->isLoopInvariant(IVOffset, L) &&
2358            "Computed iteration count is not loop invariant!");
2359 
2360     // We could handle pointer IVs other than i8*, but we need to compensate for
2361     // gep index scaling.
2362     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2363                              cast<PointerType>(IndVar->getType())
2364                                  ->getElementType())->isOne() &&
2365            "unit stride pointer IV must be i8*");
2366 
2367     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2368     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2369     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2370   } else {
2371     // In any other case, convert both IVInit and ExitCount to integers before
2372     // comparing. This may result in SCEV expansion of pointers, but in practice
2373     // SCEV will fold the pointer arithmetic away as such:
2374     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2375     //
2376     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2377     // for simple memset-style loops.
2378     //
2379     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2380     // were generated on top of case #2, which is not expected.
2381 
2382     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2383     // For unit stride, IVCount = Start + ExitCount with 2's complement
2384     // overflow.
2385 
2386     // For integer IVs, truncate the IV before computing IVInit + BECount,
2387     // unless we know apriori that the limit must be a constant when evaluated
2388     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2389     // of the IV in the loop over a (potentially) expensive expansion of the
2390     // widened exit count add(zext(add)) expression.
2391     if (SE->getTypeSizeInBits(IVInit->getType())
2392         > SE->getTypeSizeInBits(ExitCount->getType())) {
2393       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2394         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2395       else
2396         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2397     }
2398 
2399     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2400 
2401     if (UsePostInc)
2402       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2403 
2404     // Expand the code for the iteration count.
2405     assert(SE->isLoopInvariant(IVLimit, L) &&
2406            "Computed iteration count is not loop invariant!");
2407     // Ensure that we generate the same type as IndVar, or a smaller integer
2408     // type. In the presence of null pointer values, we have an integer type
2409     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2410     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2411       IndVar->getType() : ExitCount->getType();
2412     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2413     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2414   }
2415 }
2416 
2417 /// This method rewrites the exit condition of the loop to be a canonical !=
2418 /// comparison against the incremented loop induction variable.  This pass is
2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2420 /// determine a loop-invariant trip count of the loop, which is actually a much
2421 /// broader range than just linear tests.
2422 bool IndVarSimplify::
2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2424                           const SCEV *ExitCount,
2425                           PHINode *IndVar, SCEVExpander &Rewriter) {
2426   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2427   assert(isLoopCounter(IndVar, L, SE));
2428   Instruction * const IncVar =
2429     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2430 
2431   // Initialize CmpIndVar to the preincremented IV.
2432   Value *CmpIndVar = IndVar;
2433   bool UsePostInc = false;
2434 
2435   // If the exiting block is the same as the backedge block, we prefer to
2436   // compare against the post-incremented value, otherwise we must compare
2437   // against the preincremented value.
2438   if (ExitingBB == L->getLoopLatch()) {
2439     // For pointer IVs, we chose to not strip inbounds which requires us not
2440     // to add a potentially UB introducing use.  We need to either a) show
2441     // the loop test we're modifying is already in post-inc form, or b) show
2442     // that adding a use must not introduce UB.
2443     bool SafeToPostInc =
2444         IndVar->getType()->isIntegerTy() ||
2445         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2446         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2447     if (SafeToPostInc) {
2448       UsePostInc = true;
2449       CmpIndVar = IncVar;
2450     }
2451   }
2452 
2453   // It may be necessary to drop nowrap flags on the incrementing instruction
2454   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2455   // case the increment might have previously been poison on the last iteration
2456   // only) or if LFTR switches to a different IV that was previously dynamically
2457   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2458   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2459   // check), because the pre-inc addrec flags may be adopted from the original
2460   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2461   // TODO: This handling is inaccurate for one case: If we switch to a
2462   // dynamically dead IV that wraps on the first loop iteration only, which is
2463   // not covered by the post-inc addrec. (If the new IV was not dynamically
2464   // dead, it could not be poison on the first iteration in the first place.)
2465   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2466     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2467     if (BO->hasNoUnsignedWrap())
2468       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2469     if (BO->hasNoSignedWrap())
2470       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2471   }
2472 
2473   Value *ExitCnt = genLoopLimit(
2474       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2475   assert(ExitCnt->getType()->isPointerTy() ==
2476              IndVar->getType()->isPointerTy() &&
2477          "genLoopLimit missed a cast");
2478 
2479   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2480   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2481   ICmpInst::Predicate P;
2482   if (L->contains(BI->getSuccessor(0)))
2483     P = ICmpInst::ICMP_NE;
2484   else
2485     P = ICmpInst::ICMP_EQ;
2486 
2487   IRBuilder<> Builder(BI);
2488 
2489   // The new loop exit condition should reuse the debug location of the
2490   // original loop exit condition.
2491   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2492     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2493 
2494   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2495   // avoid the expensive expansion of the limit expression in the wider type,
2496   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2497   // since we know (from the exit count bitwidth), that we can't self-wrap in
2498   // the narrower type.
2499   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2500   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2501   if (CmpIndVarSize > ExitCntSize) {
2502     assert(!CmpIndVar->getType()->isPointerTy() &&
2503            !ExitCnt->getType()->isPointerTy());
2504 
2505     // Before resorting to actually inserting the truncate, use the same
2506     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2507     // the other side of the comparison instead.  We still evaluate the limit
2508     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2509     // a truncate within in.
2510     bool Extended = false;
2511     const SCEV *IV = SE->getSCEV(CmpIndVar);
2512     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2513                                                   ExitCnt->getType());
2514     const SCEV *ZExtTrunc =
2515       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2516 
2517     if (ZExtTrunc == IV) {
2518       Extended = true;
2519       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2520                                    "wide.trip.count");
2521     } else {
2522       const SCEV *SExtTrunc =
2523         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2524       if (SExtTrunc == IV) {
2525         Extended = true;
2526         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2527                                      "wide.trip.count");
2528       }
2529     }
2530 
2531     if (Extended) {
2532       bool Discard;
2533       L->makeLoopInvariant(ExitCnt, Discard);
2534     } else
2535       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2536                                       "lftr.wideiv");
2537   }
2538   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2539                     << "      LHS:" << *CmpIndVar << '\n'
2540                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2541                     << "\n"
2542                     << "      RHS:\t" << *ExitCnt << "\n"
2543                     << "ExitCount:\t" << *ExitCount << "\n"
2544                     << "  was: " << *BI->getCondition() << "\n");
2545 
2546   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2547   Value *OrigCond = BI->getCondition();
2548   // It's tempting to use replaceAllUsesWith here to fully replace the old
2549   // comparison, but that's not immediately safe, since users of the old
2550   // comparison may not be dominated by the new comparison. Instead, just
2551   // update the branch to use the new comparison; in the common case this
2552   // will make old comparison dead.
2553   BI->setCondition(Cond);
2554   DeadInsts.push_back(OrigCond);
2555 
2556   ++NumLFTR;
2557   return true;
2558 }
2559 
2560 //===----------------------------------------------------------------------===//
2561 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2562 //===----------------------------------------------------------------------===//
2563 
2564 /// If there's a single exit block, sink any loop-invariant values that
2565 /// were defined in the preheader but not used inside the loop into the
2566 /// exit block to reduce register pressure in the loop.
2567 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2568   BasicBlock *ExitBlock = L->getExitBlock();
2569   if (!ExitBlock) return false;
2570 
2571   BasicBlock *Preheader = L->getLoopPreheader();
2572   if (!Preheader) return false;
2573 
2574   bool MadeAnyChanges = false;
2575   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2576   BasicBlock::iterator I(Preheader->getTerminator());
2577   while (I != Preheader->begin()) {
2578     --I;
2579     // New instructions were inserted at the end of the preheader.
2580     if (isa<PHINode>(I))
2581       break;
2582 
2583     // Don't move instructions which might have side effects, since the side
2584     // effects need to complete before instructions inside the loop.  Also don't
2585     // move instructions which might read memory, since the loop may modify
2586     // memory. Note that it's okay if the instruction might have undefined
2587     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2588     // block.
2589     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2590       continue;
2591 
2592     // Skip debug info intrinsics.
2593     if (isa<DbgInfoIntrinsic>(I))
2594       continue;
2595 
2596     // Skip eh pad instructions.
2597     if (I->isEHPad())
2598       continue;
2599 
2600     // Don't sink alloca: we never want to sink static alloca's out of the
2601     // entry block, and correctly sinking dynamic alloca's requires
2602     // checks for stacksave/stackrestore intrinsics.
2603     // FIXME: Refactor this check somehow?
2604     if (isa<AllocaInst>(I))
2605       continue;
2606 
2607     // Determine if there is a use in or before the loop (direct or
2608     // otherwise).
2609     bool UsedInLoop = false;
2610     for (Use &U : I->uses()) {
2611       Instruction *User = cast<Instruction>(U.getUser());
2612       BasicBlock *UseBB = User->getParent();
2613       if (PHINode *P = dyn_cast<PHINode>(User)) {
2614         unsigned i =
2615           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2616         UseBB = P->getIncomingBlock(i);
2617       }
2618       if (UseBB == Preheader || L->contains(UseBB)) {
2619         UsedInLoop = true;
2620         break;
2621       }
2622     }
2623 
2624     // If there is, the def must remain in the preheader.
2625     if (UsedInLoop)
2626       continue;
2627 
2628     // Otherwise, sink it to the exit block.
2629     Instruction *ToMove = &*I;
2630     bool Done = false;
2631 
2632     if (I != Preheader->begin()) {
2633       // Skip debug info intrinsics.
2634       do {
2635         --I;
2636       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2637 
2638       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2639         Done = true;
2640     } else {
2641       Done = true;
2642     }
2643 
2644     MadeAnyChanges = true;
2645     ToMove->moveBefore(*ExitBlock, InsertPt);
2646     if (Done) break;
2647     InsertPt = ToMove->getIterator();
2648   }
2649 
2650   return MadeAnyChanges;
2651 }
2652 
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
2657 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2658                                             DominatorTree &DT, Loop *L) {
2659   SmallVector<BasicBlock*, 16> ExitingBlocks;
2660   L->getExitingBlocks(ExitingBlocks);
2661 
2662   // Form an expression for the maximum exit count possible for this loop. We
2663   // merge the max and exact information to approximate a version of
2664   // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665   SmallVector<const SCEV*, 4> ExitCounts;
2666   for (BasicBlock *ExitingBB : ExitingBlocks) {
2667     const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2668     if (isa<SCEVCouldNotCompute>(ExitCount))
2669       ExitCount = SE.getExitCount(L, ExitingBB,
2670                                   ScalarEvolution::ConstantMaximum);
2671     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672       assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673              "We should only have known counts for exiting blocks that "
2674              "dominate latch!");
2675       ExitCounts.push_back(ExitCount);
2676     }
2677   }
2678   if (ExitCounts.empty())
2679     return SE.getCouldNotCompute();
2680   return SE.getUMinFromMismatchedTypes(ExitCounts);
2681 }
2682 
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684   SmallVector<BasicBlock*, 16> ExitingBlocks;
2685   L->getExitingBlocks(ExitingBlocks);
2686 
2687   // Remove all exits which aren't both rewriteable and analyzeable.
2688   auto NewEnd = llvm::remove_if(ExitingBlocks,
2689                                 [&](BasicBlock *ExitingBB) {
2690     // If our exitting block exits multiple loops, we can only rewrite the
2691     // innermost one.  Otherwise, we're changing how many times the innermost
2692     // loop runs before it exits.
2693     if (LI->getLoopFor(ExitingBB) != L)
2694       return true;
2695 
2696     // Can't rewrite non-branch yet.
2697     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698     if (!BI)
2699       return true;
2700 
2701     // If already constant, nothing to do.
2702     if (isa<Constant>(BI->getCondition()))
2703       return true;
2704 
2705     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706     if (isa<SCEVCouldNotCompute>(ExitCount))
2707       return true;
2708     return false;
2709    });
2710   ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2711 
2712   if (ExitingBlocks.empty())
2713     return false;
2714 
2715   // Get a symbolic upper bound on the loop backedge taken count.
2716   const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717   if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718     return false;
2719 
2720   // Visit our exit blocks in order of dominance.  We know from the fact that
2721   // all exits (left) are analyzeable that the must be a total dominance order
2722   // between them as each must dominate the latch.  The visit order only
2723   // matters for the provably equal case.
2724   llvm::sort(ExitingBlocks,
2725              [&](BasicBlock *A, BasicBlock *B) {
2726                // std::sort sorts in ascending order, so we want the inverse of
2727                // the normal dominance relation.
2728                if (DT->properlyDominates(A, B)) return true;
2729                if (DT->properlyDominates(B, A)) return false;
2730                llvm_unreachable("expected total dominance order!");
2731              });
2732 #ifdef ASSERT
2733   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735   }
2736 #endif
2737 
2738   auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2739     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2740     bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2741     auto *OldCond = BI->getCondition();
2742     auto *NewCond = ConstantInt::get(OldCond->getType(),
2743                                      IsTaken ? ExitIfTrue : !ExitIfTrue);
2744     BI->setCondition(NewCond);
2745     if (OldCond->use_empty())
2746       DeadInsts.push_back(OldCond);
2747   };
2748 
2749   bool Changed = false;
2750   SmallSet<const SCEV*, 8> DominatingExitCounts;
2751   for (BasicBlock *ExitingBB : ExitingBlocks) {
2752     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2753     assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2754 
2755     // If we know we'd exit on the first iteration, rewrite the exit to
2756     // reflect this.  This does not imply the loop must exit through this
2757     // exit; there may be an earlier one taken on the first iteration.
2758     // TODO: Given we know the backedge can't be taken, we should go ahead
2759     // and break it.  Or at least, kill all the header phis and simplify.
2760     if (ExitCount->isZero()) {
2761       FoldExit(ExitingBB, true);
2762       Changed = true;
2763       continue;
2764     }
2765 
2766     // If we end up with a pointer exit count, bail.  Note that we can end up
2767     // with a pointer exit count for one exiting block, and not for another in
2768     // the same loop.
2769     if (!ExitCount->getType()->isIntegerTy() ||
2770         !MaxExitCount->getType()->isIntegerTy())
2771       continue;
2772 
2773     Type *WiderType =
2774       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777     assert(MaxExitCount->getType() == ExitCount->getType());
2778 
2779     // Can we prove that some other exit must be taken strictly before this
2780     // one?
2781     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2782                                      MaxExitCount, ExitCount)) {
2783       FoldExit(ExitingBB, false);
2784       Changed = true;
2785       continue;
2786     }
2787 
2788     // As we run, keep track of which exit counts we've encountered.  If we
2789     // find a duplicate, we've found an exit which would have exited on the
2790     // exiting iteration, but (from the visit order) strictly follows another
2791     // which does the same and is thus dead.
2792     if (!DominatingExitCounts.insert(ExitCount).second) {
2793       FoldExit(ExitingBB, false);
2794       Changed = true;
2795       continue;
2796     }
2797 
2798     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
2799     // here.  If we kept track of the min of dominanting exits so far, we could
2800     // discharge exits with EC >= MDEC. This is less powerful than the existing
2801     // transform (since later exits aren't considered), but potentially more
2802     // powerful for any case where SCEV can prove a >=u b, but neither a == b
2803     // or a >u b.  Such a case is not currently known.
2804   }
2805   return Changed;
2806 }
2807 
2808 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2809   SmallVector<BasicBlock*, 16> ExitingBlocks;
2810   L->getExitingBlocks(ExitingBlocks);
2811 
2812   bool Changed = false;
2813 
2814   // Finally, see if we can rewrite our exit conditions into a loop invariant
2815   // form.  If we have a read-only loop, and we can tell that we must exit down
2816   // a path which does not need any of the values computed within the loop, we
2817   // can rewrite the loop to exit on the first iteration.  Note that this
2818   // doesn't either a) tell us the loop exits on the first iteration (unless
2819   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
2820   // This transformation looks a lot like a restricted form of dead loop
2821   // elimination, but restricted to read-only loops and without neccesssarily
2822   // needing to kill the loop entirely.
2823   if (!LoopPredication)
2824     return Changed;
2825 
2826   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2827     return Changed;
2828 
2829   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2830   // through *explicit* control flow.  We have to eliminate the possibility of
2831   // implicit exits (see below) before we know it's truly exact.
2832   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2833   if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2834       !SE->isLoopInvariant(ExactBTC, L) ||
2835       !isSafeToExpand(ExactBTC, *SE))
2836     return Changed;
2837 
2838   // If we end up with a pointer exit count, bail.  It may be unsized.
2839   if (!ExactBTC->getType()->isIntegerTy())
2840     return Changed;
2841 
2842   auto BadExit = [&](BasicBlock *ExitingBB) {
2843     // If our exiting block exits multiple loops, we can only rewrite the
2844     // innermost one.  Otherwise, we're changing how many times the innermost
2845     // loop runs before it exits.
2846     if (LI->getLoopFor(ExitingBB) != L)
2847       return true;
2848 
2849     // Can't rewrite non-branch yet.
2850     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2851     if (!BI)
2852       return true;
2853 
2854     // If already constant, nothing to do.
2855     if (isa<Constant>(BI->getCondition()))
2856       return true;
2857 
2858     // If the exit block has phis, we need to be able to compute the values
2859     // within the loop which contains them.  This assumes trivially lcssa phis
2860     // have already been removed; TODO: generalize
2861     BasicBlock *ExitBlock =
2862     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2863     if (!ExitBlock->phis().empty())
2864       return true;
2865 
2866     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2867     assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2868     if (!SE->isLoopInvariant(ExitCount, L) ||
2869         !isSafeToExpand(ExitCount, *SE))
2870       return true;
2871 
2872     // If we end up with a pointer exit count, bail.  It may be unsized.
2873     if (!ExitCount->getType()->isIntegerTy())
2874       return true;
2875 
2876     return false;
2877   };
2878 
2879   // If we have any exits which can't be predicated themselves, than we can't
2880   // predicate any exit which isn't guaranteed to execute before it.  Consider
2881   // two exits (a) and (b) which would both exit on the same iteration.  If we
2882   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
2883   // we could convert a loop from exiting through (a) to one exiting through
2884   // (b).  Note that this problem exists only for exits with the same exit
2885   // count, and we could be more aggressive when exit counts are known inequal.
2886   llvm::sort(ExitingBlocks,
2887             [&](BasicBlock *A, BasicBlock *B) {
2888               // std::sort sorts in ascending order, so we want the inverse of
2889               // the normal dominance relation, plus a tie breaker for blocks
2890               // unordered by dominance.
2891               if (DT->properlyDominates(A, B)) return true;
2892               if (DT->properlyDominates(B, A)) return false;
2893               return A->getName() < B->getName();
2894             });
2895   // Check to see if our exit blocks are a total order (i.e. a linear chain of
2896   // exits before the backedge).  If they aren't, reasoning about reachability
2897   // is complicated and we choose not to for now.
2898   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
2899     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
2900       return Changed;
2901 
2902   // Given our sorted total order, we know that exit[j] must be evaluated
2903   // after all exit[i] such j > i.
2904   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
2905     if (BadExit(ExitingBlocks[i])) {
2906       ExitingBlocks.resize(i);
2907       break;
2908     }
2909 
2910   if (ExitingBlocks.empty())
2911     return Changed;
2912 
2913   // We rely on not being able to reach an exiting block on a later iteration
2914   // then it's statically compute exit count.  The implementaton of
2915   // getExitCount currently has this invariant, but assert it here so that
2916   // breakage is obvious if this ever changes..
2917   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
2918         return DT->dominates(ExitingBB, L->getLoopLatch());
2919       }));
2920 
2921   // At this point, ExitingBlocks consists of only those blocks which are
2922   // predicatable.  Given that, we know we have at least one exit we can
2923   // predicate if the loop is doesn't have side effects and doesn't have any
2924   // implicit exits (because then our exact BTC isn't actually exact).
2925   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
2926   // suggestions on how to improve this?  I can obviously bail out for outer
2927   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
2928   // is that enough for *all* side effects?
2929   for (BasicBlock *BB : L->blocks())
2930     for (auto &I : *BB)
2931       // TODO:isGuaranteedToTransfer
2932       if (I.mayHaveSideEffects() || I.mayThrow())
2933         return Changed;
2934 
2935   // Finally, do the actual predication for all predicatable blocks.  A couple
2936   // of notes here:
2937   // 1) We don't bother to constant fold dominated exits with identical exit
2938   //    counts; that's simply a form of CSE/equality propagation and we leave
2939   //    it for dedicated passes.
2940   // 2) We insert the comparison at the branch.  Hoisting introduces additional
2941   //    legality constraints and we leave that to dedicated logic.  We want to
2942   //    predicate even if we can't insert a loop invariant expression as
2943   //    peeling or unrolling will likely reduce the cost of the otherwise loop
2944   //    varying check.
2945   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
2946   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
2947   Value *ExactBTCV = nullptr; // Lazily generated if needed.
2948   for (BasicBlock *ExitingBB : ExitingBlocks) {
2949     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2950 
2951     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
2952     Value *NewCond;
2953     if (ExitCount == ExactBTC) {
2954       NewCond = L->contains(BI->getSuccessor(0)) ?
2955         B.getFalse() : B.getTrue();
2956     } else {
2957       Value *ECV = Rewriter.expandCodeFor(ExitCount);
2958       if (!ExactBTCV)
2959         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
2960       Value *RHS = ExactBTCV;
2961       if (ECV->getType() != RHS->getType()) {
2962         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
2963         ECV = B.CreateZExt(ECV, WiderTy);
2964         RHS = B.CreateZExt(RHS, WiderTy);
2965       }
2966       auto Pred = L->contains(BI->getSuccessor(0)) ?
2967         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
2968       NewCond = B.CreateICmp(Pred, ECV, RHS);
2969     }
2970     Value *OldCond = BI->getCondition();
2971     BI->setCondition(NewCond);
2972     if (OldCond->use_empty())
2973       DeadInsts.push_back(OldCond);
2974     Changed = true;
2975   }
2976 
2977   return Changed;
2978 }
2979 
2980 //===----------------------------------------------------------------------===//
2981 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2982 //===----------------------------------------------------------------------===//
2983 
2984 bool IndVarSimplify::run(Loop *L) {
2985   // We need (and expect!) the incoming loop to be in LCSSA.
2986   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2987          "LCSSA required to run indvars!");
2988   bool Changed = false;
2989 
2990   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2991   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2992   //    canonicalization can be a pessimization without LSR to "clean up"
2993   //    afterwards.
2994   //  - We depend on having a preheader; in particular,
2995   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2996   //    and we're in trouble if we can't find the induction variable even when
2997   //    we've manually inserted one.
2998   //  - LFTR relies on having a single backedge.
2999   if (!L->isLoopSimplifyForm())
3000     return false;
3001 
3002 #ifndef NDEBUG
3003   // Used below for a consistency check only
3004   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
3005 #endif
3006 
3007   // If there are any floating-point recurrences, attempt to
3008   // transform them to use integer recurrences.
3009   Changed |= rewriteNonIntegerIVs(L);
3010 
3011   // Create a rewriter object which we'll use to transform the code with.
3012   SCEVExpander Rewriter(*SE, DL, "indvars");
3013 #ifndef NDEBUG
3014   Rewriter.setDebugType(DEBUG_TYPE);
3015 #endif
3016 
3017   // Eliminate redundant IV users.
3018   //
3019   // Simplification works best when run before other consumers of SCEV. We
3020   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
3021   // other expressions involving loop IVs have been evaluated. This helps SCEV
3022   // set no-wrap flags before normalizing sign/zero extension.
3023   Rewriter.disableCanonicalMode();
3024   Changed |= simplifyAndExtend(L, Rewriter, LI);
3025 
3026   // Check to see if we can compute the final value of any expressions
3027   // that are recurrent in the loop, and substitute the exit values from the
3028   // loop into any instructions outside of the loop that use the final values
3029   // of the current expressions.
3030   if (ReplaceExitValue != NeverRepl)
3031     Changed |= rewriteLoopExitValues(L, Rewriter);
3032 
3033   // Eliminate redundant IV cycles.
3034   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
3035 
3036   // Try to eliminate loop exits based on analyzeable exit counts
3037   if (optimizeLoopExits(L, Rewriter))  {
3038     Changed = true;
3039     // Given we've changed exit counts, notify SCEV
3040     SE->forgetLoop(L);
3041   }
3042 
3043   // Try to form loop invariant tests for loop exits by changing how many
3044   // iterations of the loop run when that is unobservable.
3045   if (predicateLoopExits(L, Rewriter)) {
3046     Changed = true;
3047     // Given we've changed exit counts, notify SCEV
3048     SE->forgetLoop(L);
3049   }
3050 
3051   // If we have a trip count expression, rewrite the loop's exit condition
3052   // using it.
3053   if (!DisableLFTR) {
3054     SmallVector<BasicBlock*, 16> ExitingBlocks;
3055     L->getExitingBlocks(ExitingBlocks);
3056     for (BasicBlock *ExitingBB : ExitingBlocks) {
3057       // Can't rewrite non-branch yet.
3058       if (!isa<BranchInst>(ExitingBB->getTerminator()))
3059         continue;
3060 
3061       // If our exitting block exits multiple loops, we can only rewrite the
3062       // innermost one.  Otherwise, we're changing how many times the innermost
3063       // loop runs before it exits.
3064       if (LI->getLoopFor(ExitingBB) != L)
3065         continue;
3066 
3067       if (!needsLFTR(L, ExitingBB))
3068         continue;
3069 
3070       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
3071       if (isa<SCEVCouldNotCompute>(ExitCount))
3072         continue;
3073 
3074       // This was handled above, but as we form SCEVs, we can sometimes refine
3075       // existing ones; this allows exit counts to be folded to zero which
3076       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
3077       // until stable to handle cases like this better.
3078       if (ExitCount->isZero())
3079         continue;
3080 
3081       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
3082       if (!IndVar)
3083         continue;
3084 
3085       // Avoid high cost expansions.  Note: This heuristic is questionable in
3086       // that our definition of "high cost" is not exactly principled.
3087       if (Rewriter.isHighCostExpansion(ExitCount, L))
3088         continue;
3089 
3090       // Check preconditions for proper SCEVExpander operation. SCEV does not
3091       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
3092       // any pass that uses the SCEVExpander must do it. This does not work
3093       // well for loop passes because SCEVExpander makes assumptions about
3094       // all loops, while LoopPassManager only forces the current loop to be
3095       // simplified.
3096       //
3097       // FIXME: SCEV expansion has no way to bail out, so the caller must
3098       // explicitly check any assumptions made by SCEV. Brittle.
3099       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
3100       if (!AR || AR->getLoop()->getLoopPreheader())
3101         Changed |= linearFunctionTestReplace(L, ExitingBB,
3102                                              ExitCount, IndVar,
3103                                              Rewriter);
3104     }
3105   }
3106   // Clear the rewriter cache, because values that are in the rewriter's cache
3107   // can be deleted in the loop below, causing the AssertingVH in the cache to
3108   // trigger.
3109   Rewriter.clear();
3110 
3111   // Now that we're done iterating through lists, clean up any instructions
3112   // which are now dead.
3113   while (!DeadInsts.empty())
3114     if (Instruction *Inst =
3115             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
3116       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
3117 
3118   // The Rewriter may not be used from this point on.
3119 
3120   // Loop-invariant instructions in the preheader that aren't used in the
3121   // loop may be sunk below the loop to reduce register pressure.
3122   Changed |= sinkUnusedInvariants(L);
3123 
3124   // rewriteFirstIterationLoopExitValues does not rely on the computation of
3125   // trip count and therefore can further simplify exit values in addition to
3126   // rewriteLoopExitValues.
3127   Changed |= rewriteFirstIterationLoopExitValues(L);
3128 
3129   // Clean up dead instructions.
3130   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
3131 
3132   // Check a post-condition.
3133   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
3134          "Indvars did not preserve LCSSA!");
3135 
3136   // Verify that LFTR, and any other change have not interfered with SCEV's
3137   // ability to compute trip count.  We may have *changed* the exit count, but
3138   // only by reducing it.
3139 #ifndef NDEBUG
3140   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
3141     SE->forgetLoop(L);
3142     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
3143     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
3144         SE->getTypeSizeInBits(NewBECount->getType()))
3145       NewBECount = SE->getTruncateOrNoop(NewBECount,
3146                                          BackedgeTakenCount->getType());
3147     else
3148       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
3149                                                  NewBECount->getType());
3150     assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
3151                                  NewBECount) && "indvars must preserve SCEV");
3152   }
3153 #endif
3154 
3155   return Changed;
3156 }
3157 
3158 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
3159                                           LoopStandardAnalysisResults &AR,
3160                                           LPMUpdater &) {
3161   Function *F = L.getHeader()->getParent();
3162   const DataLayout &DL = F->getParent()->getDataLayout();
3163 
3164   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
3165   if (!IVS.run(&L))
3166     return PreservedAnalyses::all();
3167 
3168   auto PA = getLoopPassPreservedAnalyses();
3169   PA.preserveSet<CFGAnalyses>();
3170   return PA;
3171 }
3172 
3173 namespace {
3174 
3175 struct IndVarSimplifyLegacyPass : public LoopPass {
3176   static char ID; // Pass identification, replacement for typeid
3177 
3178   IndVarSimplifyLegacyPass() : LoopPass(ID) {
3179     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
3180   }
3181 
3182   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
3183     if (skipLoop(L))
3184       return false;
3185 
3186     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3187     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
3188     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3189     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
3190     auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
3191     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
3192     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
3193     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
3194 
3195     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
3196     return IVS.run(L);
3197   }
3198 
3199   void getAnalysisUsage(AnalysisUsage &AU) const override {
3200     AU.setPreservesCFG();
3201     getLoopAnalysisUsage(AU);
3202   }
3203 };
3204 
3205 } // end anonymous namespace
3206 
3207 char IndVarSimplifyLegacyPass::ID = 0;
3208 
3209 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
3210                       "Induction Variable Simplification", false, false)
3211 INITIALIZE_PASS_DEPENDENCY(LoopPass)
3212 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
3213                     "Induction Variable Simplification", false, false)
3214 
3215 Pass *llvm::createIndVarSimplifyPass() {
3216   return new IndVarSimplifyLegacyPass();
3217 }
3218