xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision eb24e1491f9900e922c78e53af588f22a3e9535f)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/ArrayRef.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/None.h"
32 #include "llvm/ADT/Optional.h"
33 #include "llvm/ADT/STLExtras.h"
34 #include "llvm/ADT/SmallSet.h"
35 #include "llvm/ADT/SmallPtrSet.h"
36 #include "llvm/ADT/SmallVector.h"
37 #include "llvm/ADT/Statistic.h"
38 #include "llvm/ADT/iterator_range.h"
39 #include "llvm/Analysis/LoopInfo.h"
40 #include "llvm/Analysis/LoopPass.h"
41 #include "llvm/Analysis/ScalarEvolution.h"
42 #include "llvm/Analysis/ScalarEvolutionExpander.h"
43 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
44 #include "llvm/Analysis/TargetLibraryInfo.h"
45 #include "llvm/Analysis/TargetTransformInfo.h"
46 #include "llvm/Analysis/ValueTracking.h"
47 #include "llvm/Transforms/Utils/Local.h"
48 #include "llvm/IR/BasicBlock.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/ConstantRange.h"
51 #include "llvm/IR/Constants.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Dominators.h"
55 #include "llvm/IR/Function.h"
56 #include "llvm/IR/IRBuilder.h"
57 #include "llvm/IR/InstrTypes.h"
58 #include "llvm/IR/Instruction.h"
59 #include "llvm/IR/Instructions.h"
60 #include "llvm/IR/IntrinsicInst.h"
61 #include "llvm/IR/Intrinsics.h"
62 #include "llvm/IR/Module.h"
63 #include "llvm/IR/Operator.h"
64 #include "llvm/IR/PassManager.h"
65 #include "llvm/IR/PatternMatch.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/Use.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/IR/Value.h"
70 #include "llvm/IR/ValueHandle.h"
71 #include "llvm/Pass.h"
72 #include "llvm/Support/Casting.h"
73 #include "llvm/Support/CommandLine.h"
74 #include "llvm/Support/Compiler.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Support/ErrorHandling.h"
77 #include "llvm/Support/MathExtras.h"
78 #include "llvm/Support/raw_ostream.h"
79 #include "llvm/Transforms/Scalar.h"
80 #include "llvm/Transforms/Scalar/LoopPassManager.h"
81 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
82 #include "llvm/Transforms/Utils/LoopUtils.h"
83 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <utility>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "indvars"
91 
92 STATISTIC(NumWidened     , "Number of indvars widened");
93 STATISTIC(NumReplaced    , "Number of exit values replaced");
94 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
95 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
96 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
97 
98 // Trip count verification can be enabled by default under NDEBUG if we
99 // implement a strong expression equivalence checker in SCEV. Until then, we
100 // use the verify-indvars flag, which may assert in some cases.
101 static cl::opt<bool> VerifyIndvars(
102   "verify-indvars", cl::Hidden,
103   cl::desc("Verify the ScalarEvolution result after running indvars"));
104 
105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
106 
107 static cl::opt<ReplaceExitVal> ReplaceExitValue(
108     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
109     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
110     cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
111                clEnumValN(OnlyCheapRepl, "cheap",
112                           "only replace exit value when the cost is cheap"),
113                clEnumValN(NoHardUse, "noharduse",
114                           "only replace exit values when loop def likely dead"),
115                clEnumValN(AlwaysRepl, "always",
116                           "always replace exit value whenever possible")));
117 
118 static cl::opt<bool> UsePostIncrementRanges(
119   "indvars-post-increment-ranges", cl::Hidden,
120   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
121   cl::init(true));
122 
123 static cl::opt<bool>
124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
125             cl::desc("Disable Linear Function Test Replace optimization"));
126 
127 namespace {
128 
129 struct RewritePhi;
130 
131 class IndVarSimplify {
132   LoopInfo *LI;
133   ScalarEvolution *SE;
134   DominatorTree *DT;
135   const DataLayout &DL;
136   TargetLibraryInfo *TLI;
137   const TargetTransformInfo *TTI;
138 
139   SmallVector<WeakTrackingVH, 16> DeadInsts;
140 
141   bool isValidRewrite(Value *FromVal, Value *ToVal);
142 
143   bool handleFloatingPointIV(Loop *L, PHINode *PH);
144   bool rewriteNonIntegerIVs(Loop *L);
145 
146   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147   bool optimizeLoopExits(Loop *L);
148 
149   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
150   bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
151   bool rewriteFirstIterationLoopExitValues(Loop *L);
152   bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
153 
154   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
155                                  const SCEV *ExitCount,
156                                  PHINode *IndVar, SCEVExpander &Rewriter);
157 
158   bool sinkUnusedInvariants(Loop *L);
159 
160 public:
161   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
162                  const DataLayout &DL, TargetLibraryInfo *TLI,
163                  TargetTransformInfo *TTI)
164       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}
165 
166   bool run(Loop *L);
167 };
168 
169 } // end anonymous namespace
170 
171 /// Return true if the SCEV expansion generated by the rewriter can replace the
172 /// original value. SCEV guarantees that it produces the same value, but the way
173 /// it is produced may be illegal IR.  Ideally, this function will only be
174 /// called for verification.
175 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
176   // If an SCEV expression subsumed multiple pointers, its expansion could
177   // reassociate the GEP changing the base pointer. This is illegal because the
178   // final address produced by a GEP chain must be inbounds relative to its
179   // underlying object. Otherwise basic alias analysis, among other things,
180   // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
181   // producing an expression involving multiple pointers. Until then, we must
182   // bail out here.
183   //
184   // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
185   // because it understands lcssa phis while SCEV does not.
186   Value *FromPtr = FromVal;
187   Value *ToPtr = ToVal;
188   if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
189     FromPtr = GEP->getPointerOperand();
190   }
191   if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
192     ToPtr = GEP->getPointerOperand();
193   }
194   if (FromPtr != FromVal || ToPtr != ToVal) {
195     // Quickly check the common case
196     if (FromPtr == ToPtr)
197       return true;
198 
199     // SCEV may have rewritten an expression that produces the GEP's pointer
200     // operand. That's ok as long as the pointer operand has the same base
201     // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
202     // base of a recurrence. This handles the case in which SCEV expansion
203     // converts a pointer type recurrence into a nonrecurrent pointer base
204     // indexed by an integer recurrence.
205 
206     // If the GEP base pointer is a vector of pointers, abort.
207     if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
208       return false;
209 
210     const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
211     const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
212     if (FromBase == ToBase)
213       return true;
214 
215     LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
216                       << " != " << *ToBase << "\n");
217 
218     return false;
219   }
220   return true;
221 }
222 
223 /// Determine the insertion point for this user. By default, insert immediately
224 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
225 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
226 /// common dominator for the incoming blocks. A nullptr can be returned if no
227 /// viable location is found: it may happen if User is a PHI and Def only comes
228 /// to this PHI from unreachable blocks.
229 static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
230                                           DominatorTree *DT, LoopInfo *LI) {
231   PHINode *PHI = dyn_cast<PHINode>(User);
232   if (!PHI)
233     return User;
234 
235   Instruction *InsertPt = nullptr;
236   for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
237     if (PHI->getIncomingValue(i) != Def)
238       continue;
239 
240     BasicBlock *InsertBB = PHI->getIncomingBlock(i);
241 
242     if (!DT->isReachableFromEntry(InsertBB))
243       continue;
244 
245     if (!InsertPt) {
246       InsertPt = InsertBB->getTerminator();
247       continue;
248     }
249     InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
250     InsertPt = InsertBB->getTerminator();
251   }
252 
253   // If we have skipped all inputs, it means that Def only comes to Phi from
254   // unreachable blocks.
255   if (!InsertPt)
256     return nullptr;
257 
258   auto *DefI = dyn_cast<Instruction>(Def);
259   if (!DefI)
260     return InsertPt;
261 
262   assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");
263 
264   auto *L = LI->getLoopFor(DefI->getParent());
265   assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));
266 
267   for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
268     if (LI->getLoopFor(DTN->getBlock()) == L)
269       return DTN->getBlock()->getTerminator();
270 
271   llvm_unreachable("DefI dominates InsertPt!");
272 }
273 
274 //===----------------------------------------------------------------------===//
275 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
276 //===----------------------------------------------------------------------===//
277 
278 /// Convert APF to an integer, if possible.
279 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
280   bool isExact = false;
281   // See if we can convert this to an int64_t
282   uint64_t UIntVal;
283   if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
284                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
285       !isExact)
286     return false;
287   IntVal = UIntVal;
288   return true;
289 }
290 
291 /// If the loop has floating induction variable then insert corresponding
292 /// integer induction variable if possible.
293 /// For example,
294 /// for(double i = 0; i < 10000; ++i)
295 ///   bar(i)
296 /// is converted into
297 /// for(int i = 0; i < 10000; ++i)
298 ///   bar((double)i);
299 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
300   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
301   unsigned BackEdge     = IncomingEdge^1;
302 
303   // Check incoming value.
304   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
305 
306   int64_t InitValue;
307   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
308     return false;
309 
310   // Check IV increment. Reject this PN if increment operation is not
311   // an add or increment value can not be represented by an integer.
312   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
313   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
314 
315   // If this is not an add of the PHI with a constantfp, or if the constant fp
316   // is not an integer, bail out.
317   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
318   int64_t IncValue;
319   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
320       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
321     return false;
322 
323   // Check Incr uses. One user is PN and the other user is an exit condition
324   // used by the conditional terminator.
325   Value::user_iterator IncrUse = Incr->user_begin();
326   Instruction *U1 = cast<Instruction>(*IncrUse++);
327   if (IncrUse == Incr->user_end()) return false;
328   Instruction *U2 = cast<Instruction>(*IncrUse++);
329   if (IncrUse != Incr->user_end()) return false;
330 
331   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
332   // only used by a branch, we can't transform it.
333   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
334   if (!Compare)
335     Compare = dyn_cast<FCmpInst>(U2);
336   if (!Compare || !Compare->hasOneUse() ||
337       !isa<BranchInst>(Compare->user_back()))
338     return false;
339 
340   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
341 
342   // We need to verify that the branch actually controls the iteration count
343   // of the loop.  If not, the new IV can overflow and no one will notice.
344   // The branch block must be in the loop and one of the successors must be out
345   // of the loop.
346   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
347   if (!L->contains(TheBr->getParent()) ||
348       (L->contains(TheBr->getSuccessor(0)) &&
349        L->contains(TheBr->getSuccessor(1))))
350     return false;
351 
352   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
353   // transform it.
354   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
355   int64_t ExitValue;
356   if (ExitValueVal == nullptr ||
357       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
358     return false;
359 
360   // Find new predicate for integer comparison.
361   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
362   switch (Compare->getPredicate()) {
363   default: return false;  // Unknown comparison.
364   case CmpInst::FCMP_OEQ:
365   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
366   case CmpInst::FCMP_ONE:
367   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
368   case CmpInst::FCMP_OGT:
369   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
370   case CmpInst::FCMP_OGE:
371   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
372   case CmpInst::FCMP_OLT:
373   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
374   case CmpInst::FCMP_OLE:
375   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
376   }
377 
378   // We convert the floating point induction variable to a signed i32 value if
379   // we can.  This is only safe if the comparison will not overflow in a way
380   // that won't be trapped by the integer equivalent operations.  Check for this
381   // now.
382   // TODO: We could use i64 if it is native and the range requires it.
383 
384   // The start/stride/exit values must all fit in signed i32.
385   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
386     return false;
387 
388   // If not actually striding (add x, 0.0), avoid touching the code.
389   if (IncValue == 0)
390     return false;
391 
392   // Positive and negative strides have different safety conditions.
393   if (IncValue > 0) {
394     // If we have a positive stride, we require the init to be less than the
395     // exit value.
396     if (InitValue >= ExitValue)
397       return false;
398 
399     uint32_t Range = uint32_t(ExitValue-InitValue);
400     // Check for infinite loop, either:
401     // while (i <= Exit) or until (i > Exit)
402     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
403       if (++Range == 0) return false;  // Range overflows.
404     }
405 
406     unsigned Leftover = Range % uint32_t(IncValue);
407 
408     // If this is an equality comparison, we require that the strided value
409     // exactly land on the exit value, otherwise the IV condition will wrap
410     // around and do things the fp IV wouldn't.
411     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
412         Leftover != 0)
413       return false;
414 
415     // If the stride would wrap around the i32 before exiting, we can't
416     // transform the IV.
417     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
418       return false;
419   } else {
420     // If we have a negative stride, we require the init to be greater than the
421     // exit value.
422     if (InitValue <= ExitValue)
423       return false;
424 
425     uint32_t Range = uint32_t(InitValue-ExitValue);
426     // Check for infinite loop, either:
427     // while (i >= Exit) or until (i < Exit)
428     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
429       if (++Range == 0) return false;  // Range overflows.
430     }
431 
432     unsigned Leftover = Range % uint32_t(-IncValue);
433 
434     // If this is an equality comparison, we require that the strided value
435     // exactly land on the exit value, otherwise the IV condition will wrap
436     // around and do things the fp IV wouldn't.
437     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
438         Leftover != 0)
439       return false;
440 
441     // If the stride would wrap around the i32 before exiting, we can't
442     // transform the IV.
443     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
444       return false;
445   }
446 
447   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
448 
449   // Insert new integer induction variable.
450   PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
451   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
452                       PN->getIncomingBlock(IncomingEdge));
453 
454   Value *NewAdd =
455     BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
456                               Incr->getName()+".int", Incr);
457   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
458 
459   ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
460                                       ConstantInt::get(Int32Ty, ExitValue),
461                                       Compare->getName());
462 
463   // In the following deletions, PN may become dead and may be deleted.
464   // Use a WeakTrackingVH to observe whether this happens.
465   WeakTrackingVH WeakPH = PN;
466 
467   // Delete the old floating point exit comparison.  The branch starts using the
468   // new comparison.
469   NewCompare->takeName(Compare);
470   Compare->replaceAllUsesWith(NewCompare);
471   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);
472 
473   // Delete the old floating point increment.
474   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
475   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);
476 
477   // If the FP induction variable still has uses, this is because something else
478   // in the loop uses its value.  In order to canonicalize the induction
479   // variable, we chose to eliminate the IV and rewrite it in terms of an
480   // int->fp cast.
481   //
482   // We give preference to sitofp over uitofp because it is faster on most
483   // platforms.
484   if (WeakPH) {
485     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
486                                  &*PN->getParent()->getFirstInsertionPt());
487     PN->replaceAllUsesWith(Conv);
488     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
489   }
490   return true;
491 }
492 
493 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
494   // First step.  Check to see if there are any floating-point recurrences.
495   // If there are, change them into integer recurrences, permitting analysis by
496   // the SCEV routines.
497   BasicBlock *Header = L->getHeader();
498 
499   SmallVector<WeakTrackingVH, 8> PHIs;
500   for (PHINode &PN : Header->phis())
501     PHIs.push_back(&PN);
502 
503   bool Changed = false;
504   for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
505     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
506       Changed |= handleFloatingPointIV(L, PN);
507 
508   // If the loop previously had floating-point IV, ScalarEvolution
509   // may not have been able to compute a trip count. Now that we've done some
510   // re-writing, the trip count may be computable.
511   if (Changed)
512     SE->forgetLoop(L);
513   return Changed;
514 }
515 
516 namespace {
517 
518 // Collect information about PHI nodes which can be transformed in
519 // rewriteLoopExitValues.
520 struct RewritePhi {
521   PHINode *PN;
522 
523   // Ith incoming value.
524   unsigned Ith;
525 
526   // Exit value after expansion.
527   Value *Val;
528 
529   // High Cost when expansion.
530   bool HighCost;
531 
532   RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
533       : PN(P), Ith(I), Val(V), HighCost(H) {}
534 };
535 
536 } // end anonymous namespace
537 
538 //===----------------------------------------------------------------------===//
539 // rewriteLoopExitValues - Optimize IV users outside the loop.
540 // As a side effect, reduces the amount of IV processing within the loop.
541 //===----------------------------------------------------------------------===//
542 
543 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
544   SmallPtrSet<const Instruction *, 8> Visited;
545   SmallVector<const Instruction *, 8> WorkList;
546   Visited.insert(I);
547   WorkList.push_back(I);
548   while (!WorkList.empty()) {
549     const Instruction *Curr = WorkList.pop_back_val();
550     // This use is outside the loop, nothing to do.
551     if (!L->contains(Curr))
552       continue;
553     // Do we assume it is a "hard" use which will not be eliminated easily?
554     if (Curr->mayHaveSideEffects())
555       return true;
556     // Otherwise, add all its users to worklist.
557     for (auto U : Curr->users()) {
558       auto *UI = cast<Instruction>(U);
559       if (Visited.insert(UI).second)
560         WorkList.push_back(UI);
561     }
562   }
563   return false;
564 }
565 
566 /// Check to see if this loop has a computable loop-invariant execution count.
567 /// If so, this means that we can compute the final value of any expressions
568 /// that are recurrent in the loop, and substitute the exit values from the loop
569 /// into any instructions outside of the loop that use the final values of the
570 /// current expressions.
571 ///
572 /// This is mostly redundant with the regular IndVarSimplify activities that
573 /// happen later, except that it's more powerful in some cases, because it's
574 /// able to brute-force evaluate arbitrary instructions as long as they have
575 /// constant operands at the beginning of the loop.
576 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
577   // Check a pre-condition.
578   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
579          "Indvars did not preserve LCSSA!");
580 
581   SmallVector<BasicBlock*, 8> ExitBlocks;
582   L->getUniqueExitBlocks(ExitBlocks);
583 
584   SmallVector<RewritePhi, 8> RewritePhiSet;
585   // Find all values that are computed inside the loop, but used outside of it.
586   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
587   // the exit blocks of the loop to find them.
588   for (BasicBlock *ExitBB : ExitBlocks) {
589     // If there are no PHI nodes in this exit block, then no values defined
590     // inside the loop are used on this path, skip it.
591     PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
592     if (!PN) continue;
593 
594     unsigned NumPreds = PN->getNumIncomingValues();
595 
596     // Iterate over all of the PHI nodes.
597     BasicBlock::iterator BBI = ExitBB->begin();
598     while ((PN = dyn_cast<PHINode>(BBI++))) {
599       if (PN->use_empty())
600         continue; // dead use, don't replace it
601 
602       if (!SE->isSCEVable(PN->getType()))
603         continue;
604 
605       // It's necessary to tell ScalarEvolution about this explicitly so that
606       // it can walk the def-use list and forget all SCEVs, as it may not be
607       // watching the PHI itself. Once the new exit value is in place, there
608       // may not be a def-use connection between the loop and every instruction
609       // which got a SCEVAddRecExpr for that loop.
610       SE->forgetValue(PN);
611 
612       // Iterate over all of the values in all the PHI nodes.
613       for (unsigned i = 0; i != NumPreds; ++i) {
614         // If the value being merged in is not integer or is not defined
615         // in the loop, skip it.
616         Value *InVal = PN->getIncomingValue(i);
617         if (!isa<Instruction>(InVal))
618           continue;
619 
620         // If this pred is for a subloop, not L itself, skip it.
621         if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
622           continue; // The Block is in a subloop, skip it.
623 
624         // Check that InVal is defined in the loop.
625         Instruction *Inst = cast<Instruction>(InVal);
626         if (!L->contains(Inst))
627           continue;
628 
629         // Okay, this instruction has a user outside of the current loop
630         // and varies predictably *inside* the loop.  Evaluate the value it
631         // contains when the loop exits, if possible.
632         const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
633         if (!SE->isLoopInvariant(ExitValue, L) ||
634             !isSafeToExpand(ExitValue, *SE))
635           continue;
636 
637         // Computing the value outside of the loop brings no benefit if it is
638         // definitely used inside the loop in a way which can not be optimized
639         // away.  Avoid doing so unless we know we have a value which computes
640         // the ExitValue already.  TODO: This should be merged into SCEV
641         // expander to leverage its knowledge of existing expressions.
642         if (ReplaceExitValue != AlwaysRepl &&
643             !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
644             hasHardUserWithinLoop(L, Inst))
645           continue;
646 
647         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
648         Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
649 
650         LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
651                           << '\n'
652                           << "  LoopVal = " << *Inst << "\n");
653 
654         if (!isValidRewrite(Inst, ExitVal)) {
655           DeadInsts.push_back(ExitVal);
656           continue;
657         }
658 
659 #ifndef NDEBUG
660         // If we reuse an instruction from a loop which is neither L nor one of
661         // its containing loops, we end up breaking LCSSA form for this loop by
662         // creating a new use of its instruction.
663         if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
664           if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
665             if (EVL != L)
666               assert(EVL->contains(L) && "LCSSA breach detected!");
667 #endif
668 
669         // Collect all the candidate PHINodes to be rewritten.
670         RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
671       }
672     }
673   }
674 
675   bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
676 
677   bool Changed = false;
678   // Transformation.
679   for (const RewritePhi &Phi : RewritePhiSet) {
680     PHINode *PN = Phi.PN;
681     Value *ExitVal = Phi.Val;
682 
683     // Only do the rewrite when the ExitValue can be expanded cheaply.
684     // If LoopCanBeDel is true, rewrite exit value aggressively.
685     if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
686       DeadInsts.push_back(ExitVal);
687       continue;
688     }
689 
690     Changed = true;
691     ++NumReplaced;
692     Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
693     PN->setIncomingValue(Phi.Ith, ExitVal);
694 
695     // If this instruction is dead now, delete it. Don't do it now to avoid
696     // invalidating iterators.
697     if (isInstructionTriviallyDead(Inst, TLI))
698       DeadInsts.push_back(Inst);
699 
700     // Replace PN with ExitVal if that is legal and does not break LCSSA.
701     if (PN->getNumIncomingValues() == 1 &&
702         LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
703       PN->replaceAllUsesWith(ExitVal);
704       PN->eraseFromParent();
705     }
706   }
707 
708   // The insertion point instruction may have been deleted; clear it out
709   // so that the rewriter doesn't trip over it later.
710   Rewriter.clearInsertPoint();
711   return Changed;
712 }
713 
714 //===---------------------------------------------------------------------===//
715 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
716 // they will exit at the first iteration.
717 //===---------------------------------------------------------------------===//
718 
719 /// Check to see if this loop has loop invariant conditions which lead to loop
720 /// exits. If so, we know that if the exit path is taken, it is at the first
721 /// loop iteration. This lets us predict exit values of PHI nodes that live in
722 /// loop header.
723 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
724   // Verify the input to the pass is already in LCSSA form.
725   assert(L->isLCSSAForm(*DT));
726 
727   SmallVector<BasicBlock *, 8> ExitBlocks;
728   L->getUniqueExitBlocks(ExitBlocks);
729 
730   bool MadeAnyChanges = false;
731   for (auto *ExitBB : ExitBlocks) {
732     // If there are no more PHI nodes in this exit block, then no more
733     // values defined inside the loop are used on this path.
734     for (PHINode &PN : ExitBB->phis()) {
735       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
736            IncomingValIdx != E; ++IncomingValIdx) {
737         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
738 
739         // Can we prove that the exit must run on the first iteration if it
740         // runs at all?  (i.e. early exits are fine for our purposes, but
741         // traces which lead to this exit being taken on the 2nd iteration
742         // aren't.)  Note that this is about whether the exit branch is
743         // executed, not about whether it is taken.
744         if (!L->getLoopLatch() ||
745             !DT->dominates(IncomingBB, L->getLoopLatch()))
746           continue;
747 
748         // Get condition that leads to the exit path.
749         auto *TermInst = IncomingBB->getTerminator();
750 
751         Value *Cond = nullptr;
752         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
753           // Must be a conditional branch, otherwise the block
754           // should not be in the loop.
755           Cond = BI->getCondition();
756         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
757           Cond = SI->getCondition();
758         else
759           continue;
760 
761         if (!L->isLoopInvariant(Cond))
762           continue;
763 
764         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
765 
766         // Only deal with PHIs in the loop header.
767         if (!ExitVal || ExitVal->getParent() != L->getHeader())
768           continue;
769 
770         // If ExitVal is a PHI on the loop header, then we know its
771         // value along this exit because the exit can only be taken
772         // on the first iteration.
773         auto *LoopPreheader = L->getLoopPreheader();
774         assert(LoopPreheader && "Invalid loop");
775         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
776         if (PreheaderIdx != -1) {
777           assert(ExitVal->getParent() == L->getHeader() &&
778                  "ExitVal must be in loop header");
779           MadeAnyChanges = true;
780           PN.setIncomingValue(IncomingValIdx,
781                               ExitVal->getIncomingValue(PreheaderIdx));
782         }
783       }
784     }
785   }
786   return MadeAnyChanges;
787 }
788 
789 /// Check whether it is possible to delete the loop after rewriting exit
790 /// value. If it is possible, ignore ReplaceExitValue and do rewriting
791 /// aggressively.
792 bool IndVarSimplify::canLoopBeDeleted(
793     Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
794   BasicBlock *Preheader = L->getLoopPreheader();
795   // If there is no preheader, the loop will not be deleted.
796   if (!Preheader)
797     return false;
798 
799   // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
800   // We obviate multiple ExitingBlocks case for simplicity.
801   // TODO: If we see testcase with multiple ExitingBlocks can be deleted
802   // after exit value rewriting, we can enhance the logic here.
803   SmallVector<BasicBlock *, 4> ExitingBlocks;
804   L->getExitingBlocks(ExitingBlocks);
805   SmallVector<BasicBlock *, 8> ExitBlocks;
806   L->getUniqueExitBlocks(ExitBlocks);
807   if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1)
808     return false;
809 
810   BasicBlock *ExitBlock = ExitBlocks[0];
811   BasicBlock::iterator BI = ExitBlock->begin();
812   while (PHINode *P = dyn_cast<PHINode>(BI)) {
813     Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
814 
815     // If the Incoming value of P is found in RewritePhiSet, we know it
816     // could be rewritten to use a loop invariant value in transformation
817     // phase later. Skip it in the loop invariant check below.
818     bool found = false;
819     for (const RewritePhi &Phi : RewritePhiSet) {
820       unsigned i = Phi.Ith;
821       if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
822         found = true;
823         break;
824       }
825     }
826 
827     Instruction *I;
828     if (!found && (I = dyn_cast<Instruction>(Incoming)))
829       if (!L->hasLoopInvariantOperands(I))
830         return false;
831 
832     ++BI;
833   }
834 
835   for (auto *BB : L->blocks())
836     if (llvm::any_of(*BB, [](Instruction &I) {
837           return I.mayHaveSideEffects();
838         }))
839       return false;
840 
841   return true;
842 }
843 
844 //===----------------------------------------------------------------------===//
845 //  IV Widening - Extend the width of an IV to cover its widest uses.
846 //===----------------------------------------------------------------------===//
847 
848 namespace {
849 
850 // Collect information about induction variables that are used by sign/zero
851 // extend operations. This information is recorded by CollectExtend and provides
852 // the input to WidenIV.
853 struct WideIVInfo {
854   PHINode *NarrowIV = nullptr;
855 
856   // Widest integer type created [sz]ext
857   Type *WidestNativeType = nullptr;
858 
859   // Was a sext user seen before a zext?
860   bool IsSigned = false;
861 };
862 
863 } // end anonymous namespace
864 
865 /// Update information about the induction variable that is extended by this
866 /// sign or zero extend operation. This is used to determine the final width of
867 /// the IV before actually widening it.
868 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
869                         const TargetTransformInfo *TTI) {
870   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
871   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
872     return;
873 
874   Type *Ty = Cast->getType();
875   uint64_t Width = SE->getTypeSizeInBits(Ty);
876   if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
877     return;
878 
879   // Check that `Cast` actually extends the induction variable (we rely on this
880   // later).  This takes care of cases where `Cast` is extending a truncation of
881   // the narrow induction variable, and thus can end up being narrower than the
882   // "narrow" induction variable.
883   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
884   if (NarrowIVWidth >= Width)
885     return;
886 
887   // Cast is either an sext or zext up to this point.
888   // We should not widen an indvar if arithmetics on the wider indvar are more
889   // expensive than those on the narrower indvar. We check only the cost of ADD
890   // because at least an ADD is required to increment the induction variable. We
891   // could compute more comprehensively the cost of all instructions on the
892   // induction variable when necessary.
893   if (TTI &&
894       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
895           TTI->getArithmeticInstrCost(Instruction::Add,
896                                       Cast->getOperand(0)->getType())) {
897     return;
898   }
899 
900   if (!WI.WidestNativeType) {
901     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
902     WI.IsSigned = IsSigned;
903     return;
904   }
905 
906   // We extend the IV to satisfy the sign of its first user, arbitrarily.
907   if (WI.IsSigned != IsSigned)
908     return;
909 
910   if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
911     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
912 }
913 
914 namespace {
915 
916 /// Record a link in the Narrow IV def-use chain along with the WideIV that
917 /// computes the same value as the Narrow IV def.  This avoids caching Use*
918 /// pointers.
919 struct NarrowIVDefUse {
920   Instruction *NarrowDef = nullptr;
921   Instruction *NarrowUse = nullptr;
922   Instruction *WideDef = nullptr;
923 
924   // True if the narrow def is never negative.  Tracking this information lets
925   // us use a sign extension instead of a zero extension or vice versa, when
926   // profitable and legal.
927   bool NeverNegative = false;
928 
929   NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
930                  bool NeverNegative)
931       : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
932         NeverNegative(NeverNegative) {}
933 };
934 
935 /// The goal of this transform is to remove sign and zero extends without
936 /// creating any new induction variables. To do this, it creates a new phi of
937 /// the wider type and redirects all users, either removing extends or inserting
938 /// truncs whenever we stop propagating the type.
939 class WidenIV {
940   // Parameters
941   PHINode *OrigPhi;
942   Type *WideType;
943 
944   // Context
945   LoopInfo        *LI;
946   Loop            *L;
947   ScalarEvolution *SE;
948   DominatorTree   *DT;
949 
950   // Does the module have any calls to the llvm.experimental.guard intrinsic
951   // at all? If not we can avoid scanning instructions looking for guards.
952   bool HasGuards;
953 
954   // Result
955   PHINode *WidePhi = nullptr;
956   Instruction *WideInc = nullptr;
957   const SCEV *WideIncExpr = nullptr;
958   SmallVectorImpl<WeakTrackingVH> &DeadInsts;
959 
960   SmallPtrSet<Instruction *,16> Widened;
961   SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
962 
963   enum ExtendKind { ZeroExtended, SignExtended, Unknown };
964 
965   // A map tracking the kind of extension used to widen each narrow IV
966   // and narrow IV user.
967   // Key: pointer to a narrow IV or IV user.
968   // Value: the kind of extension used to widen this Instruction.
969   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
970 
971   using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;
972 
973   // A map with control-dependent ranges for post increment IV uses. The key is
974   // a pair of IV def and a use of this def denoting the context. The value is
975   // a ConstantRange representing possible values of the def at the given
976   // context.
977   DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
978 
979   Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
980                                               Instruction *UseI) {
981     DefUserPair Key(Def, UseI);
982     auto It = PostIncRangeInfos.find(Key);
983     return It == PostIncRangeInfos.end()
984                ? Optional<ConstantRange>(None)
985                : Optional<ConstantRange>(It->second);
986   }
987 
988   void calculatePostIncRanges(PHINode *OrigPhi);
989   void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
990 
991   void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
992     DefUserPair Key(Def, UseI);
993     auto It = PostIncRangeInfos.find(Key);
994     if (It == PostIncRangeInfos.end())
995       PostIncRangeInfos.insert({Key, R});
996     else
997       It->second = R.intersectWith(It->second);
998   }
999 
1000 public:
1001   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
1002           DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
1003           bool HasGuards)
1004       : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
1005         L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
1006         HasGuards(HasGuards), DeadInsts(DI) {
1007     assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
1008     ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
1009   }
1010 
1011   PHINode *createWideIV(SCEVExpander &Rewriter);
1012 
1013 protected:
1014   Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
1015                           Instruction *Use);
1016 
1017   Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
1018   Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
1019                                      const SCEVAddRecExpr *WideAR);
1020   Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
1021 
1022   ExtendKind getExtendKind(Instruction *I);
1023 
1024   using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;
1025 
1026   WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
1027 
1028   WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
1029 
1030   const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1031                               unsigned OpCode) const;
1032 
1033   Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
1034 
1035   bool widenLoopCompare(NarrowIVDefUse DU);
1036   bool widenWithVariantLoadUse(NarrowIVDefUse DU);
1037   void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);
1038 
1039   void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
1040 };
1041 
1042 } // end anonymous namespace
1043 
1044 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
1045                                  bool IsSigned, Instruction *Use) {
1046   // Set the debug location and conservative insertion point.
1047   IRBuilder<> Builder(Use);
1048   // Hoist the insertion point into loop preheaders as far as possible.
1049   for (const Loop *L = LI->getLoopFor(Use->getParent());
1050        L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
1051        L = L->getParentLoop())
1052     Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
1053 
1054   return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
1055                     Builder.CreateZExt(NarrowOper, WideType);
1056 }
1057 
1058 /// Instantiate a wide operation to replace a narrow operation. This only needs
1059 /// to handle operations that can evaluation to SCEVAddRec. It can safely return
1060 /// 0 for any operation we decide not to clone.
1061 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
1062                                   const SCEVAddRecExpr *WideAR) {
1063   unsigned Opcode = DU.NarrowUse->getOpcode();
1064   switch (Opcode) {
1065   default:
1066     return nullptr;
1067   case Instruction::Add:
1068   case Instruction::Mul:
1069   case Instruction::UDiv:
1070   case Instruction::Sub:
1071     return cloneArithmeticIVUser(DU, WideAR);
1072 
1073   case Instruction::And:
1074   case Instruction::Or:
1075   case Instruction::Xor:
1076   case Instruction::Shl:
1077   case Instruction::LShr:
1078   case Instruction::AShr:
1079     return cloneBitwiseIVUser(DU);
1080   }
1081 }
1082 
1083 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
1084   Instruction *NarrowUse = DU.NarrowUse;
1085   Instruction *NarrowDef = DU.NarrowDef;
1086   Instruction *WideDef = DU.WideDef;
1087 
1088   LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");
1089 
1090   // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
1091   // about the narrow operand yet so must insert a [sz]ext. It is probably loop
1092   // invariant and will be folded or hoisted. If it actually comes from a
1093   // widened IV, it should be removed during a future call to widenIVUse.
1094   bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
1095   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1096                    ? WideDef
1097                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1098                                       IsSigned, NarrowUse);
1099   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1100                    ? WideDef
1101                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1102                                       IsSigned, NarrowUse);
1103 
1104   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1105   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1106                                         NarrowBO->getName());
1107   IRBuilder<> Builder(NarrowUse);
1108   Builder.Insert(WideBO);
1109   WideBO->copyIRFlags(NarrowBO);
1110   return WideBO;
1111 }
1112 
1113 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
1114                                             const SCEVAddRecExpr *WideAR) {
1115   Instruction *NarrowUse = DU.NarrowUse;
1116   Instruction *NarrowDef = DU.NarrowDef;
1117   Instruction *WideDef = DU.WideDef;
1118 
1119   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1120 
1121   unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;
1122 
1123   // We're trying to find X such that
1124   //
1125   //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
1126   //
1127   // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
1128   // and check using SCEV if any of them are correct.
1129 
1130   // Returns true if extending NonIVNarrowDef according to `SignExt` is a
1131   // correct solution to X.
1132   auto GuessNonIVOperand = [&](bool SignExt) {
1133     const SCEV *WideLHS;
1134     const SCEV *WideRHS;
1135 
1136     auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
1137       if (SignExt)
1138         return SE->getSignExtendExpr(S, Ty);
1139       return SE->getZeroExtendExpr(S, Ty);
1140     };
1141 
1142     if (IVOpIdx == 0) {
1143       WideLHS = SE->getSCEV(WideDef);
1144       const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
1145       WideRHS = GetExtend(NarrowRHS, WideType);
1146     } else {
1147       const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
1148       WideLHS = GetExtend(NarrowLHS, WideType);
1149       WideRHS = SE->getSCEV(WideDef);
1150     }
1151 
1152     // WideUse is "WideDef `op.wide` X" as described in the comment.
1153     const SCEV *WideUse = nullptr;
1154 
1155     switch (NarrowUse->getOpcode()) {
1156     default:
1157       llvm_unreachable("No other possibility!");
1158 
1159     case Instruction::Add:
1160       WideUse = SE->getAddExpr(WideLHS, WideRHS);
1161       break;
1162 
1163     case Instruction::Mul:
1164       WideUse = SE->getMulExpr(WideLHS, WideRHS);
1165       break;
1166 
1167     case Instruction::UDiv:
1168       WideUse = SE->getUDivExpr(WideLHS, WideRHS);
1169       break;
1170 
1171     case Instruction::Sub:
1172       WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
1173       break;
1174     }
1175 
1176     return WideUse == WideAR;
1177   };
1178 
1179   bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
1180   if (!GuessNonIVOperand(SignExtend)) {
1181     SignExtend = !SignExtend;
1182     if (!GuessNonIVOperand(SignExtend))
1183       return nullptr;
1184   }
1185 
1186   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1187                    ? WideDef
1188                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1189                                       SignExtend, NarrowUse);
1190   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1191                    ? WideDef
1192                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1193                                       SignExtend, NarrowUse);
1194 
1195   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1196   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1197                                         NarrowBO->getName());
1198 
1199   IRBuilder<> Builder(NarrowUse);
1200   Builder.Insert(WideBO);
1201   WideBO->copyIRFlags(NarrowBO);
1202   return WideBO;
1203 }
1204 
1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
1206   auto It = ExtendKindMap.find(I);
1207   assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
1208   return It->second;
1209 }
1210 
1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
1212                                      unsigned OpCode) const {
1213   if (OpCode == Instruction::Add)
1214     return SE->getAddExpr(LHS, RHS);
1215   if (OpCode == Instruction::Sub)
1216     return SE->getMinusSCEV(LHS, RHS);
1217   if (OpCode == Instruction::Mul)
1218     return SE->getMulExpr(LHS, RHS);
1219 
1220   llvm_unreachable("Unsupported opcode.");
1221 }
1222 
1223 /// No-wrap operations can transfer sign extension of their result to their
1224 /// operands. Generate the SCEV value for the widened operation without
1225 /// actually modifying the IR yet. If the expression after extending the
1226 /// operands is an AddRec for this loop, return the AddRec and the kind of
1227 /// extension used.
1228 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
1229   // Handle the common case of add<nsw/nuw>
1230   const unsigned OpCode = DU.NarrowUse->getOpcode();
1231   // Only Add/Sub/Mul instructions supported yet.
1232   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1233       OpCode != Instruction::Mul)
1234     return {nullptr, Unknown};
1235 
1236   // One operand (NarrowDef) has already been extended to WideDef. Now determine
1237   // if extending the other will lead to a recurrence.
1238   const unsigned ExtendOperIdx =
1239       DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
1240   assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
1241 
1242   const SCEV *ExtendOperExpr = nullptr;
1243   const OverflowingBinaryOperator *OBO =
1244     cast<OverflowingBinaryOperator>(DU.NarrowUse);
1245   ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
1246   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1247     ExtendOperExpr = SE->getSignExtendExpr(
1248       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1249   else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1250     ExtendOperExpr = SE->getZeroExtendExpr(
1251       SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
1252   else
1253     return {nullptr, Unknown};
1254 
1255   // When creating this SCEV expr, don't apply the current operations NSW or NUW
1256   // flags. This instruction may be guarded by control flow that the no-wrap
1257   // behavior depends on. Non-control-equivalent instructions can be mapped to
1258   // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
1259   // semantics to those operations.
1260   const SCEV *lhs = SE->getSCEV(DU.WideDef);
1261   const SCEV *rhs = ExtendOperExpr;
1262 
1263   // Let's swap operands to the initial order for the case of non-commutative
1264   // operations, like SUB. See PR21014.
1265   if (ExtendOperIdx == 0)
1266     std::swap(lhs, rhs);
1267   const SCEVAddRecExpr *AddRec =
1268       dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
1269 
1270   if (!AddRec || AddRec->getLoop() != L)
1271     return {nullptr, Unknown};
1272 
1273   return {AddRec, ExtKind};
1274 }
1275 
1276 /// Is this instruction potentially interesting for further simplification after
1277 /// widening it's type? In other words, can the extend be safely hoisted out of
1278 /// the loop with SCEV reducing the value to a recurrence on the same loop. If
1279 /// so, return the extended recurrence and the kind of extension used. Otherwise
1280 /// return {nullptr, Unknown}.
1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
1282   if (!SE->isSCEVable(DU.NarrowUse->getType()))
1283     return {nullptr, Unknown};
1284 
1285   const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
1286   if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
1287       SE->getTypeSizeInBits(WideType)) {
1288     // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
1289     // index. So don't follow this use.
1290     return {nullptr, Unknown};
1291   }
1292 
1293   const SCEV *WideExpr;
1294   ExtendKind ExtKind;
1295   if (DU.NeverNegative) {
1296     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1297     if (isa<SCEVAddRecExpr>(WideExpr))
1298       ExtKind = SignExtended;
1299     else {
1300       WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1301       ExtKind = ZeroExtended;
1302     }
1303   } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
1304     WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
1305     ExtKind = SignExtended;
1306   } else {
1307     WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
1308     ExtKind = ZeroExtended;
1309   }
1310   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
1311   if (!AddRec || AddRec->getLoop() != L)
1312     return {nullptr, Unknown};
1313   return {AddRec, ExtKind};
1314 }
1315 
1316 /// This IV user cannot be widened. Replace this use of the original narrow IV
1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1319   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1320   if (!InsertPt)
1321     return;
1322   LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1323                     << *DU.NarrowUse << "\n");
1324   IRBuilder<> Builder(InsertPt);
1325   Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1326   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1327 }
1328 
1329 /// If the narrow use is a compare instruction, then widen the compare
1330 //  (and possibly the other operand).  The extend operation is hoisted into the
1331 // loop preheader as far as possible.
1332 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1333   ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1334   if (!Cmp)
1335     return false;
1336 
1337   // We can legally widen the comparison in the following two cases:
1338   //
1339   //  - The signedness of the IV extension and comparison match
1340   //
1341   //  - The narrow IV is always positive (and thus its sign extension is equal
1342   //    to its zero extension).  For instance, let's say we're zero extending
1343   //    %narrow for the following use
1344   //
1345   //      icmp slt i32 %narrow, %val   ... (A)
1346   //
1347   //    and %narrow is always positive.  Then
1348   //
1349   //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1350   //          == icmp slt i32 zext(%narrow), sext(%val)
1351   bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1352   if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1353     return false;
1354 
1355   Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1356   unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1357   unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1358   assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1359 
1360   // Widen the compare instruction.
1361   auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1362   if (!InsertPt)
1363     return false;
1364   IRBuilder<> Builder(InsertPt);
1365   DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1366 
1367   // Widen the other operand of the compare, if necessary.
1368   if (CastWidth < IVWidth) {
1369     Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1370     DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1371   }
1372   return true;
1373 }
1374 
1375 /// If the narrow use is an instruction whose two operands are the defining
1376 /// instruction of DU and a load instruction, then we have the following:
1377 /// if the load is hoisted outside the loop, then we do not reach this function
1378 /// as scalar evolution analysis works fine in widenIVUse with variables
1379 /// hoisted outside the loop and efficient code is subsequently generated by
1380 /// not emitting truncate instructions. But when the load is not hoisted
1381 /// (whether due to limitation in alias analysis or due to a true legality),
1382 /// then scalar evolution can not proceed with loop variant values and
1383 /// inefficient code is generated. This function handles the non-hoisted load
1384 /// special case by making the optimization generate the same type of code for
1385 /// hoisted and non-hoisted load (widen use and eliminate sign extend
1386 /// instruction). This special case is important especially when the induction
1387 /// variables are affecting addressing mode in code generation.
1388 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1389   Instruction *NarrowUse = DU.NarrowUse;
1390   Instruction *NarrowDef = DU.NarrowDef;
1391   Instruction *WideDef = DU.WideDef;
1392 
1393   // Handle the common case of add<nsw/nuw>
1394   const unsigned OpCode = NarrowUse->getOpcode();
1395   // Only Add/Sub/Mul instructions are supported.
1396   if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1397       OpCode != Instruction::Mul)
1398     return false;
1399 
1400   // The operand that is not defined by NarrowDef of DU. Let's call it the
1401   // other operand.
1402   unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1403   assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1404          "bad DU");
1405 
1406   const SCEV *ExtendOperExpr = nullptr;
1407   const OverflowingBinaryOperator *OBO =
1408     cast<OverflowingBinaryOperator>(NarrowUse);
1409   ExtendKind ExtKind = getExtendKind(NarrowDef);
1410   if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1411     ExtendOperExpr = SE->getSignExtendExpr(
1412       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1413   else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1414     ExtendOperExpr = SE->getZeroExtendExpr(
1415       SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1416   else
1417     return false;
1418 
1419   // We are interested in the other operand being a load instruction.
1420   // But, we should look into relaxing this restriction later on.
1421   auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1422   if (I && I->getOpcode() != Instruction::Load)
1423     return false;
1424 
1425   // Verifying that Defining operand is an AddRec
1426   const SCEV *Op1 = SE->getSCEV(WideDef);
1427   const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1428   if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1429     return false;
1430   // Verifying that other operand is an Extend.
1431   if (ExtKind == SignExtended) {
1432     if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1433       return false;
1434   } else {
1435     if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1436       return false;
1437   }
1438 
1439   if (ExtKind == SignExtended) {
1440     for (Use &U : NarrowUse->uses()) {
1441       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1442       if (!User || User->getType() != WideType)
1443         return false;
1444     }
1445   } else { // ExtKind == ZeroExtended
1446     for (Use &U : NarrowUse->uses()) {
1447       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1448       if (!User || User->getType() != WideType)
1449         return false;
1450     }
1451   }
1452 
1453   return true;
1454 }
1455 
1456 /// Special Case for widening with variant Loads (see
1457 /// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1458 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1459   Instruction *NarrowUse = DU.NarrowUse;
1460   Instruction *NarrowDef = DU.NarrowDef;
1461   Instruction *WideDef = DU.WideDef;
1462 
1463   ExtendKind ExtKind = getExtendKind(NarrowDef);
1464 
1465   LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1466 
1467   // Generating a widening use instruction.
1468   Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1469                    ? WideDef
1470                    : createExtendInst(NarrowUse->getOperand(0), WideType,
1471                                       ExtKind, NarrowUse);
1472   Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1473                    ? WideDef
1474                    : createExtendInst(NarrowUse->getOperand(1), WideType,
1475                                       ExtKind, NarrowUse);
1476 
1477   auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1478   auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1479                                         NarrowBO->getName());
1480   IRBuilder<> Builder(NarrowUse);
1481   Builder.Insert(WideBO);
1482   WideBO->copyIRFlags(NarrowBO);
1483 
1484   if (ExtKind == SignExtended)
1485     ExtendKindMap[NarrowUse] = SignExtended;
1486   else
1487     ExtendKindMap[NarrowUse] = ZeroExtended;
1488 
1489   // Update the Use.
1490   if (ExtKind == SignExtended) {
1491     for (Use &U : NarrowUse->uses()) {
1492       SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1493       if (User && User->getType() == WideType) {
1494         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1495                           << *WideBO << "\n");
1496         ++NumElimExt;
1497         User->replaceAllUsesWith(WideBO);
1498         DeadInsts.emplace_back(User);
1499       }
1500     }
1501   } else { // ExtKind == ZeroExtended
1502     for (Use &U : NarrowUse->uses()) {
1503       ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1504       if (User && User->getType() == WideType) {
1505         LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1506                           << *WideBO << "\n");
1507         ++NumElimExt;
1508         User->replaceAllUsesWith(WideBO);
1509         DeadInsts.emplace_back(User);
1510       }
1511     }
1512   }
1513 }
1514 
1515 /// Determine whether an individual user of the narrow IV can be widened. If so,
1516 /// return the wide clone of the user.
1517 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1518   assert(ExtendKindMap.count(DU.NarrowDef) &&
1519          "Should already know the kind of extension used to widen NarrowDef");
1520 
1521   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1522   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1523     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1524       // For LCSSA phis, sink the truncate outside the loop.
1525       // After SimplifyCFG most loop exit targets have a single predecessor.
1526       // Otherwise fall back to a truncate within the loop.
1527       if (UsePhi->getNumOperands() != 1)
1528         truncateIVUse(DU, DT, LI);
1529       else {
1530         // Widening the PHI requires us to insert a trunc.  The logical place
1531         // for this trunc is in the same BB as the PHI.  This is not possible if
1532         // the BB is terminated by a catchswitch.
1533         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1534           return nullptr;
1535 
1536         PHINode *WidePhi =
1537           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1538                           UsePhi);
1539         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1540         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1541         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1542         UsePhi->replaceAllUsesWith(Trunc);
1543         DeadInsts.emplace_back(UsePhi);
1544         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1545                           << *WidePhi << "\n");
1546       }
1547       return nullptr;
1548     }
1549   }
1550 
1551   // This narrow use can be widened by a sext if it's non-negative or its narrow
1552   // def was widended by a sext. Same for zext.
1553   auto canWidenBySExt = [&]() {
1554     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1555   };
1556   auto canWidenByZExt = [&]() {
1557     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1558   };
1559 
1560   // Our raison d'etre! Eliminate sign and zero extension.
1561   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1562       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1563     Value *NewDef = DU.WideDef;
1564     if (DU.NarrowUse->getType() != WideType) {
1565       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1566       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1567       if (CastWidth < IVWidth) {
1568         // The cast isn't as wide as the IV, so insert a Trunc.
1569         IRBuilder<> Builder(DU.NarrowUse);
1570         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1571       }
1572       else {
1573         // A wider extend was hidden behind a narrower one. This may induce
1574         // another round of IV widening in which the intermediate IV becomes
1575         // dead. It should be very rare.
1576         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1577                           << " not wide enough to subsume " << *DU.NarrowUse
1578                           << "\n");
1579         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1580         NewDef = DU.NarrowUse;
1581       }
1582     }
1583     if (NewDef != DU.NarrowUse) {
1584       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1585                         << " replaced by " << *DU.WideDef << "\n");
1586       ++NumElimExt;
1587       DU.NarrowUse->replaceAllUsesWith(NewDef);
1588       DeadInsts.emplace_back(DU.NarrowUse);
1589     }
1590     // Now that the extend is gone, we want to expose it's uses for potential
1591     // further simplification. We don't need to directly inform SimplifyIVUsers
1592     // of the new users, because their parent IV will be processed later as a
1593     // new loop phi. If we preserved IVUsers analysis, we would also want to
1594     // push the uses of WideDef here.
1595 
1596     // No further widening is needed. The deceased [sz]ext had done it for us.
1597     return nullptr;
1598   }
1599 
1600   // Does this user itself evaluate to a recurrence after widening?
1601   WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1602   if (!WideAddRec.first)
1603     WideAddRec = getWideRecurrence(DU);
1604 
1605   assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1606   if (!WideAddRec.first) {
1607     // If use is a loop condition, try to promote the condition instead of
1608     // truncating the IV first.
1609     if (widenLoopCompare(DU))
1610       return nullptr;
1611 
1612     // We are here about to generate a truncate instruction that may hurt
1613     // performance because the scalar evolution expression computed earlier
1614     // in WideAddRec.first does not indicate a polynomial induction expression.
1615     // In that case, look at the operands of the use instruction to determine
1616     // if we can still widen the use instead of truncating its operand.
1617     if (widenWithVariantLoadUse(DU)) {
1618       widenWithVariantLoadUseCodegen(DU);
1619       return nullptr;
1620     }
1621 
1622     // This user does not evaluate to a recurrence after widening, so don't
1623     // follow it. Instead insert a Trunc to kill off the original use,
1624     // eventually isolating the original narrow IV so it can be removed.
1625     truncateIVUse(DU, DT, LI);
1626     return nullptr;
1627   }
1628   // Assume block terminators cannot evaluate to a recurrence. We can't to
1629   // insert a Trunc after a terminator if there happens to be a critical edge.
1630   assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1631          "SCEV is not expected to evaluate a block terminator");
1632 
1633   // Reuse the IV increment that SCEVExpander created as long as it dominates
1634   // NarrowUse.
1635   Instruction *WideUse = nullptr;
1636   if (WideAddRec.first == WideIncExpr &&
1637       Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1638     WideUse = WideInc;
1639   else {
1640     WideUse = cloneIVUser(DU, WideAddRec.first);
1641     if (!WideUse)
1642       return nullptr;
1643   }
1644   // Evaluation of WideAddRec ensured that the narrow expression could be
1645   // extended outside the loop without overflow. This suggests that the wide use
1646   // evaluates to the same expression as the extended narrow use, but doesn't
1647   // absolutely guarantee it. Hence the following failsafe check. In rare cases
1648   // where it fails, we simply throw away the newly created wide use.
1649   if (WideAddRec.first != SE->getSCEV(WideUse)) {
1650     LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1651                       << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1652                       << "\n");
1653     DeadInsts.emplace_back(WideUse);
1654     return nullptr;
1655   }
1656 
1657   ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1658   // Returning WideUse pushes it on the worklist.
1659   return WideUse;
1660 }
1661 
1662 /// Add eligible users of NarrowDef to NarrowIVUsers.
1663 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1664   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1665   bool NonNegativeDef =
1666       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1667                            SE->getConstant(NarrowSCEV->getType(), 0));
1668   for (User *U : NarrowDef->users()) {
1669     Instruction *NarrowUser = cast<Instruction>(U);
1670 
1671     // Handle data flow merges and bizarre phi cycles.
1672     if (!Widened.insert(NarrowUser).second)
1673       continue;
1674 
1675     bool NonNegativeUse = false;
1676     if (!NonNegativeDef) {
1677       // We might have a control-dependent range information for this context.
1678       if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1679         NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1680     }
1681 
1682     NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1683                                NonNegativeDef || NonNegativeUse);
1684   }
1685 }
1686 
1687 /// Process a single induction variable. First use the SCEVExpander to create a
1688 /// wide induction variable that evaluates to the same recurrence as the
1689 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1690 /// def-use chain. After widenIVUse has processed all interesting IV users, the
1691 /// narrow IV will be isolated for removal by DeleteDeadPHIs.
1692 ///
1693 /// It would be simpler to delete uses as they are processed, but we must avoid
1694 /// invalidating SCEV expressions.
1695 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1696   // Is this phi an induction variable?
1697   const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1698   if (!AddRec)
1699     return nullptr;
1700 
1701   // Widen the induction variable expression.
1702   const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1703                                ? SE->getSignExtendExpr(AddRec, WideType)
1704                                : SE->getZeroExtendExpr(AddRec, WideType);
1705 
1706   assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1707          "Expect the new IV expression to preserve its type");
1708 
1709   // Can the IV be extended outside the loop without overflow?
1710   AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1711   if (!AddRec || AddRec->getLoop() != L)
1712     return nullptr;
1713 
1714   // An AddRec must have loop-invariant operands. Since this AddRec is
1715   // materialized by a loop header phi, the expression cannot have any post-loop
1716   // operands, so they must dominate the loop header.
1717   assert(
1718       SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1719       SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1720       "Loop header phi recurrence inputs do not dominate the loop");
1721 
1722   // Iterate over IV uses (including transitive ones) looking for IV increments
1723   // of the form 'add nsw %iv, <const>'. For each increment and each use of
1724   // the increment calculate control-dependent range information basing on
1725   // dominating conditions inside of the loop (e.g. a range check inside of the
1726   // loop). Calculated ranges are stored in PostIncRangeInfos map.
1727   //
1728   // Control-dependent range information is later used to prove that a narrow
1729   // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1730   // this on demand because when pushNarrowIVUsers needs this information some
1731   // of the dominating conditions might be already widened.
1732   if (UsePostIncrementRanges)
1733     calculatePostIncRanges(OrigPhi);
1734 
1735   // The rewriter provides a value for the desired IV expression. This may
1736   // either find an existing phi or materialize a new one. Either way, we
1737   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1738   // of the phi-SCC dominates the loop entry.
1739   Instruction *InsertPt = &L->getHeader()->front();
1740   WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1741 
1742   // Remembering the WideIV increment generated by SCEVExpander allows
1743   // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1744   // employ a general reuse mechanism because the call above is the only call to
1745   // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1746   if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1747     WideInc =
1748       cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1749     WideIncExpr = SE->getSCEV(WideInc);
1750     // Propagate the debug location associated with the original loop increment
1751     // to the new (widened) increment.
1752     auto *OrigInc =
1753       cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1754     WideInc->setDebugLoc(OrigInc->getDebugLoc());
1755   }
1756 
1757   LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1758   ++NumWidened;
1759 
1760   // Traverse the def-use chain using a worklist starting at the original IV.
1761   assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
1762 
1763   Widened.insert(OrigPhi);
1764   pushNarrowIVUsers(OrigPhi, WidePhi);
1765 
1766   while (!NarrowIVUsers.empty()) {
1767     NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1768 
1769     // Process a def-use edge. This may replace the use, so don't hold a
1770     // use_iterator across it.
1771     Instruction *WideUse = widenIVUse(DU, Rewriter);
1772 
1773     // Follow all def-use edges from the previous narrow use.
1774     if (WideUse)
1775       pushNarrowIVUsers(DU.NarrowUse, WideUse);
1776 
1777     // widenIVUse may have removed the def-use edge.
1778     if (DU.NarrowDef->use_empty())
1779       DeadInsts.emplace_back(DU.NarrowDef);
1780   }
1781 
1782   // Attach any debug information to the new PHI. Since OrigPhi and WidePHI
1783   // evaluate the same recurrence, we can just copy the debug info over.
1784   SmallVector<DbgValueInst *, 1> DbgValues;
1785   llvm::findDbgValues(DbgValues, OrigPhi);
1786   auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(),
1787                                      ValueAsMetadata::get(WidePhi));
1788   for (auto &DbgValue : DbgValues)
1789     DbgValue->setOperand(0, MDPhi);
1790   return WidePhi;
1791 }
1792 
1793 /// Calculates control-dependent range for the given def at the given context
1794 /// by looking at dominating conditions inside of the loop
1795 void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1796                                     Instruction *NarrowUser) {
1797   using namespace llvm::PatternMatch;
1798 
1799   Value *NarrowDefLHS;
1800   const APInt *NarrowDefRHS;
1801   if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1802                                  m_APInt(NarrowDefRHS))) ||
1803       !NarrowDefRHS->isNonNegative())
1804     return;
1805 
1806   auto UpdateRangeFromCondition = [&] (Value *Condition,
1807                                        bool TrueDest) {
1808     CmpInst::Predicate Pred;
1809     Value *CmpRHS;
1810     if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1811                                  m_Value(CmpRHS))))
1812       return;
1813 
1814     CmpInst::Predicate P =
1815             TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1816 
1817     auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1818     auto CmpConstrainedLHSRange =
1819             ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1820     auto NarrowDefRange =
1821             CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
1822 
1823     updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1824   };
1825 
1826   auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1827     if (!HasGuards)
1828       return;
1829 
1830     for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1831                                      Ctx->getParent()->rend())) {
1832       Value *C = nullptr;
1833       if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1834         UpdateRangeFromCondition(C, /*TrueDest=*/true);
1835     }
1836   };
1837 
1838   UpdateRangeFromGuards(NarrowUser);
1839 
1840   BasicBlock *NarrowUserBB = NarrowUser->getParent();
1841   // If NarrowUserBB is statically unreachable asking dominator queries may
1842   // yield surprising results. (e.g. the block may not have a dom tree node)
1843   if (!DT->isReachableFromEntry(NarrowUserBB))
1844     return;
1845 
1846   for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1847        L->contains(DTB->getBlock());
1848        DTB = DTB->getIDom()) {
1849     auto *BB = DTB->getBlock();
1850     auto *TI = BB->getTerminator();
1851     UpdateRangeFromGuards(TI);
1852 
1853     auto *BI = dyn_cast<BranchInst>(TI);
1854     if (!BI || !BI->isConditional())
1855       continue;
1856 
1857     auto *TrueSuccessor = BI->getSuccessor(0);
1858     auto *FalseSuccessor = BI->getSuccessor(1);
1859 
1860     auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1861       return BBE.isSingleEdge() &&
1862              DT->dominates(BBE, NarrowUser->getParent());
1863     };
1864 
1865     if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1866       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1867 
1868     if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1869       UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1870   }
1871 }
1872 
1873 /// Calculates PostIncRangeInfos map for the given IV
1874 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1875   SmallPtrSet<Instruction *, 16> Visited;
1876   SmallVector<Instruction *, 6> Worklist;
1877   Worklist.push_back(OrigPhi);
1878   Visited.insert(OrigPhi);
1879 
1880   while (!Worklist.empty()) {
1881     Instruction *NarrowDef = Worklist.pop_back_val();
1882 
1883     for (Use &U : NarrowDef->uses()) {
1884       auto *NarrowUser = cast<Instruction>(U.getUser());
1885 
1886       // Don't go looking outside the current loop.
1887       auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1888       if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1889         continue;
1890 
1891       if (!Visited.insert(NarrowUser).second)
1892         continue;
1893 
1894       Worklist.push_back(NarrowUser);
1895 
1896       calculatePostIncRange(NarrowDef, NarrowUser);
1897     }
1898   }
1899 }
1900 
1901 //===----------------------------------------------------------------------===//
1902 //  Live IV Reduction - Minimize IVs live across the loop.
1903 //===----------------------------------------------------------------------===//
1904 
1905 //===----------------------------------------------------------------------===//
1906 //  Simplification of IV users based on SCEV evaluation.
1907 //===----------------------------------------------------------------------===//
1908 
1909 namespace {
1910 
1911 class IndVarSimplifyVisitor : public IVVisitor {
1912   ScalarEvolution *SE;
1913   const TargetTransformInfo *TTI;
1914   PHINode *IVPhi;
1915 
1916 public:
1917   WideIVInfo WI;
1918 
1919   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1920                         const TargetTransformInfo *TTI,
1921                         const DominatorTree *DTree)
1922     : SE(SCEV), TTI(TTI), IVPhi(IV) {
1923     DT = DTree;
1924     WI.NarrowIV = IVPhi;
1925   }
1926 
1927   // Implement the interface used by simplifyUsersOfIV.
1928   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1929 };
1930 
1931 } // end anonymous namespace
1932 
1933 /// Iteratively perform simplification on a worklist of IV users. Each
1934 /// successive simplification may push more users which may themselves be
1935 /// candidates for simplification.
1936 ///
1937 /// Sign/Zero extend elimination is interleaved with IV simplification.
1938 bool IndVarSimplify::simplifyAndExtend(Loop *L,
1939                                        SCEVExpander &Rewriter,
1940                                        LoopInfo *LI) {
1941   SmallVector<WideIVInfo, 8> WideIVs;
1942 
1943   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1944           Intrinsic::getName(Intrinsic::experimental_guard));
1945   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1946 
1947   SmallVector<PHINode*, 8> LoopPhis;
1948   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
1949     LoopPhis.push_back(cast<PHINode>(I));
1950   }
1951   // Each round of simplification iterates through the SimplifyIVUsers worklist
1952   // for all current phis, then determines whether any IVs can be
1953   // widened. Widening adds new phis to LoopPhis, inducing another round of
1954   // simplification on the wide IVs.
1955   bool Changed = false;
1956   while (!LoopPhis.empty()) {
1957     // Evaluate as many IV expressions as possible before widening any IVs. This
1958     // forces SCEV to set no-wrap flags before evaluating sign/zero
1959     // extension. The first time SCEV attempts to normalize sign/zero extension,
1960     // the result becomes final. So for the most predictable results, we delay
1961     // evaluation of sign/zero extend evaluation until needed, and avoid running
1962     // other SCEV based analysis prior to simplifyAndExtend.
1963     do {
1964       PHINode *CurrIV = LoopPhis.pop_back_val();
1965 
1966       // Information about sign/zero extensions of CurrIV.
1967       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1968 
1969       Changed |=
1970           simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1971 
1972       if (Visitor.WI.WidestNativeType) {
1973         WideIVs.push_back(Visitor.WI);
1974       }
1975     } while(!LoopPhis.empty());
1976 
1977     for (; !WideIVs.empty(); WideIVs.pop_back()) {
1978       WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
1979       if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
1980         Changed = true;
1981         LoopPhis.push_back(WidePhi);
1982       }
1983     }
1984   }
1985   return Changed;
1986 }
1987 
1988 //===----------------------------------------------------------------------===//
1989 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1990 //===----------------------------------------------------------------------===//
1991 
1992 /// Given an Value which is hoped to be part of an add recurance in the given
1993 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
1994 /// that this is less general than SCEVs AddRec checking.
1995 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
1996   Instruction *IncI = dyn_cast<Instruction>(IncV);
1997   if (!IncI)
1998     return nullptr;
1999 
2000   switch (IncI->getOpcode()) {
2001   case Instruction::Add:
2002   case Instruction::Sub:
2003     break;
2004   case Instruction::GetElementPtr:
2005     // An IV counter must preserve its type.
2006     if (IncI->getNumOperands() == 2)
2007       break;
2008     LLVM_FALLTHROUGH;
2009   default:
2010     return nullptr;
2011   }
2012 
2013   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2014   if (Phi && Phi->getParent() == L->getHeader()) {
2015     if (L->isLoopInvariant(IncI->getOperand(1)))
2016       return Phi;
2017     return nullptr;
2018   }
2019   if (IncI->getOpcode() == Instruction::GetElementPtr)
2020     return nullptr;
2021 
2022   // Allow add/sub to be commuted.
2023   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2024   if (Phi && Phi->getParent() == L->getHeader()) {
2025     if (L->isLoopInvariant(IncI->getOperand(0)))
2026       return Phi;
2027   }
2028   return nullptr;
2029 }
2030 
2031 /// Whether the current loop exit test is based on this value.  Currently this
2032 /// is limited to a direct use in the loop condition.
2033 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2034   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2035   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2036   // TODO: Allow non-icmp loop test.
2037   if (!ICmp)
2038     return false;
2039 
2040   // TODO: Allow indirect use.
2041   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2042 }
2043 
2044 /// linearFunctionTestReplace policy. Return true unless we can show that the
2045 /// current exit test is already sufficiently canonical.
2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2047   assert(L->getLoopLatch() && "Must be in simplified form");
2048 
2049   // Avoid converting a constant or loop invariant test back to a runtime
2050   // test.  This is critical for when SCEV's cached ExitCount is less precise
2051   // than the current IR (such as after we've proven a particular exit is
2052   // actually dead and thus the BE count never reaches our ExitCount.)
2053   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2054   if (L->isLoopInvariant(BI->getCondition()))
2055     return false;
2056 
2057   // Do LFTR to simplify the exit condition to an ICMP.
2058   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2059   if (!Cond)
2060     return true;
2061 
2062   // Do LFTR to simplify the exit ICMP to EQ/NE
2063   ICmpInst::Predicate Pred = Cond->getPredicate();
2064   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2065     return true;
2066 
2067   // Look for a loop invariant RHS
2068   Value *LHS = Cond->getOperand(0);
2069   Value *RHS = Cond->getOperand(1);
2070   if (!L->isLoopInvariant(RHS)) {
2071     if (!L->isLoopInvariant(LHS))
2072       return true;
2073     std::swap(LHS, RHS);
2074   }
2075   // Look for a simple IV counter LHS
2076   PHINode *Phi = dyn_cast<PHINode>(LHS);
2077   if (!Phi)
2078     Phi = getLoopPhiForCounter(LHS, L);
2079 
2080   if (!Phi)
2081     return true;
2082 
2083   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2084   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2085   if (Idx < 0)
2086     return true;
2087 
2088   // Do LFTR if the exit condition's IV is *not* a simple counter.
2089   Value *IncV = Phi->getIncomingValue(Idx);
2090   return Phi != getLoopPhiForCounter(IncV, L);
2091 }
2092 
2093 /// Return true if undefined behavior would provable be executed on the path to
2094 /// OnPathTo if Root produced a posion result.  Note that this doesn't say
2095 /// anything about whether OnPathTo is actually executed or whether Root is
2096 /// actually poison.  This can be used to assess whether a new use of Root can
2097 /// be added at a location which is control equivalent with OnPathTo (such as
2098 /// immediately before it) without introducing UB which didn't previously
2099 /// exist.  Note that a false result conveys no information.
2100 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2101                                           Instruction *OnPathTo,
2102                                           DominatorTree *DT) {
2103   // Basic approach is to assume Root is poison, propagate poison forward
2104   // through all users we can easily track, and then check whether any of those
2105   // users are provable UB and must execute before out exiting block might
2106   // exit.
2107 
2108   // The set of all recursive users we've visited (which are assumed to all be
2109   // poison because of said visit)
2110   SmallSet<const Value *, 16> KnownPoison;
2111   SmallVector<const Instruction*, 16> Worklist;
2112   Worklist.push_back(Root);
2113   while (!Worklist.empty()) {
2114     const Instruction *I = Worklist.pop_back_val();
2115 
2116     // If we know this must trigger UB on a path leading our target.
2117     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2118       return true;
2119 
2120     // If we can't analyze propagation through this instruction, just skip it
2121     // and transitive users.  Safe as false is a conservative result.
2122     if (!propagatesFullPoison(I) && I != Root)
2123       continue;
2124 
2125     if (KnownPoison.insert(I).second)
2126       for (const User *User : I->users())
2127         Worklist.push_back(cast<Instruction>(User));
2128   }
2129 
2130   // Might be non-UB, or might have a path we couldn't prove must execute on
2131   // way to exiting bb.
2132   return false;
2133 }
2134 
2135 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2136 /// down to checking that all operands are constant and listing instructions
2137 /// that may hide undef.
2138 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2139                                unsigned Depth) {
2140   if (isa<Constant>(V))
2141     return !isa<UndefValue>(V);
2142 
2143   if (Depth >= 6)
2144     return false;
2145 
2146   // Conservatively handle non-constant non-instructions. For example, Arguments
2147   // may be undef.
2148   Instruction *I = dyn_cast<Instruction>(V);
2149   if (!I)
2150     return false;
2151 
2152   // Load and return values may be undef.
2153   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2154     return false;
2155 
2156   // Optimistically handle other instructions.
2157   for (Value *Op : I->operands()) {
2158     if (!Visited.insert(Op).second)
2159       continue;
2160     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2161       return false;
2162   }
2163   return true;
2164 }
2165 
2166 /// Return true if the given value is concrete. We must prove that undef can
2167 /// never reach it.
2168 ///
2169 /// TODO: If we decide that this is a good approach to checking for undef, we
2170 /// may factor it into a common location.
2171 static bool hasConcreteDef(Value *V) {
2172   SmallPtrSet<Value*, 8> Visited;
2173   Visited.insert(V);
2174   return hasConcreteDefImpl(V, Visited, 0);
2175 }
2176 
2177 /// Return true if this IV has any uses other than the (soon to be rewritten)
2178 /// loop exit test.
2179 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2180   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2181   Value *IncV = Phi->getIncomingValue(LatchIdx);
2182 
2183   for (User *U : Phi->users())
2184     if (U != Cond && U != IncV) return false;
2185 
2186   for (User *U : IncV->users())
2187     if (U != Cond && U != Phi) return false;
2188   return true;
2189 }
2190 
2191 /// Return true if the given phi is a "counter" in L.  A counter is an
2192 /// add recurance (of integer or pointer type) with an arbitrary start, and a
2193 /// step of 1.  Note that L must have exactly one latch.
2194 static bool isLoopCounter(PHINode* Phi, Loop *L,
2195                           ScalarEvolution *SE) {
2196   assert(Phi->getParent() == L->getHeader());
2197   assert(L->getLoopLatch());
2198 
2199   if (!SE->isSCEVable(Phi->getType()))
2200     return false;
2201 
2202   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2203   if (!AR || AR->getLoop() != L || !AR->isAffine())
2204     return false;
2205 
2206   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2207   if (!Step || !Step->isOne())
2208     return false;
2209 
2210   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2211   Value *IncV = Phi->getIncomingValue(LatchIdx);
2212   return (getLoopPhiForCounter(IncV, L) == Phi);
2213 }
2214 
2215 /// Search the loop header for a loop counter (anadd rec w/step of one)
2216 /// suitable for use by LFTR.  If multiple counters are available, select the
2217 /// "best" one based profitable heuristics.
2218 ///
2219 /// BECount may be an i8* pointer type. The pointer difference is already
2220 /// valid count without scaling the address stride, so it remains a pointer
2221 /// expression as far as SCEV is concerned.
2222 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2223                                 const SCEV *BECount,
2224                                 ScalarEvolution *SE, DominatorTree *DT) {
2225   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2226 
2227   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2228 
2229   // Loop over all of the PHI nodes, looking for a simple counter.
2230   PHINode *BestPhi = nullptr;
2231   const SCEV *BestInit = nullptr;
2232   BasicBlock *LatchBlock = L->getLoopLatch();
2233   assert(LatchBlock && "Must be in simplified form");
2234   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2235 
2236   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2237     PHINode *Phi = cast<PHINode>(I);
2238     if (!isLoopCounter(Phi, L, SE))
2239       continue;
2240 
2241     // Avoid comparing an integer IV against a pointer Limit.
2242     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2243       continue;
2244 
2245     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2246 
2247     // AR may be a pointer type, while BECount is an integer type.
2248     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2249     // AR may not be a narrower type, or we may never exit.
2250     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2251     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2252       continue;
2253 
2254     // Avoid reusing a potentially undef value to compute other values that may
2255     // have originally had a concrete definition.
2256     if (!hasConcreteDef(Phi)) {
2257       // We explicitly allow unknown phis as long as they are already used by
2258       // the loop exit test.  This is legal since performing LFTR could not
2259       // increase the number of undef users.
2260       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2261       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2262           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2263         continue;
2264     }
2265 
2266     // Avoid introducing undefined behavior due to poison which didn't exist in
2267     // the original program.  (Annoyingly, the rules for poison and undef
2268     // propagation are distinct, so this does NOT cover the undef case above.)
2269     // We have to ensure that we don't introduce UB by introducing a use on an
2270     // iteration where said IV produces poison.  Our strategy here differs for
2271     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2272     // see code in linearFunctionTestReplace.  For pointers, we restrict
2273     // transforms as there is no good way to reinfer inbounds once lost.
2274     if (!Phi->getType()->isIntegerTy() &&
2275         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2276       continue;
2277 
2278     const SCEV *Init = AR->getStart();
2279 
2280     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2281       // Don't force a live loop counter if another IV can be used.
2282       if (AlmostDeadIV(Phi, LatchBlock, Cond))
2283         continue;
2284 
2285       // Prefer to count-from-zero. This is a more "canonical" counter form. It
2286       // also prefers integer to pointer IVs.
2287       if (BestInit->isZero() != Init->isZero()) {
2288         if (BestInit->isZero())
2289           continue;
2290       }
2291       // If two IVs both count from zero or both count from nonzero then the
2292       // narrower is likely a dead phi that has been widened. Use the wider phi
2293       // to allow the other to be eliminated.
2294       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2295         continue;
2296     }
2297     BestPhi = Phi;
2298     BestInit = Init;
2299   }
2300   return BestPhi;
2301 }
2302 
2303 /// Insert an IR expression which computes the value held by the IV IndVar
2304 /// (which must be an loop counter w/unit stride) after the backedge of loop L
2305 /// is taken ExitCount times.
2306 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2307                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
2308                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
2309   assert(isLoopCounter(IndVar, L, SE));
2310   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2311   const SCEV *IVInit = AR->getStart();
2312 
2313   // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2314   // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2315   // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2316   // the existing GEPs whenever possible.
2317   if (IndVar->getType()->isPointerTy() &&
2318       !ExitCount->getType()->isPointerTy()) {
2319     // IVOffset will be the new GEP offset that is interpreted by GEP as a
2320     // signed value. ExitCount on the other hand represents the loop trip count,
2321     // which is an unsigned value. FindLoopCounter only allows induction
2322     // variables that have a positive unit stride of one. This means we don't
2323     // have to handle the case of negative offsets (yet) and just need to zero
2324     // extend ExitCount.
2325     Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2326     const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2327     if (UsePostInc)
2328       IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2329 
2330     // Expand the code for the iteration count.
2331     assert(SE->isLoopInvariant(IVOffset, L) &&
2332            "Computed iteration count is not loop invariant!");
2333 
2334     // We could handle pointer IVs other than i8*, but we need to compensate for
2335     // gep index scaling.
2336     assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2337                              cast<PointerType>(IndVar->getType())
2338                                  ->getElementType())->isOne() &&
2339            "unit stride pointer IV must be i8*");
2340 
2341     const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2342     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2343     return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2344   } else {
2345     // In any other case, convert both IVInit and ExitCount to integers before
2346     // comparing. This may result in SCEV expansion of pointers, but in practice
2347     // SCEV will fold the pointer arithmetic away as such:
2348     // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2349     //
2350     // Valid Cases: (1) both integers is most common; (2) both may be pointers
2351     // for simple memset-style loops.
2352     //
2353     // IVInit integer and ExitCount pointer would only occur if a canonical IV
2354     // were generated on top of case #2, which is not expected.
2355 
2356     assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2357     // For unit stride, IVCount = Start + ExitCount with 2's complement
2358     // overflow.
2359 
2360     // For integer IVs, truncate the IV before computing IVInit + BECount,
2361     // unless we know apriori that the limit must be a constant when evaluated
2362     // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2363     // of the IV in the loop over a (potentially) expensive expansion of the
2364     // widened exit count add(zext(add)) expression.
2365     if (SE->getTypeSizeInBits(IVInit->getType())
2366         > SE->getTypeSizeInBits(ExitCount->getType())) {
2367       if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2368         ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2369       else
2370         IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2371     }
2372 
2373     const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2374 
2375     if (UsePostInc)
2376       IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2377 
2378     // Expand the code for the iteration count.
2379     assert(SE->isLoopInvariant(IVLimit, L) &&
2380            "Computed iteration count is not loop invariant!");
2381     // Ensure that we generate the same type as IndVar, or a smaller integer
2382     // type. In the presence of null pointer values, we have an integer type
2383     // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2384     Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2385       IndVar->getType() : ExitCount->getType();
2386     BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2387     return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2388   }
2389 }
2390 
2391 /// This method rewrites the exit condition of the loop to be a canonical !=
2392 /// comparison against the incremented loop induction variable.  This pass is
2393 /// able to rewrite the exit tests of any loop where the SCEV analysis can
2394 /// determine a loop-invariant trip count of the loop, which is actually a much
2395 /// broader range than just linear tests.
2396 bool IndVarSimplify::
2397 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2398                           const SCEV *ExitCount,
2399                           PHINode *IndVar, SCEVExpander &Rewriter) {
2400   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2401   assert(isLoopCounter(IndVar, L, SE));
2402   Instruction * const IncVar =
2403     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2404 
2405   // Initialize CmpIndVar to the preincremented IV.
2406   Value *CmpIndVar = IndVar;
2407   bool UsePostInc = false;
2408 
2409   // If the exiting block is the same as the backedge block, we prefer to
2410   // compare against the post-incremented value, otherwise we must compare
2411   // against the preincremented value.
2412   if (ExitingBB == L->getLoopLatch()) {
2413     // For pointer IVs, we chose to not strip inbounds which requires us not
2414     // to add a potentially UB introducing use.  We need to either a) show
2415     // the loop test we're modifying is already in post-inc form, or b) show
2416     // that adding a use must not introduce UB.
2417     bool SafeToPostInc =
2418         IndVar->getType()->isIntegerTy() ||
2419         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2420         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2421     if (SafeToPostInc) {
2422       UsePostInc = true;
2423       CmpIndVar = IncVar;
2424     }
2425   }
2426 
2427   // It may be necessary to drop nowrap flags on the incrementing instruction
2428   // if either LFTR moves from a pre-inc check to a post-inc check (in which
2429   // case the increment might have previously been poison on the last iteration
2430   // only) or if LFTR switches to a different IV that was previously dynamically
2431   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2432   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2433   // check), because the pre-inc addrec flags may be adopted from the original
2434   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2435   // TODO: This handling is inaccurate for one case: If we switch to a
2436   // dynamically dead IV that wraps on the first loop iteration only, which is
2437   // not covered by the post-inc addrec. (If the new IV was not dynamically
2438   // dead, it could not be poison on the first iteration in the first place.)
2439   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2440     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2441     if (BO->hasNoUnsignedWrap())
2442       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2443     if (BO->hasNoSignedWrap())
2444       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2445   }
2446 
2447   Value *ExitCnt = genLoopLimit(
2448       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2449   assert(ExitCnt->getType()->isPointerTy() ==
2450              IndVar->getType()->isPointerTy() &&
2451          "genLoopLimit missed a cast");
2452 
2453   // Insert a new icmp_ne or icmp_eq instruction before the branch.
2454   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2455   ICmpInst::Predicate P;
2456   if (L->contains(BI->getSuccessor(0)))
2457     P = ICmpInst::ICMP_NE;
2458   else
2459     P = ICmpInst::ICMP_EQ;
2460 
2461   IRBuilder<> Builder(BI);
2462 
2463   // The new loop exit condition should reuse the debug location of the
2464   // original loop exit condition.
2465   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2466     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2467 
2468   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2469   // avoid the expensive expansion of the limit expression in the wider type,
2470   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2471   // since we know (from the exit count bitwidth), that we can't self-wrap in
2472   // the narrower type.
2473   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2474   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2475   if (CmpIndVarSize > ExitCntSize) {
2476     assert(!CmpIndVar->getType()->isPointerTy() &&
2477            !ExitCnt->getType()->isPointerTy());
2478 
2479     // Before resorting to actually inserting the truncate, use the same
2480     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2481     // the other side of the comparison instead.  We still evaluate the limit
2482     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2483     // a truncate within in.
2484     bool Extended = false;
2485     const SCEV *IV = SE->getSCEV(CmpIndVar);
2486     const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2487                                                   ExitCnt->getType());
2488     const SCEV *ZExtTrunc =
2489       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2490 
2491     if (ZExtTrunc == IV) {
2492       Extended = true;
2493       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2494                                    "wide.trip.count");
2495     } else {
2496       const SCEV *SExtTrunc =
2497         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2498       if (SExtTrunc == IV) {
2499         Extended = true;
2500         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2501                                      "wide.trip.count");
2502       }
2503     }
2504 
2505     if (Extended) {
2506       bool Discard;
2507       L->makeLoopInvariant(ExitCnt, Discard);
2508     } else
2509       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2510                                       "lftr.wideiv");
2511   }
2512   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2513                     << "      LHS:" << *CmpIndVar << '\n'
2514                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2515                     << "\n"
2516                     << "      RHS:\t" << *ExitCnt << "\n"
2517                     << "ExitCount:\t" << *ExitCount << "\n"
2518                     << "  was: " << *BI->getCondition() << "\n");
2519 
2520   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2521   Value *OrigCond = BI->getCondition();
2522   // It's tempting to use replaceAllUsesWith here to fully replace the old
2523   // comparison, but that's not immediately safe, since users of the old
2524   // comparison may not be dominated by the new comparison. Instead, just
2525   // update the branch to use the new comparison; in the common case this
2526   // will make old comparison dead.
2527   BI->setCondition(Cond);
2528   DeadInsts.push_back(OrigCond);
2529 
2530   ++NumLFTR;
2531   return true;
2532 }
2533 
2534 //===----------------------------------------------------------------------===//
2535 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
2536 //===----------------------------------------------------------------------===//
2537 
2538 /// If there's a single exit block, sink any loop-invariant values that
2539 /// were defined in the preheader but not used inside the loop into the
2540 /// exit block to reduce register pressure in the loop.
2541 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
2542   BasicBlock *ExitBlock = L->getExitBlock();
2543   if (!ExitBlock) return false;
2544 
2545   BasicBlock *Preheader = L->getLoopPreheader();
2546   if (!Preheader) return false;
2547 
2548   bool MadeAnyChanges = false;
2549   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
2550   BasicBlock::iterator I(Preheader->getTerminator());
2551   while (I != Preheader->begin()) {
2552     --I;
2553     // New instructions were inserted at the end of the preheader.
2554     if (isa<PHINode>(I))
2555       break;
2556 
2557     // Don't move instructions which might have side effects, since the side
2558     // effects need to complete before instructions inside the loop.  Also don't
2559     // move instructions which might read memory, since the loop may modify
2560     // memory. Note that it's okay if the instruction might have undefined
2561     // behavior: LoopSimplify guarantees that the preheader dominates the exit
2562     // block.
2563     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
2564       continue;
2565 
2566     // Skip debug info intrinsics.
2567     if (isa<DbgInfoIntrinsic>(I))
2568       continue;
2569 
2570     // Skip eh pad instructions.
2571     if (I->isEHPad())
2572       continue;
2573 
2574     // Don't sink alloca: we never want to sink static alloca's out of the
2575     // entry block, and correctly sinking dynamic alloca's requires
2576     // checks for stacksave/stackrestore intrinsics.
2577     // FIXME: Refactor this check somehow?
2578     if (isa<AllocaInst>(I))
2579       continue;
2580 
2581     // Determine if there is a use in or before the loop (direct or
2582     // otherwise).
2583     bool UsedInLoop = false;
2584     for (Use &U : I->uses()) {
2585       Instruction *User = cast<Instruction>(U.getUser());
2586       BasicBlock *UseBB = User->getParent();
2587       if (PHINode *P = dyn_cast<PHINode>(User)) {
2588         unsigned i =
2589           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
2590         UseBB = P->getIncomingBlock(i);
2591       }
2592       if (UseBB == Preheader || L->contains(UseBB)) {
2593         UsedInLoop = true;
2594         break;
2595       }
2596     }
2597 
2598     // If there is, the def must remain in the preheader.
2599     if (UsedInLoop)
2600       continue;
2601 
2602     // Otherwise, sink it to the exit block.
2603     Instruction *ToMove = &*I;
2604     bool Done = false;
2605 
2606     if (I != Preheader->begin()) {
2607       // Skip debug info intrinsics.
2608       do {
2609         --I;
2610       } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2611 
2612       if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2613         Done = true;
2614     } else {
2615       Done = true;
2616     }
2617 
2618     MadeAnyChanges = true;
2619     ToMove->moveBefore(*ExitBlock, InsertPt);
2620     if (Done) break;
2621     InsertPt = ToMove->getIterator();
2622   }
2623 
2624   return MadeAnyChanges;
2625 }
2626 
2627 bool IndVarSimplify::optimizeLoopExits(Loop *L) {
2628   SmallVector<BasicBlock*, 16> ExitingBlocks;
2629   L->getExitingBlocks(ExitingBlocks);
2630 
2631   // Form an expression for the maximum exit count possible for this loop. We
2632   // merge the max and exact information to approximate a version of
2633   // getMaxBackedgeTakenInfo which isn't restricted to just constants.
2634   // TODO: factor this out as a version of getMaxBackedgeTakenCount which
2635   // isn't guaranteed to return a constant.
2636   SmallVector<const SCEV*, 4> ExitCounts;
2637   const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
2638   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2639     ExitCounts.push_back(MaxConstEC);
2640   for (BasicBlock *ExitingBB : ExitingBlocks) {
2641     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2642     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2643       assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2644              "We should only have known counts for exiting blocks that "
2645              "dominate latch!");
2646       ExitCounts.push_back(ExitCount);
2647     }
2648   }
2649   if (ExitCounts.empty())
2650     return false;
2651   const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2652 
2653   bool Changed = false;
2654   for (BasicBlock *ExitingBB : ExitingBlocks) {
2655     // If our exitting block exits multiple loops, we can only rewrite the
2656     // innermost one.  Otherwise, we're changing how many times the innermost
2657     // loop runs before it exits.
2658     if (LI->getLoopFor(ExitingBB) != L)
2659       continue;
2660 
2661     // Can't rewrite non-branch yet.
2662     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2663     if (!BI)
2664       continue;
2665 
2666     // If already constant, nothing to do.
2667     if (isa<Constant>(BI->getCondition()))
2668       continue;
2669 
2670     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2671     if (isa<SCEVCouldNotCompute>(ExitCount))
2672       continue;
2673 
2674     // If we know we'd exit on the first iteration, rewrite the exit to
2675     // reflect this.  This does not imply the loop must exit through this
2676     // exit; there may be an earlier one taken on the first iteration.
2677     // TODO: Given we know the backedge can't be taken, we should go ahead
2678     // and break it.  Or at least, kill all the header phis and simplify.
2679     if (ExitCount->isZero()) {
2680       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2681       auto *OldCond = BI->getCondition();
2682       auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2683         ConstantInt::getFalse(OldCond->getType());
2684       BI->setCondition(NewCond);
2685       if (OldCond->use_empty())
2686         DeadInsts.push_back(OldCond);
2687       Changed = true;
2688       continue;
2689     }
2690 
2691     // If we end up with a pointer exit count, bail.
2692     if (!ExitCount->getType()->isIntegerTy() ||
2693         !MaxExitCount->getType()->isIntegerTy())
2694       return false;
2695 
2696     Type *WiderType =
2697       SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2698     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2699     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2700     assert(MaxExitCount->getType() == ExitCount->getType());
2701 
2702     // Can we prove that some other exit must be taken strictly before this
2703     // one?  TODO: handle cases where ule is known, and equality is covered
2704     // by a dominating exit
2705     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2706                                      MaxExitCount, ExitCount)) {
2707       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2708       auto *OldCond = BI->getCondition();
2709       auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2710         ConstantInt::getTrue(OldCond->getType());
2711       BI->setCondition(NewCond);
2712       if (OldCond->use_empty())
2713         DeadInsts.push_back(OldCond);
2714       Changed = true;
2715       continue;
2716     }
2717 
2718     // TODO: If we can prove that the exiting iteration is equal to the exit
2719     // count for this exit and that no previous exit oppurtunities exist within
2720     // the loop, then we can discharge all other exits.  (May fall out of
2721     // previous TODO.)
2722 
2723     // TODO: If we can't prove any relation between our exit count and the
2724     // loops exit count, but taking this exit doesn't require actually running
2725     // the loop (i.e. no side effects, no computed values used in exit), then
2726     // we can replace the exit test with a loop invariant test which exits on
2727     // the first iteration.
2728   }
2729   return Changed;
2730 }
2731 
2732 //===----------------------------------------------------------------------===//
2733 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
2734 //===----------------------------------------------------------------------===//
2735 
2736 bool IndVarSimplify::run(Loop *L) {
2737   // We need (and expect!) the incoming loop to be in LCSSA.
2738   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2739          "LCSSA required to run indvars!");
2740   bool Changed = false;
2741 
2742   // If LoopSimplify form is not available, stay out of trouble. Some notes:
2743   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
2744   //    canonicalization can be a pessimization without LSR to "clean up"
2745   //    afterwards.
2746   //  - We depend on having a preheader; in particular,
2747   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
2748   //    and we're in trouble if we can't find the induction variable even when
2749   //    we've manually inserted one.
2750   //  - LFTR relies on having a single backedge.
2751   if (!L->isLoopSimplifyForm())
2752     return false;
2753 
2754   // If there are any floating-point recurrences, attempt to
2755   // transform them to use integer recurrences.
2756   Changed |= rewriteNonIntegerIVs(L);
2757 
2758   const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
2759 
2760   // Create a rewriter object which we'll use to transform the code with.
2761   SCEVExpander Rewriter(*SE, DL, "indvars");
2762 #ifndef NDEBUG
2763   Rewriter.setDebugType(DEBUG_TYPE);
2764 #endif
2765 
2766   // Eliminate redundant IV users.
2767   //
2768   // Simplification works best when run before other consumers of SCEV. We
2769   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
2770   // other expressions involving loop IVs have been evaluated. This helps SCEV
2771   // set no-wrap flags before normalizing sign/zero extension.
2772   Rewriter.disableCanonicalMode();
2773   Changed |= simplifyAndExtend(L, Rewriter, LI);
2774 
2775   // Check to see if this loop has a computable loop-invariant execution count.
2776   // If so, this means that we can compute the final value of any expressions
2777   // that are recurrent in the loop, and substitute the exit values from the
2778   // loop into any instructions outside of the loop that use the final values of
2779   // the current expressions.
2780   //
2781   if (ReplaceExitValue != NeverRepl &&
2782       !isa<SCEVCouldNotCompute>(BackedgeTakenCount))
2783     Changed |= rewriteLoopExitValues(L, Rewriter);
2784 
2785   // Eliminate redundant IV cycles.
2786   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
2787 
2788   Changed |= optimizeLoopExits(L);
2789 
2790   // If we have a trip count expression, rewrite the loop's exit condition
2791   // using it.
2792   if (!DisableLFTR) {
2793     SmallVector<BasicBlock*, 16> ExitingBlocks;
2794     L->getExitingBlocks(ExitingBlocks);
2795     for (BasicBlock *ExitingBB : ExitingBlocks) {
2796       // Can't rewrite non-branch yet.
2797       if (!isa<BranchInst>(ExitingBB->getTerminator()))
2798         continue;
2799 
2800       // If our exitting block exits multiple loops, we can only rewrite the
2801       // innermost one.  Otherwise, we're changing how many times the innermost
2802       // loop runs before it exits.
2803       if (LI->getLoopFor(ExitingBB) != L)
2804         continue;
2805 
2806       if (!needsLFTR(L, ExitingBB))
2807         continue;
2808 
2809       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2810       if (isa<SCEVCouldNotCompute>(ExitCount))
2811         continue;
2812 
2813       // This was handled above, but as we form SCEVs, we can sometimes refine
2814       // existing ones; this allows exit counts to be folded to zero which
2815       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
2816       // until stable to handle cases like this better.
2817       if (ExitCount->isZero())
2818         continue;
2819 
2820       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2821       if (!IndVar)
2822         continue;
2823 
2824       // Avoid high cost expansions.  Note: This heuristic is questionable in
2825       // that our definition of "high cost" is not exactly principled.
2826       if (Rewriter.isHighCostExpansion(ExitCount, L))
2827         continue;
2828 
2829       // Check preconditions for proper SCEVExpander operation. SCEV does not
2830       // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2831       // any pass that uses the SCEVExpander must do it. This does not work
2832       // well for loop passes because SCEVExpander makes assumptions about
2833       // all loops, while LoopPassManager only forces the current loop to be
2834       // simplified.
2835       //
2836       // FIXME: SCEV expansion has no way to bail out, so the caller must
2837       // explicitly check any assumptions made by SCEV. Brittle.
2838       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
2839       if (!AR || AR->getLoop()->getLoopPreheader())
2840         Changed |= linearFunctionTestReplace(L, ExitingBB,
2841                                              ExitCount, IndVar,
2842                                              Rewriter);
2843     }
2844   }
2845   // Clear the rewriter cache, because values that are in the rewriter's cache
2846   // can be deleted in the loop below, causing the AssertingVH in the cache to
2847   // trigger.
2848   Rewriter.clear();
2849 
2850   // Now that we're done iterating through lists, clean up any instructions
2851   // which are now dead.
2852   while (!DeadInsts.empty())
2853     if (Instruction *Inst =
2854             dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
2855       Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
2856 
2857   // The Rewriter may not be used from this point on.
2858 
2859   // Loop-invariant instructions in the preheader that aren't used in the
2860   // loop may be sunk below the loop to reduce register pressure.
2861   Changed |= sinkUnusedInvariants(L);
2862 
2863   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2864   // trip count and therefore can further simplify exit values in addition to
2865   // rewriteLoopExitValues.
2866   Changed |= rewriteFirstIterationLoopExitValues(L);
2867 
2868   // Clean up dead instructions.
2869   Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
2870 
2871   // Check a post-condition.
2872   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2873          "Indvars did not preserve LCSSA!");
2874 
2875   // Verify that LFTR, and any other change have not interfered with SCEV's
2876   // ability to compute trip count.
2877 #ifndef NDEBUG
2878   if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
2879     SE->forgetLoop(L);
2880     const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
2881     if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
2882         SE->getTypeSizeInBits(NewBECount->getType()))
2883       NewBECount = SE->getTruncateOrNoop(NewBECount,
2884                                          BackedgeTakenCount->getType());
2885     else
2886       BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
2887                                                  NewBECount->getType());
2888     assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV");
2889   }
2890 #endif
2891 
2892   return Changed;
2893 }
2894 
2895 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2896                                           LoopStandardAnalysisResults &AR,
2897                                           LPMUpdater &) {
2898   Function *F = L.getHeader()->getParent();
2899   const DataLayout &DL = F->getParent()->getDataLayout();
2900 
2901   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
2902   if (!IVS.run(&L))
2903     return PreservedAnalyses::all();
2904 
2905   auto PA = getLoopPassPreservedAnalyses();
2906   PA.preserveSet<CFGAnalyses>();
2907   return PA;
2908 }
2909 
2910 namespace {
2911 
2912 struct IndVarSimplifyLegacyPass : public LoopPass {
2913   static char ID; // Pass identification, replacement for typeid
2914 
2915   IndVarSimplifyLegacyPass() : LoopPass(ID) {
2916     initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
2917   }
2918 
2919   bool runOnLoop(Loop *L, LPPassManager &LPM) override {
2920     if (skipLoop(L))
2921       return false;
2922 
2923     auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2924     auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2925     auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2926     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2927     auto *TLI = TLIP ? &TLIP->getTLI() : nullptr;
2928     auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
2929     auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
2930     const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2931 
2932     IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
2933     return IVS.run(L);
2934   }
2935 
2936   void getAnalysisUsage(AnalysisUsage &AU) const override {
2937     AU.setPreservesCFG();
2938     getLoopAnalysisUsage(AU);
2939   }
2940 };
2941 
2942 } // end anonymous namespace
2943 
2944 char IndVarSimplifyLegacyPass::ID = 0;
2945 
2946 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
2947                       "Induction Variable Simplification", false, false)
2948 INITIALIZE_PASS_DEPENDENCY(LoopPass)
2949 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
2950                     "Induction Variable Simplification", false, false)
2951 
2952 Pass *llvm::createIndVarSimplifyPass() {
2953   return new IndVarSimplifyLegacyPass();
2954 }
2955