xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This transformation analyzes and transforms the induction variables (and
10 // computations derived from them) into simpler forms suitable for subsequent
11 // analysis and transformation.
12 //
13 // If the trip count of a loop is computable, this pass also makes the following
14 // changes:
15 //   1. The exit condition for the loop is canonicalized to compare the
16 //      induction value against the exit value.  This turns loops like:
17 //        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
18 //   2. Any use outside of the loop of an expression derived from the indvar
19 //      is changed to compute the derived value outside of the loop, eliminating
20 //      the dependence on the exit value of the induction variable.  If the only
21 //      purpose of the loop is to compute the exit value of some derived
22 //      expression, this transformation will make the loop dead.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/iterator_range.h"
35 #include "llvm/Analysis/LoopInfo.h"
36 #include "llvm/Analysis/LoopPass.h"
37 #include "llvm/Analysis/MemorySSA.h"
38 #include "llvm/Analysis/MemorySSAUpdater.h"
39 #include "llvm/Analysis/ScalarEvolution.h"
40 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
41 #include "llvm/Analysis/TargetLibraryInfo.h"
42 #include "llvm/Analysis/TargetTransformInfo.h"
43 #include "llvm/Analysis/ValueTracking.h"
44 #include "llvm/IR/BasicBlock.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/ConstantRange.h"
47 #include "llvm/IR/Constants.h"
48 #include "llvm/IR/DataLayout.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Dominators.h"
51 #include "llvm/IR/Function.h"
52 #include "llvm/IR/IRBuilder.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instruction.h"
55 #include "llvm/IR/Instructions.h"
56 #include "llvm/IR/IntrinsicInst.h"
57 #include "llvm/IR/Intrinsics.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/PassManager.h"
61 #include "llvm/IR/PatternMatch.h"
62 #include "llvm/IR/Type.h"
63 #include "llvm/IR/Use.h"
64 #include "llvm/IR/User.h"
65 #include "llvm/IR/Value.h"
66 #include "llvm/IR/ValueHandle.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/Debug.h"
71 #include "llvm/Support/MathExtras.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
74 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
75 #include "llvm/Transforms/Utils/Local.h"
76 #include "llvm/Transforms/Utils/LoopUtils.h"
77 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
78 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
79 #include <cassert>
80 #include <cstdint>
81 #include <utility>
82 
83 using namespace llvm;
84 using namespace PatternMatch;
85 
86 #define DEBUG_TYPE "indvars"
87 
88 STATISTIC(NumWidened     , "Number of indvars widened");
89 STATISTIC(NumReplaced    , "Number of exit values replaced");
90 STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
91 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
92 STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
93 
94 static cl::opt<ReplaceExitVal> ReplaceExitValue(
95     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
96     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
97     cl::values(
98         clEnumValN(NeverRepl, "never", "never replace exit value"),
99         clEnumValN(OnlyCheapRepl, "cheap",
100                    "only replace exit value when the cost is cheap"),
101         clEnumValN(
102             UnusedIndVarInLoop, "unusedindvarinloop",
103             "only replace exit value when it is an unused "
104             "induction variable in the loop and has cheap replacement cost"),
105         clEnumValN(NoHardUse, "noharduse",
106                    "only replace exit values when loop def likely dead"),
107         clEnumValN(AlwaysRepl, "always",
108                    "always replace exit value whenever possible")));
109 
110 static cl::opt<bool> UsePostIncrementRanges(
111   "indvars-post-increment-ranges", cl::Hidden,
112   cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
113   cl::init(true));
114 
115 static cl::opt<bool>
116 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
117             cl::desc("Disable Linear Function Test Replace optimization"));
118 
119 static cl::opt<bool>
120 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
121                 cl::desc("Predicate conditions in read only loops"));
122 
123 static cl::opt<bool>
124 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
125                 cl::desc("Allow widening of indvars to eliminate s/zext"));
126 
127 namespace {
128 
129 class IndVarSimplify {
130   LoopInfo *LI;
131   ScalarEvolution *SE;
132   DominatorTree *DT;
133   const DataLayout &DL;
134   TargetLibraryInfo *TLI;
135   const TargetTransformInfo *TTI;
136   std::unique_ptr<MemorySSAUpdater> MSSAU;
137 
138   SmallVector<WeakTrackingVH, 16> DeadInsts;
139   bool WidenIndVars;
140 
141   bool RunUnswitching = false;
142 
143   bool handleFloatingPointIV(Loop *L, PHINode *PH);
144   bool rewriteNonIntegerIVs(Loop *L);
145 
146   bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
147   /// Try to improve our exit conditions by converting condition from signed
148   /// to unsigned or rotating computation out of the loop.
149   /// (See inline comment about why this is duplicated from simplifyAndExtend)
150   bool canonicalizeExitCondition(Loop *L);
151   /// Try to eliminate loop exits based on analyzeable exit counts
152   bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
153   /// Try to form loop invariant tests for loop exits by changing how many
154   /// iterations of the loop run when that is unobservable.
155   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
156 
157   bool rewriteFirstIterationLoopExitValues(Loop *L);
158 
159   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
160                                  const SCEV *ExitCount,
161                                  PHINode *IndVar, SCEVExpander &Rewriter);
162 
163   bool sinkUnusedInvariants(Loop *L);
164 
165 public:
IndVarSimplify(LoopInfo * LI,ScalarEvolution * SE,DominatorTree * DT,const DataLayout & DL,TargetLibraryInfo * TLI,TargetTransformInfo * TTI,MemorySSA * MSSA,bool WidenIndVars)166   IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167                  const DataLayout &DL, TargetLibraryInfo *TLI,
168                  TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
169       : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
170         WidenIndVars(WidenIndVars) {
171     if (MSSA)
172       MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
173   }
174 
175   bool run(Loop *L);
176 
runUnswitching() const177   bool runUnswitching() const { return RunUnswitching; }
178 };
179 
180 } // end anonymous namespace
181 
182 //===----------------------------------------------------------------------===//
183 // rewriteNonIntegerIVs and helpers. Prefer integer IVs.
184 //===----------------------------------------------------------------------===//
185 
186 /// Convert APF to an integer, if possible.
ConvertToSInt(const APFloat & APF,int64_t & IntVal)187 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
188   bool isExact = false;
189   // See if we can convert this to an int64_t
190   uint64_t UIntVal;
191   if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
192                            APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
193       !isExact)
194     return false;
195   IntVal = UIntVal;
196   return true;
197 }
198 
199 /// If the loop has floating induction variable then insert corresponding
200 /// integer induction variable if possible.
201 /// For example,
202 /// for(double i = 0; i < 10000; ++i)
203 ///   bar(i)
204 /// is converted into
205 /// for(int i = 0; i < 10000; ++i)
206 ///   bar((double)i);
handleFloatingPointIV(Loop * L,PHINode * PN)207 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
208   unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
209   unsigned BackEdge     = IncomingEdge^1;
210 
211   // Check incoming value.
212   auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
213 
214   int64_t InitValue;
215   if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
216     return false;
217 
218   // Check IV increment. Reject this PN if increment operation is not
219   // an add or increment value can not be represented by an integer.
220   auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
221   if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;
222 
223   // If this is not an add of the PHI with a constantfp, or if the constant fp
224   // is not an integer, bail out.
225   ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
226   int64_t IncValue;
227   if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
228       !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
229     return false;
230 
231   // Check Incr uses. One user is PN and the other user is an exit condition
232   // used by the conditional terminator.
233   Value::user_iterator IncrUse = Incr->user_begin();
234   Instruction *U1 = cast<Instruction>(*IncrUse++);
235   if (IncrUse == Incr->user_end()) return false;
236   Instruction *U2 = cast<Instruction>(*IncrUse++);
237   if (IncrUse != Incr->user_end()) return false;
238 
239   // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
240   // only used by a branch, we can't transform it.
241   FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
242   if (!Compare)
243     Compare = dyn_cast<FCmpInst>(U2);
244   if (!Compare || !Compare->hasOneUse() ||
245       !isa<BranchInst>(Compare->user_back()))
246     return false;
247 
248   BranchInst *TheBr = cast<BranchInst>(Compare->user_back());
249 
250   // We need to verify that the branch actually controls the iteration count
251   // of the loop.  If not, the new IV can overflow and no one will notice.
252   // The branch block must be in the loop and one of the successors must be out
253   // of the loop.
254   assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
255   if (!L->contains(TheBr->getParent()) ||
256       (L->contains(TheBr->getSuccessor(0)) &&
257        L->contains(TheBr->getSuccessor(1))))
258     return false;
259 
260   // If it isn't a comparison with an integer-as-fp (the exit value), we can't
261   // transform it.
262   ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
263   int64_t ExitValue;
264   if (ExitValueVal == nullptr ||
265       !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
266     return false;
267 
268   // Find new predicate for integer comparison.
269   CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
270   switch (Compare->getPredicate()) {
271   default: return false;  // Unknown comparison.
272   case CmpInst::FCMP_OEQ:
273   case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
274   case CmpInst::FCMP_ONE:
275   case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
276   case CmpInst::FCMP_OGT:
277   case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
278   case CmpInst::FCMP_OGE:
279   case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
280   case CmpInst::FCMP_OLT:
281   case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
282   case CmpInst::FCMP_OLE:
283   case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
284   }
285 
286   // We convert the floating point induction variable to a signed i32 value if
287   // we can.  This is only safe if the comparison will not overflow in a way
288   // that won't be trapped by the integer equivalent operations.  Check for this
289   // now.
290   // TODO: We could use i64 if it is native and the range requires it.
291 
292   // The start/stride/exit values must all fit in signed i32.
293   if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
294     return false;
295 
296   // If not actually striding (add x, 0.0), avoid touching the code.
297   if (IncValue == 0)
298     return false;
299 
300   // Positive and negative strides have different safety conditions.
301   if (IncValue > 0) {
302     // If we have a positive stride, we require the init to be less than the
303     // exit value.
304     if (InitValue >= ExitValue)
305       return false;
306 
307     uint32_t Range = uint32_t(ExitValue-InitValue);
308     // Check for infinite loop, either:
309     // while (i <= Exit) or until (i > Exit)
310     if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
311       if (++Range == 0) return false;  // Range overflows.
312     }
313 
314     unsigned Leftover = Range % uint32_t(IncValue);
315 
316     // If this is an equality comparison, we require that the strided value
317     // exactly land on the exit value, otherwise the IV condition will wrap
318     // around and do things the fp IV wouldn't.
319     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
320         Leftover != 0)
321       return false;
322 
323     // If the stride would wrap around the i32 before exiting, we can't
324     // transform the IV.
325     if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
326       return false;
327   } else {
328     // If we have a negative stride, we require the init to be greater than the
329     // exit value.
330     if (InitValue <= ExitValue)
331       return false;
332 
333     uint32_t Range = uint32_t(InitValue-ExitValue);
334     // Check for infinite loop, either:
335     // while (i >= Exit) or until (i < Exit)
336     if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
337       if (++Range == 0) return false;  // Range overflows.
338     }
339 
340     unsigned Leftover = Range % uint32_t(-IncValue);
341 
342     // If this is an equality comparison, we require that the strided value
343     // exactly land on the exit value, otherwise the IV condition will wrap
344     // around and do things the fp IV wouldn't.
345     if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
346         Leftover != 0)
347       return false;
348 
349     // If the stride would wrap around the i32 before exiting, we can't
350     // transform the IV.
351     if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
352       return false;
353   }
354 
355   IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
356 
357   // Insert new integer induction variable.
358   PHINode *NewPHI =
359       PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN->getIterator());
360   NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
361                       PN->getIncomingBlock(IncomingEdge));
362   NewPHI->setDebugLoc(PN->getDebugLoc());
363 
364   Instruction *NewAdd =
365       BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
366                                 Incr->getName() + ".int", Incr->getIterator());
367   NewAdd->setDebugLoc(Incr->getDebugLoc());
368   NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
369 
370   ICmpInst *NewCompare =
371       new ICmpInst(TheBr->getIterator(), NewPred, NewAdd,
372                    ConstantInt::get(Int32Ty, ExitValue), Compare->getName());
373   NewCompare->setDebugLoc(Compare->getDebugLoc());
374 
375   // In the following deletions, PN may become dead and may be deleted.
376   // Use a WeakTrackingVH to observe whether this happens.
377   WeakTrackingVH WeakPH = PN;
378 
379   // Delete the old floating point exit comparison.  The branch starts using the
380   // new comparison.
381   NewCompare->takeName(Compare);
382   Compare->replaceAllUsesWith(NewCompare);
383   RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());
384 
385   // Delete the old floating point increment.
386   Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
387   RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());
388 
389   // If the FP induction variable still has uses, this is because something else
390   // in the loop uses its value.  In order to canonicalize the induction
391   // variable, we chose to eliminate the IV and rewrite it in terms of an
392   // int->fp cast.
393   //
394   // We give preference to sitofp over uitofp because it is faster on most
395   // platforms.
396   if (WeakPH) {
397     Instruction *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
398                                        PN->getParent()->getFirstInsertionPt());
399     Conv->setDebugLoc(PN->getDebugLoc());
400     PN->replaceAllUsesWith(Conv);
401     RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
402   }
403   return true;
404 }
405 
rewriteNonIntegerIVs(Loop * L)406 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
407   // First step.  Check to see if there are any floating-point recurrences.
408   // If there are, change them into integer recurrences, permitting analysis by
409   // the SCEV routines.
410   BasicBlock *Header = L->getHeader();
411 
412   SmallVector<WeakTrackingVH, 8> PHIs;
413   for (PHINode &PN : Header->phis())
414     PHIs.push_back(&PN);
415 
416   bool Changed = false;
417   for (WeakTrackingVH &PHI : PHIs)
418     if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
419       Changed |= handleFloatingPointIV(L, PN);
420 
421   // If the loop previously had floating-point IV, ScalarEvolution
422   // may not have been able to compute a trip count. Now that we've done some
423   // re-writing, the trip count may be computable.
424   if (Changed)
425     SE->forgetLoop(L);
426   return Changed;
427 }
428 
429 //===---------------------------------------------------------------------===//
430 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
431 // they will exit at the first iteration.
432 //===---------------------------------------------------------------------===//
433 
434 /// Check to see if this loop has loop invariant conditions which lead to loop
435 /// exits. If so, we know that if the exit path is taken, it is at the first
436 /// loop iteration. This lets us predict exit values of PHI nodes that live in
437 /// loop header.
rewriteFirstIterationLoopExitValues(Loop * L)438 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
439   // Verify the input to the pass is already in LCSSA form.
440   assert(L->isLCSSAForm(*DT));
441 
442   SmallVector<BasicBlock *, 8> ExitBlocks;
443   L->getUniqueExitBlocks(ExitBlocks);
444 
445   bool MadeAnyChanges = false;
446   for (auto *ExitBB : ExitBlocks) {
447     // If there are no more PHI nodes in this exit block, then no more
448     // values defined inside the loop are used on this path.
449     for (PHINode &PN : ExitBB->phis()) {
450       for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
451            IncomingValIdx != E; ++IncomingValIdx) {
452         auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);
453 
454         // Can we prove that the exit must run on the first iteration if it
455         // runs at all?  (i.e. early exits are fine for our purposes, but
456         // traces which lead to this exit being taken on the 2nd iteration
457         // aren't.)  Note that this is about whether the exit branch is
458         // executed, not about whether it is taken.
459         if (!L->getLoopLatch() ||
460             !DT->dominates(IncomingBB, L->getLoopLatch()))
461           continue;
462 
463         // Get condition that leads to the exit path.
464         auto *TermInst = IncomingBB->getTerminator();
465 
466         Value *Cond = nullptr;
467         if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
468           // Must be a conditional branch, otherwise the block
469           // should not be in the loop.
470           Cond = BI->getCondition();
471         } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
472           Cond = SI->getCondition();
473         else
474           continue;
475 
476         if (!L->isLoopInvariant(Cond))
477           continue;
478 
479         auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));
480 
481         // Only deal with PHIs in the loop header.
482         if (!ExitVal || ExitVal->getParent() != L->getHeader())
483           continue;
484 
485         // If ExitVal is a PHI on the loop header, then we know its
486         // value along this exit because the exit can only be taken
487         // on the first iteration.
488         auto *LoopPreheader = L->getLoopPreheader();
489         assert(LoopPreheader && "Invalid loop");
490         int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
491         if (PreheaderIdx != -1) {
492           assert(ExitVal->getParent() == L->getHeader() &&
493                  "ExitVal must be in loop header");
494           MadeAnyChanges = true;
495           PN.setIncomingValue(IncomingValIdx,
496                               ExitVal->getIncomingValue(PreheaderIdx));
497           SE->forgetValue(&PN);
498         }
499       }
500     }
501   }
502   return MadeAnyChanges;
503 }
504 
505 //===----------------------------------------------------------------------===//
506 //  IV Widening - Extend the width of an IV to cover its widest uses.
507 //===----------------------------------------------------------------------===//
508 
509 /// Update information about the induction variable that is extended by this
510 /// sign or zero extend operation. This is used to determine the final width of
511 /// the IV before actually widening it.
visitIVCast(CastInst * Cast,WideIVInfo & WI,ScalarEvolution * SE,const TargetTransformInfo * TTI)512 static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
513                         ScalarEvolution *SE,
514                         const TargetTransformInfo *TTI) {
515   bool IsSigned = Cast->getOpcode() == Instruction::SExt;
516   if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
517     return;
518 
519   Type *Ty = Cast->getType();
520   uint64_t Width = SE->getTypeSizeInBits(Ty);
521   if (!Cast->getDataLayout().isLegalInteger(Width))
522     return;
523 
524   // Check that `Cast` actually extends the induction variable (we rely on this
525   // later).  This takes care of cases where `Cast` is extending a truncation of
526   // the narrow induction variable, and thus can end up being narrower than the
527   // "narrow" induction variable.
528   uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
529   if (NarrowIVWidth >= Width)
530     return;
531 
532   // Cast is either an sext or zext up to this point.
533   // We should not widen an indvar if arithmetics on the wider indvar are more
534   // expensive than those on the narrower indvar. We check only the cost of ADD
535   // because at least an ADD is required to increment the induction variable. We
536   // could compute more comprehensively the cost of all instructions on the
537   // induction variable when necessary.
538   if (TTI &&
539       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
540           TTI->getArithmeticInstrCost(Instruction::Add,
541                                       Cast->getOperand(0)->getType())) {
542     return;
543   }
544 
545   if (!WI.WidestNativeType ||
546       Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
547     WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
548     WI.IsSigned = IsSigned;
549     return;
550   }
551 
552   // We extend the IV to satisfy the sign of its user(s), or 'signed'
553   // if there are multiple users with both sign- and zero extensions,
554   // in order not to introduce nondeterministic behaviour based on the
555   // unspecified order of a PHI nodes' users-iterator.
556   WI.IsSigned |= IsSigned;
557 }
558 
559 //===----------------------------------------------------------------------===//
560 //  Live IV Reduction - Minimize IVs live across the loop.
561 //===----------------------------------------------------------------------===//
562 
563 //===----------------------------------------------------------------------===//
564 //  Simplification of IV users based on SCEV evaluation.
565 //===----------------------------------------------------------------------===//
566 
567 namespace {
568 
569 class IndVarSimplifyVisitor : public IVVisitor {
570   ScalarEvolution *SE;
571   const TargetTransformInfo *TTI;
572   PHINode *IVPhi;
573 
574 public:
575   WideIVInfo WI;
576 
IndVarSimplifyVisitor(PHINode * IV,ScalarEvolution * SCEV,const TargetTransformInfo * TTI,const DominatorTree * DTree)577   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
578                         const TargetTransformInfo *TTI,
579                         const DominatorTree *DTree)
580     : SE(SCEV), TTI(TTI), IVPhi(IV) {
581     DT = DTree;
582     WI.NarrowIV = IVPhi;
583   }
584 
585   // Implement the interface used by simplifyUsersOfIV.
visitCast(CastInst * Cast)586   void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
587 };
588 
589 } // end anonymous namespace
590 
591 /// Iteratively perform simplification on a worklist of IV users. Each
592 /// successive simplification may push more users which may themselves be
593 /// candidates for simplification.
594 ///
595 /// Sign/Zero extend elimination is interleaved with IV simplification.
simplifyAndExtend(Loop * L,SCEVExpander & Rewriter,LoopInfo * LI)596 bool IndVarSimplify::simplifyAndExtend(Loop *L,
597                                        SCEVExpander &Rewriter,
598                                        LoopInfo *LI) {
599   SmallVector<WideIVInfo, 8> WideIVs;
600 
601   auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
602           Intrinsic::getName(Intrinsic::experimental_guard));
603   bool HasGuards = GuardDecl && !GuardDecl->use_empty();
604 
605   SmallVector<PHINode *, 8> LoopPhis;
606   for (PHINode &PN : L->getHeader()->phis())
607     LoopPhis.push_back(&PN);
608 
609   // Each round of simplification iterates through the SimplifyIVUsers worklist
610   // for all current phis, then determines whether any IVs can be
611   // widened. Widening adds new phis to LoopPhis, inducing another round of
612   // simplification on the wide IVs.
613   bool Changed = false;
614   while (!LoopPhis.empty()) {
615     // Evaluate as many IV expressions as possible before widening any IVs. This
616     // forces SCEV to set no-wrap flags before evaluating sign/zero
617     // extension. The first time SCEV attempts to normalize sign/zero extension,
618     // the result becomes final. So for the most predictable results, we delay
619     // evaluation of sign/zero extend evaluation until needed, and avoid running
620     // other SCEV based analysis prior to simplifyAndExtend.
621     do {
622       PHINode *CurrIV = LoopPhis.pop_back_val();
623 
624       // Information about sign/zero extensions of CurrIV.
625       IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
626 
627       const auto &[C, U] = simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts,
628                                              Rewriter, &Visitor);
629 
630       Changed |= C;
631       RunUnswitching |= U;
632       if (Visitor.WI.WidestNativeType) {
633         WideIVs.push_back(Visitor.WI);
634       }
635     } while(!LoopPhis.empty());
636 
637     // Continue if we disallowed widening.
638     if (!WidenIndVars)
639       continue;
640 
641     for (; !WideIVs.empty(); WideIVs.pop_back()) {
642       unsigned ElimExt;
643       unsigned Widened;
644       if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
645                                           DT, DeadInsts, ElimExt, Widened,
646                                           HasGuards, UsePostIncrementRanges)) {
647         NumElimExt += ElimExt;
648         NumWidened += Widened;
649         Changed = true;
650         LoopPhis.push_back(WidePhi);
651       }
652     }
653   }
654   return Changed;
655 }
656 
657 //===----------------------------------------------------------------------===//
658 //  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
659 //===----------------------------------------------------------------------===//
660 
661 /// Given an Value which is hoped to be part of an add recurance in the given
662 /// loop, return the associated Phi node if so.  Otherwise, return null.  Note
663 /// that this is less general than SCEVs AddRec checking.
getLoopPhiForCounter(Value * IncV,Loop * L)664 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
665   Instruction *IncI = dyn_cast<Instruction>(IncV);
666   if (!IncI)
667     return nullptr;
668 
669   switch (IncI->getOpcode()) {
670   case Instruction::Add:
671   case Instruction::Sub:
672     break;
673   case Instruction::GetElementPtr:
674     // An IV counter must preserve its type.
675     if (IncI->getNumOperands() == 2)
676       break;
677     [[fallthrough]];
678   default:
679     return nullptr;
680   }
681 
682   PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
683   if (Phi && Phi->getParent() == L->getHeader()) {
684     if (L->isLoopInvariant(IncI->getOperand(1)))
685       return Phi;
686     return nullptr;
687   }
688   if (IncI->getOpcode() == Instruction::GetElementPtr)
689     return nullptr;
690 
691   // Allow add/sub to be commuted.
692   Phi = dyn_cast<PHINode>(IncI->getOperand(1));
693   if (Phi && Phi->getParent() == L->getHeader()) {
694     if (L->isLoopInvariant(IncI->getOperand(0)))
695       return Phi;
696   }
697   return nullptr;
698 }
699 
700 /// Whether the current loop exit test is based on this value.  Currently this
701 /// is limited to a direct use in the loop condition.
isLoopExitTestBasedOn(Value * V,BasicBlock * ExitingBB)702 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
703   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
704   ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
705   // TODO: Allow non-icmp loop test.
706   if (!ICmp)
707     return false;
708 
709   // TODO: Allow indirect use.
710   return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
711 }
712 
713 /// linearFunctionTestReplace policy. Return true unless we can show that the
714 /// current exit test is already sufficiently canonical.
needsLFTR(Loop * L,BasicBlock * ExitingBB)715 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
716   assert(L->getLoopLatch() && "Must be in simplified form");
717 
718   // Avoid converting a constant or loop invariant test back to a runtime
719   // test.  This is critical for when SCEV's cached ExitCount is less precise
720   // than the current IR (such as after we've proven a particular exit is
721   // actually dead and thus the BE count never reaches our ExitCount.)
722   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
723   if (L->isLoopInvariant(BI->getCondition()))
724     return false;
725 
726   // Do LFTR to simplify the exit condition to an ICMP.
727   ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
728   if (!Cond)
729     return true;
730 
731   // Do LFTR to simplify the exit ICMP to EQ/NE
732   ICmpInst::Predicate Pred = Cond->getPredicate();
733   if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
734     return true;
735 
736   // Look for a loop invariant RHS
737   Value *LHS = Cond->getOperand(0);
738   Value *RHS = Cond->getOperand(1);
739   if (!L->isLoopInvariant(RHS)) {
740     if (!L->isLoopInvariant(LHS))
741       return true;
742     std::swap(LHS, RHS);
743   }
744   // Look for a simple IV counter LHS
745   PHINode *Phi = dyn_cast<PHINode>(LHS);
746   if (!Phi)
747     Phi = getLoopPhiForCounter(LHS, L);
748 
749   if (!Phi)
750     return true;
751 
752   // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
753   int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
754   if (Idx < 0)
755     return true;
756 
757   // Do LFTR if the exit condition's IV is *not* a simple counter.
758   Value *IncV = Phi->getIncomingValue(Idx);
759   return Phi != getLoopPhiForCounter(IncV, L);
760 }
761 
762 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
763 /// down to checking that all operands are constant and listing instructions
764 /// that may hide undef.
hasConcreteDefImpl(Value * V,SmallPtrSetImpl<Value * > & Visited,unsigned Depth)765 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
766                                unsigned Depth) {
767   if (isa<Constant>(V))
768     return !isa<UndefValue>(V);
769 
770   if (Depth >= 6)
771     return false;
772 
773   // Conservatively handle non-constant non-instructions. For example, Arguments
774   // may be undef.
775   Instruction *I = dyn_cast<Instruction>(V);
776   if (!I)
777     return false;
778 
779   // Load and return values may be undef.
780   if(I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
781     return false;
782 
783   // Optimistically handle other instructions.
784   for (Value *Op : I->operands()) {
785     if (!Visited.insert(Op).second)
786       continue;
787     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
788       return false;
789   }
790   return true;
791 }
792 
793 /// Return true if the given value is concrete. We must prove that undef can
794 /// never reach it.
795 ///
796 /// TODO: If we decide that this is a good approach to checking for undef, we
797 /// may factor it into a common location.
hasConcreteDef(Value * V)798 static bool hasConcreteDef(Value *V) {
799   SmallPtrSet<Value*, 8> Visited;
800   Visited.insert(V);
801   return hasConcreteDefImpl(V, Visited, 0);
802 }
803 
804 /// Return true if the given phi is a "counter" in L.  A counter is an
805 /// add recurance (of integer or pointer type) with an arbitrary start, and a
806 /// step of 1.  Note that L must have exactly one latch.
isLoopCounter(PHINode * Phi,Loop * L,ScalarEvolution * SE)807 static bool isLoopCounter(PHINode* Phi, Loop *L,
808                           ScalarEvolution *SE) {
809   assert(Phi->getParent() == L->getHeader());
810   assert(L->getLoopLatch());
811 
812   if (!SE->isSCEVable(Phi->getType()))
813     return false;
814 
815   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
816   if (!AR || AR->getLoop() != L || !AR->isAffine())
817     return false;
818 
819   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
820   if (!Step || !Step->isOne())
821     return false;
822 
823   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
824   Value *IncV = Phi->getIncomingValue(LatchIdx);
825   return (getLoopPhiForCounter(IncV, L) == Phi &&
826           isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
827 }
828 
829 /// Search the loop header for a loop counter (anadd rec w/step of one)
830 /// suitable for use by LFTR.  If multiple counters are available, select the
831 /// "best" one based profitable heuristics.
832 ///
833 /// BECount may be an i8* pointer type. The pointer difference is already
834 /// valid count without scaling the address stride, so it remains a pointer
835 /// expression as far as SCEV is concerned.
FindLoopCounter(Loop * L,BasicBlock * ExitingBB,const SCEV * BECount,ScalarEvolution * SE,DominatorTree * DT)836 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
837                                 const SCEV *BECount,
838                                 ScalarEvolution *SE, DominatorTree *DT) {
839   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
840 
841   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
842 
843   // Loop over all of the PHI nodes, looking for a simple counter.
844   PHINode *BestPhi = nullptr;
845   const SCEV *BestInit = nullptr;
846   BasicBlock *LatchBlock = L->getLoopLatch();
847   assert(LatchBlock && "Must be in simplified form");
848   const DataLayout &DL = L->getHeader()->getDataLayout();
849 
850   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
851     PHINode *Phi = cast<PHINode>(I);
852     if (!isLoopCounter(Phi, L, SE))
853       continue;
854 
855     const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
856 
857     // AR may be a pointer type, while BECount is an integer type.
858     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
859     // AR may not be a narrower type, or we may never exit.
860     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
861     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
862       continue;
863 
864     // Avoid reusing a potentially undef value to compute other values that may
865     // have originally had a concrete definition.
866     if (!hasConcreteDef(Phi)) {
867       // We explicitly allow unknown phis as long as they are already used by
868       // the loop exit test.  This is legal since performing LFTR could not
869       // increase the number of undef users.
870       Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
871       if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
872           !isLoopExitTestBasedOn(IncPhi, ExitingBB))
873         continue;
874     }
875 
876     // Avoid introducing undefined behavior due to poison which didn't exist in
877     // the original program.  (Annoyingly, the rules for poison and undef
878     // propagation are distinct, so this does NOT cover the undef case above.)
879     // We have to ensure that we don't introduce UB by introducing a use on an
880     // iteration where said IV produces poison.  Our strategy here differs for
881     // pointers and integer IVs.  For integers, we strip and reinfer as needed,
882     // see code in linearFunctionTestReplace.  For pointers, we restrict
883     // transforms as there is no good way to reinfer inbounds once lost.
884     if (!Phi->getType()->isIntegerTy() &&
885         !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
886       continue;
887 
888     const SCEV *Init = AR->getStart();
889 
890     if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
891       // Don't force a live loop counter if another IV can be used.
892       if (isAlmostDeadIV(Phi, LatchBlock, Cond))
893         continue;
894 
895       // Prefer to count-from-zero. This is a more "canonical" counter form. It
896       // also prefers integer to pointer IVs.
897       if (BestInit->isZero() != Init->isZero()) {
898         if (BestInit->isZero())
899           continue;
900       }
901       // If two IVs both count from zero or both count from nonzero then the
902       // narrower is likely a dead phi that has been widened. Use the wider phi
903       // to allow the other to be eliminated.
904       else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
905         continue;
906     }
907     BestPhi = Phi;
908     BestInit = Init;
909   }
910   return BestPhi;
911 }
912 
913 /// Insert an IR expression which computes the value held by the IV IndVar
914 /// (which must be an loop counter w/unit stride) after the backedge of loop L
915 /// is taken ExitCount times.
genLoopLimit(PHINode * IndVar,BasicBlock * ExitingBB,const SCEV * ExitCount,bool UsePostInc,Loop * L,SCEVExpander & Rewriter,ScalarEvolution * SE)916 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
917                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
918                            SCEVExpander &Rewriter, ScalarEvolution *SE) {
919   assert(isLoopCounter(IndVar, L, SE));
920   assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
921   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
922   assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
923 
924   // For integer IVs, truncate the IV before computing the limit unless we
925   // know apriori that the limit must be a constant when evaluated in the
926   // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
927   // IV in the loop over a (potentially) expensive expansion of the widened
928   // exit count add(zext(add)) expression.
929   if (IndVar->getType()->isIntegerTy() &&
930       SE->getTypeSizeInBits(AR->getType()) >
931       SE->getTypeSizeInBits(ExitCount->getType())) {
932     const SCEV *IVInit = AR->getStart();
933     if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
934       AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
935   }
936 
937   const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
938   const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
939   assert(SE->isLoopInvariant(IVLimit, L) &&
940          "Computed iteration count is not loop invariant!");
941   return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
942                                 ExitingBB->getTerminator());
943 }
944 
945 /// This method rewrites the exit condition of the loop to be a canonical !=
946 /// comparison against the incremented loop induction variable.  This pass is
947 /// able to rewrite the exit tests of any loop where the SCEV analysis can
948 /// determine a loop-invariant trip count of the loop, which is actually a much
949 /// broader range than just linear tests.
950 bool IndVarSimplify::
linearFunctionTestReplace(Loop * L,BasicBlock * ExitingBB,const SCEV * ExitCount,PHINode * IndVar,SCEVExpander & Rewriter)951 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
952                           const SCEV *ExitCount,
953                           PHINode *IndVar, SCEVExpander &Rewriter) {
954   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
955   assert(isLoopCounter(IndVar, L, SE));
956   Instruction * const IncVar =
957     cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
958 
959   // Initialize CmpIndVar to the preincremented IV.
960   Value *CmpIndVar = IndVar;
961   bool UsePostInc = false;
962 
963   // If the exiting block is the same as the backedge block, we prefer to
964   // compare against the post-incremented value, otherwise we must compare
965   // against the preincremented value.
966   if (ExitingBB == L->getLoopLatch()) {
967     // For pointer IVs, we chose to not strip inbounds which requires us not
968     // to add a potentially UB introducing use.  We need to either a) show
969     // the loop test we're modifying is already in post-inc form, or b) show
970     // that adding a use must not introduce UB.
971     bool SafeToPostInc =
972         IndVar->getType()->isIntegerTy() ||
973         isLoopExitTestBasedOn(IncVar, ExitingBB) ||
974         mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
975     if (SafeToPostInc) {
976       UsePostInc = true;
977       CmpIndVar = IncVar;
978     }
979   }
980 
981   // It may be necessary to drop nowrap flags on the incrementing instruction
982   // if either LFTR moves from a pre-inc check to a post-inc check (in which
983   // case the increment might have previously been poison on the last iteration
984   // only) or if LFTR switches to a different IV that was previously dynamically
985   // dead (and as such may be arbitrarily poison). We remove any nowrap flags
986   // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
987   // check), because the pre-inc addrec flags may be adopted from the original
988   // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
989   // TODO: This handling is inaccurate for one case: If we switch to a
990   // dynamically dead IV that wraps on the first loop iteration only, which is
991   // not covered by the post-inc addrec. (If the new IV was not dynamically
992   // dead, it could not be poison on the first iteration in the first place.)
993   if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
994     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
995     if (BO->hasNoUnsignedWrap())
996       BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
997     if (BO->hasNoSignedWrap())
998       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
999   }
1000 
1001   Value *ExitCnt = genLoopLimit(
1002       IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
1003   assert(ExitCnt->getType()->isPointerTy() ==
1004              IndVar->getType()->isPointerTy() &&
1005          "genLoopLimit missed a cast");
1006 
1007   // Insert a new icmp_ne or icmp_eq instruction before the branch.
1008   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1009   ICmpInst::Predicate P;
1010   if (L->contains(BI->getSuccessor(0)))
1011     P = ICmpInst::ICMP_NE;
1012   else
1013     P = ICmpInst::ICMP_EQ;
1014 
1015   IRBuilder<> Builder(BI);
1016 
1017   // The new loop exit condition should reuse the debug location of the
1018   // original loop exit condition.
1019   if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
1020     Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
1021 
1022   // For integer IVs, if we evaluated the limit in the narrower bitwidth to
1023   // avoid the expensive expansion of the limit expression in the wider type,
1024   // emit a truncate to narrow the IV to the ExitCount type.  This is safe
1025   // since we know (from the exit count bitwidth), that we can't self-wrap in
1026   // the narrower type.
1027   unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
1028   unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
1029   if (CmpIndVarSize > ExitCntSize) {
1030     assert(!CmpIndVar->getType()->isPointerTy() &&
1031            !ExitCnt->getType()->isPointerTy());
1032 
1033     // Before resorting to actually inserting the truncate, use the same
1034     // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
1035     // the other side of the comparison instead.  We still evaluate the limit
1036     // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
1037     // a truncate within in.
1038     bool Extended = false;
1039     const SCEV *IV = SE->getSCEV(CmpIndVar);
1040     const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
1041     const SCEV *ZExtTrunc =
1042       SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
1043 
1044     if (ZExtTrunc == IV) {
1045       Extended = true;
1046       ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
1047                                    "wide.trip.count");
1048     } else {
1049       const SCEV *SExtTrunc =
1050         SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
1051       if (SExtTrunc == IV) {
1052         Extended = true;
1053         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
1054                                      "wide.trip.count");
1055       }
1056     }
1057 
1058     if (Extended) {
1059       bool Discard;
1060       L->makeLoopInvariant(ExitCnt, Discard);
1061     } else
1062       CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
1063                                       "lftr.wideiv");
1064   }
1065   LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
1066                     << "      LHS:" << *CmpIndVar << '\n'
1067                     << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
1068                     << "\n"
1069                     << "      RHS:\t" << *ExitCnt << "\n"
1070                     << "ExitCount:\t" << *ExitCount << "\n"
1071                     << "  was: " << *BI->getCondition() << "\n");
1072 
1073   Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
1074   Value *OrigCond = BI->getCondition();
1075   // It's tempting to use replaceAllUsesWith here to fully replace the old
1076   // comparison, but that's not immediately safe, since users of the old
1077   // comparison may not be dominated by the new comparison. Instead, just
1078   // update the branch to use the new comparison; in the common case this
1079   // will make old comparison dead.
1080   BI->setCondition(Cond);
1081   DeadInsts.emplace_back(OrigCond);
1082 
1083   ++NumLFTR;
1084   return true;
1085 }
1086 
1087 //===----------------------------------------------------------------------===//
1088 //  sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1089 //===----------------------------------------------------------------------===//
1090 
1091 /// If there's a single exit block, sink any loop-invariant values that
1092 /// were defined in the preheader but not used inside the loop into the
1093 /// exit block to reduce register pressure in the loop.
sinkUnusedInvariants(Loop * L)1094 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1095   BasicBlock *ExitBlock = L->getExitBlock();
1096   if (!ExitBlock) return false;
1097 
1098   BasicBlock *Preheader = L->getLoopPreheader();
1099   if (!Preheader) return false;
1100 
1101   bool MadeAnyChanges = false;
1102   BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
1103   BasicBlock::iterator I(Preheader->getTerminator());
1104   while (I != Preheader->begin()) {
1105     --I;
1106     // New instructions were inserted at the end of the preheader.
1107     if (isa<PHINode>(I))
1108       break;
1109 
1110     // Don't move instructions which might have side effects, since the side
1111     // effects need to complete before instructions inside the loop.  Also don't
1112     // move instructions which might read memory, since the loop may modify
1113     // memory. Note that it's okay if the instruction might have undefined
1114     // behavior: LoopSimplify guarantees that the preheader dominates the exit
1115     // block.
1116     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
1117       continue;
1118 
1119     // Skip debug info intrinsics.
1120     if (isa<DbgInfoIntrinsic>(I))
1121       continue;
1122 
1123     // Skip eh pad instructions.
1124     if (I->isEHPad())
1125       continue;
1126 
1127     // Don't sink alloca: we never want to sink static alloca's out of the
1128     // entry block, and correctly sinking dynamic alloca's requires
1129     // checks for stacksave/stackrestore intrinsics.
1130     // FIXME: Refactor this check somehow?
1131     if (isa<AllocaInst>(I))
1132       continue;
1133 
1134     // Determine if there is a use in or before the loop (direct or
1135     // otherwise).
1136     bool UsedInLoop = false;
1137     for (Use &U : I->uses()) {
1138       Instruction *User = cast<Instruction>(U.getUser());
1139       BasicBlock *UseBB = User->getParent();
1140       if (PHINode *P = dyn_cast<PHINode>(User)) {
1141         unsigned i =
1142           PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1143         UseBB = P->getIncomingBlock(i);
1144       }
1145       if (UseBB == Preheader || L->contains(UseBB)) {
1146         UsedInLoop = true;
1147         break;
1148       }
1149     }
1150 
1151     // If there is, the def must remain in the preheader.
1152     if (UsedInLoop)
1153       continue;
1154 
1155     // Otherwise, sink it to the exit block.
1156     Instruction *ToMove = &*I;
1157     bool Done = false;
1158 
1159     if (I != Preheader->begin()) {
1160       // Skip debug info intrinsics.
1161       do {
1162         --I;
1163       } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1164 
1165       if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1166         Done = true;
1167     } else {
1168       Done = true;
1169     }
1170 
1171     MadeAnyChanges = true;
1172     ToMove->moveBefore(*ExitBlock, InsertPt);
1173     SE->forgetValue(ToMove);
1174     if (Done) break;
1175     InsertPt = ToMove->getIterator();
1176   }
1177 
1178   return MadeAnyChanges;
1179 }
1180 
replaceExitCond(BranchInst * BI,Value * NewCond,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1181 static void replaceExitCond(BranchInst *BI, Value *NewCond,
1182                             SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1183   auto *OldCond = BI->getCondition();
1184   LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
1185                     << " with " << *NewCond << "\n");
1186   BI->setCondition(NewCond);
1187   if (OldCond->use_empty())
1188     DeadInsts.emplace_back(OldCond);
1189 }
1190 
createFoldedExitCond(const Loop * L,BasicBlock * ExitingBB,bool IsTaken)1191 static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
1192                                       bool IsTaken) {
1193   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1194   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1195   auto *OldCond = BI->getCondition();
1196   return ConstantInt::get(OldCond->getType(),
1197                           IsTaken ? ExitIfTrue : !ExitIfTrue);
1198 }
1199 
foldExit(const Loop * L,BasicBlock * ExitingBB,bool IsTaken,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1200 static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
1201                      SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1202   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1203   auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
1204   replaceExitCond(BI, NewCond, DeadInsts);
1205 }
1206 
replaceLoopPHINodesWithPreheaderValues(LoopInfo * LI,Loop * L,SmallVectorImpl<WeakTrackingVH> & DeadInsts,ScalarEvolution & SE)1207 static void replaceLoopPHINodesWithPreheaderValues(
1208     LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
1209     ScalarEvolution &SE) {
1210   assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
1211   auto *LoopPreheader = L->getLoopPreheader();
1212   auto *LoopHeader = L->getHeader();
1213   SmallVector<Instruction *> Worklist;
1214   for (auto &PN : LoopHeader->phis()) {
1215     auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
1216     for (User *U : PN.users())
1217       Worklist.push_back(cast<Instruction>(U));
1218     SE.forgetValue(&PN);
1219     PN.replaceAllUsesWith(PreheaderIncoming);
1220     DeadInsts.emplace_back(&PN);
1221   }
1222 
1223   // Replacing with the preheader value will often allow IV users to simplify
1224   // (especially if the preheader value is a constant).
1225   SmallPtrSet<Instruction *, 16> Visited;
1226   while (!Worklist.empty()) {
1227     auto *I = cast<Instruction>(Worklist.pop_back_val());
1228     if (!Visited.insert(I).second)
1229       continue;
1230 
1231     // Don't simplify instructions outside the loop.
1232     if (!L->contains(I))
1233       continue;
1234 
1235     Value *Res = simplifyInstruction(I, I->getDataLayout());
1236     if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
1237       for (User *U : I->users())
1238         Worklist.push_back(cast<Instruction>(U));
1239       I->replaceAllUsesWith(Res);
1240       DeadInsts.emplace_back(I);
1241     }
1242   }
1243 }
1244 
1245 static Value *
createInvariantCond(const Loop * L,BasicBlock * ExitingBB,const ScalarEvolution::LoopInvariantPredicate & LIP,SCEVExpander & Rewriter)1246 createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
1247                     const ScalarEvolution::LoopInvariantPredicate &LIP,
1248                     SCEVExpander &Rewriter) {
1249   ICmpInst::Predicate InvariantPred = LIP.Pred;
1250   BasicBlock *Preheader = L->getLoopPreheader();
1251   assert(Preheader && "Preheader doesn't exist");
1252   Rewriter.setInsertPoint(Preheader->getTerminator());
1253   auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
1254   auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
1255   bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
1256   if (ExitIfTrue)
1257     InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
1258   IRBuilder<> Builder(Preheader->getTerminator());
1259   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
1260   return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
1261                             BI->getCondition()->getName());
1262 }
1263 
1264 static std::optional<Value *>
createReplacement(ICmpInst * ICmp,const Loop * L,BasicBlock * ExitingBB,const SCEV * MaxIter,bool Inverted,bool SkipLastIter,ScalarEvolution * SE,SCEVExpander & Rewriter)1265 createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
1266                   const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
1267                   ScalarEvolution *SE, SCEVExpander &Rewriter) {
1268   ICmpInst::Predicate Pred = ICmp->getPredicate();
1269   Value *LHS = ICmp->getOperand(0);
1270   Value *RHS = ICmp->getOperand(1);
1271 
1272   // 'LHS pred RHS' should now mean that we stay in loop.
1273   auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1274   if (Inverted)
1275     Pred = CmpInst::getInversePredicate(Pred);
1276 
1277   const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
1278   const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
1279   // Can we prove it to be trivially true or false?
1280   if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
1281     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);
1282 
1283   auto *ARTy = LHSS->getType();
1284   auto *MaxIterTy = MaxIter->getType();
1285   // If possible, adjust types.
1286   if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
1287     MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
1288   else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
1289     const SCEV *MinusOne = SE->getMinusOne(ARTy);
1290     auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
1291     if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
1292       MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
1293   }
1294 
1295   if (SkipLastIter) {
1296     // Semantically skip last iter is "subtract 1, do not bother about unsigned
1297     // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
1298     // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
1299     // So we manually construct umin(a - 1, b - 1).
1300     SmallVector<const SCEV *, 4> Elements;
1301     if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
1302       for (auto *Op : UMin->operands())
1303         Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
1304       MaxIter = SE->getUMinFromMismatchedTypes(Elements);
1305     } else
1306       MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
1307   }
1308 
1309   // Check if there is a loop-invariant predicate equivalent to our check.
1310   auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
1311                                                                L, BI, MaxIter);
1312   if (!LIP)
1313     return std::nullopt;
1314 
1315   // Can we prove it to be trivially true?
1316   if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
1317     return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
1318   else
1319     return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
1320 }
1321 
optimizeLoopExitWithUnknownExitCount(const Loop * L,BranchInst * BI,BasicBlock * ExitingBB,const SCEV * MaxIter,bool SkipLastIter,ScalarEvolution * SE,SCEVExpander & Rewriter,SmallVectorImpl<WeakTrackingVH> & DeadInsts)1322 static bool optimizeLoopExitWithUnknownExitCount(
1323     const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
1324     bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
1325     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
1326   assert(
1327       (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
1328       "Not a loop exit!");
1329 
1330   // For branch that stays in loop by TRUE condition, go through AND. For branch
1331   // that stays in loop by FALSE condition, go through OR. Both gives the
1332   // similar logic: "stay in loop iff all conditions are true(false)".
1333   bool Inverted = L->contains(BI->getSuccessor(1));
1334   SmallVector<ICmpInst *, 4> LeafConditions;
1335   SmallVector<Value *, 4> Worklist;
1336   SmallPtrSet<Value *, 4> Visited;
1337   Value *OldCond = BI->getCondition();
1338   Visited.insert(OldCond);
1339   Worklist.push_back(OldCond);
1340 
1341   auto GoThrough = [&](Value *V) {
1342     Value *LHS = nullptr, *RHS = nullptr;
1343     if (Inverted) {
1344       if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
1345         return false;
1346     } else {
1347       if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
1348         return false;
1349     }
1350     if (Visited.insert(LHS).second)
1351       Worklist.push_back(LHS);
1352     if (Visited.insert(RHS).second)
1353       Worklist.push_back(RHS);
1354     return true;
1355   };
1356 
1357   do {
1358     Value *Curr = Worklist.pop_back_val();
1359     // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
1360     // those with one use, to avoid instruction duplication.
1361     if (Curr->hasOneUse())
1362       if (!GoThrough(Curr))
1363         if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
1364           LeafConditions.push_back(ICmp);
1365   } while (!Worklist.empty());
1366 
1367   // If the current basic block has the same exit count as the whole loop, and
1368   // it consists of multiple icmp's, try to collect all icmp's that give exact
1369   // same exit count. For all other icmp's, we could use one less iteration,
1370   // because their value on the last iteration doesn't really matter.
1371   SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
1372   if (!SkipLastIter && LeafConditions.size() > 1 &&
1373       SE->getExitCount(L, ExitingBB,
1374                        ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
1375           MaxIter)
1376     for (auto *ICmp : LeafConditions) {
1377       auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
1378                                              /*ControlsExit*/ false);
1379       auto *ExitMax = EL.SymbolicMaxNotTaken;
1380       if (isa<SCEVCouldNotCompute>(ExitMax))
1381         continue;
1382       // They could be of different types (specifically this happens after
1383       // IV widening).
1384       auto *WiderType =
1385           SE->getWiderType(ExitMax->getType(), MaxIter->getType());
1386       auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
1387       auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
1388       if (WideExitMax == WideMaxIter)
1389         ICmpsFailingOnLastIter.insert(ICmp);
1390     }
1391 
1392   bool Changed = false;
1393   for (auto *OldCond : LeafConditions) {
1394     // Skip last iteration for this icmp under one of two conditions:
1395     // - We do it for all conditions;
1396     // - There is another ICmp that would fail on last iter, so this one doesn't
1397     // really matter.
1398     bool OptimisticSkipLastIter = SkipLastIter;
1399     if (!OptimisticSkipLastIter) {
1400       if (ICmpsFailingOnLastIter.size() > 1)
1401         OptimisticSkipLastIter = true;
1402       else if (ICmpsFailingOnLastIter.size() == 1)
1403         OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
1404     }
1405     if (auto Replaced =
1406             createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
1407                               OptimisticSkipLastIter, SE, Rewriter)) {
1408       Changed = true;
1409       auto *NewCond = *Replaced;
1410       if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
1411         NCI->setName(OldCond->getName() + ".first_iter");
1412       }
1413       LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
1414                         << " with " << *NewCond << "\n");
1415       assert(OldCond->hasOneUse() && "Must be!");
1416       OldCond->replaceAllUsesWith(NewCond);
1417       DeadInsts.push_back(OldCond);
1418       // Make sure we no longer consider this condition as failing on last
1419       // iteration.
1420       ICmpsFailingOnLastIter.erase(OldCond);
1421     }
1422   }
1423   return Changed;
1424 }
1425 
canonicalizeExitCondition(Loop * L)1426 bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
1427   // Note: This is duplicating a particular part on SimplifyIndVars reasoning.
1428   // We need to duplicate it because given icmp zext(small-iv), C, IVUsers
1429   // never reaches the icmp since the zext doesn't fold to an AddRec unless
1430   // it already has flags.  The alternative to this would be to extending the
1431   // set of "interesting" IV users to include the icmp, but doing that
1432   // regresses results in practice by querying SCEVs before trip counts which
1433   // rely on them which results in SCEV caching sub-optimal answers.  The
1434   // concern about caching sub-optimal results is why we only query SCEVs of
1435   // the loop invariant RHS here.
1436   SmallVector<BasicBlock*, 16> ExitingBlocks;
1437   L->getExitingBlocks(ExitingBlocks);
1438   bool Changed = false;
1439   for (auto *ExitingBB : ExitingBlocks) {
1440     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1441     if (!BI)
1442       continue;
1443     assert(BI->isConditional() && "exit branch must be conditional");
1444 
1445     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1446     if (!ICmp || !ICmp->hasOneUse())
1447       continue;
1448 
1449     auto *LHS = ICmp->getOperand(0);
1450     auto *RHS = ICmp->getOperand(1);
1451     // For the range reasoning, avoid computing SCEVs in the loop to avoid
1452     // poisoning cache with sub-optimal results.  For the must-execute case,
1453     // this is a neccessary precondition for correctness.
1454     if (!L->isLoopInvariant(RHS)) {
1455       if (!L->isLoopInvariant(LHS))
1456         continue;
1457       // Same logic applies for the inverse case
1458       std::swap(LHS, RHS);
1459     }
1460 
1461     // Match (icmp signed-cond zext, RHS)
1462     Value *LHSOp = nullptr;
1463     if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
1464       continue;
1465 
1466     const DataLayout &DL = ExitingBB->getDataLayout();
1467     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1468     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1469     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1470     FullCR = FullCR.zeroExtend(OuterBitWidth);
1471     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1472     if (FullCR.contains(RHSCR)) {
1473       // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
1474       // replace the signed condition with the unsigned version.
1475       ICmp->setPredicate(ICmp->getUnsignedPredicate());
1476       Changed = true;
1477       // Note: No SCEV invalidation needed.  We've changed the predicate, but
1478       // have not changed exit counts, or the values produced by the compare.
1479       continue;
1480     }
1481   }
1482 
1483   // Now that we've canonicalized the condition to match the extend,
1484   // see if we can rotate the extend out of the loop.
1485   for (auto *ExitingBB : ExitingBlocks) {
1486     auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1487     if (!BI)
1488       continue;
1489     assert(BI->isConditional() && "exit branch must be conditional");
1490 
1491     auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
1492     if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
1493       continue;
1494 
1495     bool Swapped = false;
1496     auto *LHS = ICmp->getOperand(0);
1497     auto *RHS = ICmp->getOperand(1);
1498     if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
1499       // Nothing to rotate
1500       continue;
1501     if (L->isLoopInvariant(LHS)) {
1502       // Same logic applies for the inverse case until we actually pick
1503       // which operand of the compare to update.
1504       Swapped = true;
1505       std::swap(LHS, RHS);
1506     }
1507     assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
1508 
1509     // Match (icmp unsigned-cond zext, RHS)
1510     // TODO: Extend to handle corresponding sext/signed-cmp case
1511     // TODO: Extend to other invertible functions
1512     Value *LHSOp = nullptr;
1513     if (!match(LHS, m_ZExt(m_Value(LHSOp))))
1514       continue;
1515 
1516     // In general, we only rotate if we can do so without increasing the number
1517     // of instructions.  The exception is when we have an zext(add-rec).  The
1518     // reason for allowing this exception is that we know we need to get rid
1519     // of the zext for SCEV to be able to compute a trip count for said loops;
1520     // we consider the new trip count valuable enough to increase instruction
1521     // count by one.
1522     if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
1523       continue;
1524 
1525     // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
1526     // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
1527     // when zext is loop varying and RHS is loop invariant.  This converts
1528     // loop varying work to loop-invariant work.
1529     auto doRotateTransform = [&]() {
1530       assert(ICmp->isUnsigned() && "must have proven unsigned already");
1531       auto *NewRHS = CastInst::Create(
1532           Instruction::Trunc, RHS, LHSOp->getType(), "",
1533           L->getLoopPreheader()->getTerminator()->getIterator());
1534       ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
1535       ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
1536       if (LHS->use_empty())
1537         DeadInsts.push_back(LHS);
1538     };
1539 
1540 
1541     const DataLayout &DL = ExitingBB->getDataLayout();
1542     const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
1543     const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
1544     auto FullCR = ConstantRange::getFull(InnerBitWidth);
1545     FullCR = FullCR.zeroExtend(OuterBitWidth);
1546     auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
1547     if (FullCR.contains(RHSCR)) {
1548       doRotateTransform();
1549       Changed = true;
1550       // Note, we are leaving SCEV in an unfortunately imprecise case here
1551       // as rotation tends to reveal information about trip counts not
1552       // previously visible.
1553       continue;
1554     }
1555   }
1556 
1557   return Changed;
1558 }
1559 
optimizeLoopExits(Loop * L,SCEVExpander & Rewriter)1560 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
1561   SmallVector<BasicBlock*, 16> ExitingBlocks;
1562   L->getExitingBlocks(ExitingBlocks);
1563 
1564   // Remove all exits which aren't both rewriteable and execute on every
1565   // iteration.
1566   llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1567     // If our exitting block exits multiple loops, we can only rewrite the
1568     // innermost one.  Otherwise, we're changing how many times the innermost
1569     // loop runs before it exits.
1570     if (LI->getLoopFor(ExitingBB) != L)
1571       return true;
1572 
1573     // Can't rewrite non-branch yet.
1574     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1575     if (!BI)
1576       return true;
1577 
1578     // Likewise, the loop latch must be dominated by the exiting BB.
1579     if (!DT->dominates(ExitingBB, L->getLoopLatch()))
1580       return true;
1581 
1582     if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
1583       // If already constant, nothing to do. However, if this is an
1584       // unconditional exit, we can still replace header phis with their
1585       // preheader value.
1586       if (!L->contains(BI->getSuccessor(CI->isNullValue())))
1587         replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1588       return true;
1589     }
1590 
1591     return false;
1592   });
1593 
1594   if (ExitingBlocks.empty())
1595     return false;
1596 
1597   // Get a symbolic upper bound on the loop backedge taken count.
1598   const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
1599   if (isa<SCEVCouldNotCompute>(MaxBECount))
1600     return false;
1601 
1602   // Visit our exit blocks in order of dominance. We know from the fact that
1603   // all exits must dominate the latch, so there is a total dominance order
1604   // between them.
1605   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
1606                // std::sort sorts in ascending order, so we want the inverse of
1607                // the normal dominance relation.
1608                if (A == B) return false;
1609                if (DT->properlyDominates(A, B))
1610                  return true;
1611                else {
1612                  assert(DT->properlyDominates(B, A) &&
1613                         "expected total dominance order!");
1614                  return false;
1615                }
1616   });
1617 #ifdef ASSERT
1618   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
1619     assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
1620   }
1621 #endif
1622 
1623   bool Changed = false;
1624   bool SkipLastIter = false;
1625   const SCEV *CurrMaxExit = SE->getCouldNotCompute();
1626   auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
1627     if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
1628       return;
1629     if (isa<SCEVCouldNotCompute>(CurrMaxExit))
1630       CurrMaxExit = MaxExitCount;
1631     else
1632       CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
1633     // If the loop has more than 1 iteration, all further checks will be
1634     // executed 1 iteration less.
1635     if (CurrMaxExit == MaxBECount)
1636       SkipLastIter = true;
1637   };
1638   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
1639   for (BasicBlock *ExitingBB : ExitingBlocks) {
1640     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
1641     const SCEV *MaxExitCount = SE->getExitCount(
1642         L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
1643     if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
1644       // Okay, we do not know the exit count here. Can we at least prove that it
1645       // will remain the same within iteration space?
1646       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1647       auto OptimizeCond = [&](bool SkipLastIter) {
1648         return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
1649                                                     MaxBECount, SkipLastIter,
1650                                                     SE, Rewriter, DeadInsts);
1651       };
1652 
1653       // TODO: We might have proved that we can skip the last iteration for
1654       // this check. In this case, we only want to check the condition on the
1655       // pre-last iteration (MaxBECount - 1). However, there is a nasty
1656       // corner case:
1657       //
1658       //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
1659       //
1660       // If we could not prove that len != 0, then we also could not prove that
1661       // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then
1662       // OptimizeCond will likely not prove anything for it, even if it could
1663       // prove the same fact for len.
1664       //
1665       // As a temporary solution, we query both last and pre-last iterations in
1666       // hope that we will be able to prove triviality for at least one of
1667       // them. We can stop querying MaxBECount for this case once SCEV
1668       // understands that (MaxBECount - 1) will not overflow here.
1669       if (OptimizeCond(false))
1670         Changed = true;
1671       else if (SkipLastIter && OptimizeCond(true))
1672         Changed = true;
1673       UpdateSkipLastIter(MaxExitCount);
1674       continue;
1675     }
1676 
1677     UpdateSkipLastIter(ExactExitCount);
1678 
1679     // If we know we'd exit on the first iteration, rewrite the exit to
1680     // reflect this.  This does not imply the loop must exit through this
1681     // exit; there may be an earlier one taken on the first iteration.
1682     // We know that the backedge can't be taken, so we replace all
1683     // the header PHIs with values coming from the preheader.
1684     if (ExactExitCount->isZero()) {
1685       foldExit(L, ExitingBB, true, DeadInsts);
1686       replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
1687       Changed = true;
1688       continue;
1689     }
1690 
1691     assert(ExactExitCount->getType()->isIntegerTy() &&
1692            MaxBECount->getType()->isIntegerTy() &&
1693            "Exit counts must be integers");
1694 
1695     Type *WiderType =
1696         SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
1697     ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
1698     MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
1699     assert(MaxBECount->getType() == ExactExitCount->getType());
1700 
1701     // Can we prove that some other exit must be taken strictly before this
1702     // one?
1703     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
1704                                      ExactExitCount)) {
1705       foldExit(L, ExitingBB, false, DeadInsts);
1706       Changed = true;
1707       continue;
1708     }
1709 
1710     // As we run, keep track of which exit counts we've encountered.  If we
1711     // find a duplicate, we've found an exit which would have exited on the
1712     // exiting iteration, but (from the visit order) strictly follows another
1713     // which does the same and is thus dead.
1714     if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
1715       foldExit(L, ExitingBB, false, DeadInsts);
1716       Changed = true;
1717       continue;
1718     }
1719 
1720     // TODO: There might be another oppurtunity to leverage SCEV's reasoning
1721     // here.  If we kept track of the min of dominanting exits so far, we could
1722     // discharge exits with EC >= MDEC. This is less powerful than the existing
1723     // transform (since later exits aren't considered), but potentially more
1724     // powerful for any case where SCEV can prove a >=u b, but neither a == b
1725     // or a >u b.  Such a case is not currently known.
1726   }
1727   return Changed;
1728 }
1729 
predicateLoopExits(Loop * L,SCEVExpander & Rewriter)1730 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
1731   SmallVector<BasicBlock*, 16> ExitingBlocks;
1732   L->getExitingBlocks(ExitingBlocks);
1733 
1734   // Finally, see if we can rewrite our exit conditions into a loop invariant
1735   // form. If we have a read-only loop, and we can tell that we must exit down
1736   // a path which does not need any of the values computed within the loop, we
1737   // can rewrite the loop to exit on the first iteration.  Note that this
1738   // doesn't either a) tell us the loop exits on the first iteration (unless
1739   // *all* exits are predicateable) or b) tell us *which* exit might be taken.
1740   // This transformation looks a lot like a restricted form of dead loop
1741   // elimination, but restricted to read-only loops and without neccesssarily
1742   // needing to kill the loop entirely.
1743   if (!LoopPredication)
1744     return false;
1745 
1746   // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
1747   // through *explicit* control flow.  We have to eliminate the possibility of
1748   // implicit exits (see below) before we know it's truly exact.
1749   const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
1750   if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
1751     return false;
1752 
1753   assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
1754   assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
1755 
1756   auto BadExit = [&](BasicBlock *ExitingBB) {
1757     // If our exiting block exits multiple loops, we can only rewrite the
1758     // innermost one.  Otherwise, we're changing how many times the innermost
1759     // loop runs before it exits.
1760     if (LI->getLoopFor(ExitingBB) != L)
1761       return true;
1762 
1763     // Can't rewrite non-branch yet.
1764     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
1765     if (!BI)
1766       return true;
1767 
1768     // If already constant, nothing to do.
1769     if (isa<Constant>(BI->getCondition()))
1770       return true;
1771 
1772     // If the exit block has phis, we need to be able to compute the values
1773     // within the loop which contains them.  This assumes trivially lcssa phis
1774     // have already been removed; TODO: generalize
1775     BasicBlock *ExitBlock =
1776     BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
1777     if (!ExitBlock->phis().empty())
1778       return true;
1779 
1780     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1781     if (isa<SCEVCouldNotCompute>(ExitCount) ||
1782         !Rewriter.isSafeToExpand(ExitCount))
1783       return true;
1784 
1785     assert(SE->isLoopInvariant(ExitCount, L) &&
1786            "Exit count must be loop invariant");
1787     assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
1788     return false;
1789   };
1790 
1791   // If we have any exits which can't be predicated themselves, than we can't
1792   // predicate any exit which isn't guaranteed to execute before it.  Consider
1793   // two exits (a) and (b) which would both exit on the same iteration.  If we
1794   // can predicate (b), but not (a), and (a) preceeds (b) along some path, then
1795   // we could convert a loop from exiting through (a) to one exiting through
1796   // (b).  Note that this problem exists only for exits with the same exit
1797   // count, and we could be more aggressive when exit counts are known inequal.
1798   llvm::sort(ExitingBlocks,
1799             [&](BasicBlock *A, BasicBlock *B) {
1800               // std::sort sorts in ascending order, so we want the inverse of
1801               // the normal dominance relation, plus a tie breaker for blocks
1802               // unordered by dominance.
1803               if (DT->properlyDominates(A, B)) return true;
1804               if (DT->properlyDominates(B, A)) return false;
1805               return A->getName() < B->getName();
1806             });
1807   // Check to see if our exit blocks are a total order (i.e. a linear chain of
1808   // exits before the backedge).  If they aren't, reasoning about reachability
1809   // is complicated and we choose not to for now.
1810   for (unsigned i = 1; i < ExitingBlocks.size(); i++)
1811     if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
1812       return false;
1813 
1814   // Given our sorted total order, we know that exit[j] must be evaluated
1815   // after all exit[i] such j > i.
1816   for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
1817     if (BadExit(ExitingBlocks[i])) {
1818       ExitingBlocks.resize(i);
1819       break;
1820     }
1821 
1822   if (ExitingBlocks.empty())
1823     return false;
1824 
1825   // We rely on not being able to reach an exiting block on a later iteration
1826   // then it's statically compute exit count.  The implementaton of
1827   // getExitCount currently has this invariant, but assert it here so that
1828   // breakage is obvious if this ever changes..
1829   assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
1830         return DT->dominates(ExitingBB, L->getLoopLatch());
1831       }));
1832 
1833   // At this point, ExitingBlocks consists of only those blocks which are
1834   // predicatable.  Given that, we know we have at least one exit we can
1835   // predicate if the loop is doesn't have side effects and doesn't have any
1836   // implicit exits (because then our exact BTC isn't actually exact).
1837   // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
1838   // suggestions on how to improve this?  I can obviously bail out for outer
1839   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
1840   // is that enough for *all* side effects?
1841   for (BasicBlock *BB : L->blocks())
1842     for (auto &I : *BB)
1843       // TODO:isGuaranteedToTransfer
1844       if (I.mayHaveSideEffects())
1845         return false;
1846 
1847   bool Changed = false;
1848   // Finally, do the actual predication for all predicatable blocks.  A couple
1849   // of notes here:
1850   // 1) We don't bother to constant fold dominated exits with identical exit
1851   //    counts; that's simply a form of CSE/equality propagation and we leave
1852   //    it for dedicated passes.
1853   // 2) We insert the comparison at the branch.  Hoisting introduces additional
1854   //    legality constraints and we leave that to dedicated logic.  We want to
1855   //    predicate even if we can't insert a loop invariant expression as
1856   //    peeling or unrolling will likely reduce the cost of the otherwise loop
1857   //    varying check.
1858   Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
1859   IRBuilder<> B(L->getLoopPreheader()->getTerminator());
1860   Value *ExactBTCV = nullptr; // Lazily generated if needed.
1861   for (BasicBlock *ExitingBB : ExitingBlocks) {
1862     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1863 
1864     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
1865     Value *NewCond;
1866     if (ExitCount == ExactBTC) {
1867       NewCond = L->contains(BI->getSuccessor(0)) ?
1868         B.getFalse() : B.getTrue();
1869     } else {
1870       Value *ECV = Rewriter.expandCodeFor(ExitCount);
1871       if (!ExactBTCV)
1872         ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
1873       Value *RHS = ExactBTCV;
1874       if (ECV->getType() != RHS->getType()) {
1875         Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
1876         ECV = B.CreateZExt(ECV, WiderTy);
1877         RHS = B.CreateZExt(RHS, WiderTy);
1878       }
1879       auto Pred = L->contains(BI->getSuccessor(0)) ?
1880         ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1881       NewCond = B.CreateICmp(Pred, ECV, RHS);
1882     }
1883     Value *OldCond = BI->getCondition();
1884     BI->setCondition(NewCond);
1885     if (OldCond->use_empty())
1886       DeadInsts.emplace_back(OldCond);
1887     Changed = true;
1888     RunUnswitching = true;
1889   }
1890 
1891   return Changed;
1892 }
1893 
1894 //===----------------------------------------------------------------------===//
1895 //  IndVarSimplify driver. Manage several subpasses of IV simplification.
1896 //===----------------------------------------------------------------------===//
1897 
run(Loop * L)1898 bool IndVarSimplify::run(Loop *L) {
1899   // We need (and expect!) the incoming loop to be in LCSSA.
1900   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
1901          "LCSSA required to run indvars!");
1902 
1903   // If LoopSimplify form is not available, stay out of trouble. Some notes:
1904   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
1905   //    canonicalization can be a pessimization without LSR to "clean up"
1906   //    afterwards.
1907   //  - We depend on having a preheader; in particular,
1908   //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
1909   //    and we're in trouble if we can't find the induction variable even when
1910   //    we've manually inserted one.
1911   //  - LFTR relies on having a single backedge.
1912   if (!L->isLoopSimplifyForm())
1913     return false;
1914 
1915   bool Changed = false;
1916   // If there are any floating-point recurrences, attempt to
1917   // transform them to use integer recurrences.
1918   Changed |= rewriteNonIntegerIVs(L);
1919 
1920   // Create a rewriter object which we'll use to transform the code with.
1921   SCEVExpander Rewriter(*SE, DL, "indvars");
1922 #ifndef NDEBUG
1923   Rewriter.setDebugType(DEBUG_TYPE);
1924 #endif
1925 
1926   // Eliminate redundant IV users.
1927   //
1928   // Simplification works best when run before other consumers of SCEV. We
1929   // attempt to avoid evaluating SCEVs for sign/zero extend operations until
1930   // other expressions involving loop IVs have been evaluated. This helps SCEV
1931   // set no-wrap flags before normalizing sign/zero extension.
1932   Rewriter.disableCanonicalMode();
1933   Changed |= simplifyAndExtend(L, Rewriter, LI);
1934 
1935   // Check to see if we can compute the final value of any expressions
1936   // that are recurrent in the loop, and substitute the exit values from the
1937   // loop into any instructions outside of the loop that use the final values
1938   // of the current expressions.
1939   if (ReplaceExitValue != NeverRepl) {
1940     if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
1941                                              ReplaceExitValue, DeadInsts)) {
1942       NumReplaced += Rewrites;
1943       Changed = true;
1944     }
1945   }
1946 
1947   // Eliminate redundant IV cycles.
1948   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
1949 
1950   // Try to convert exit conditions to unsigned and rotate computation
1951   // out of the loop.  Note: Handles invalidation internally if needed.
1952   Changed |= canonicalizeExitCondition(L);
1953 
1954   // Try to eliminate loop exits based on analyzeable exit counts
1955   if (optimizeLoopExits(L, Rewriter))  {
1956     Changed = true;
1957     // Given we've changed exit counts, notify SCEV
1958     // Some nested loops may share same folded exit basic block,
1959     // thus we need to notify top most loop.
1960     SE->forgetTopmostLoop(L);
1961   }
1962 
1963   // Try to form loop invariant tests for loop exits by changing how many
1964   // iterations of the loop run when that is unobservable.
1965   if (predicateLoopExits(L, Rewriter)) {
1966     Changed = true;
1967     // Given we've changed exit counts, notify SCEV
1968     SE->forgetLoop(L);
1969   }
1970 
1971   // If we have a trip count expression, rewrite the loop's exit condition
1972   // using it.
1973   if (!DisableLFTR) {
1974     BasicBlock *PreHeader = L->getLoopPreheader();
1975 
1976     SmallVector<BasicBlock*, 16> ExitingBlocks;
1977     L->getExitingBlocks(ExitingBlocks);
1978     for (BasicBlock *ExitingBB : ExitingBlocks) {
1979       // Can't rewrite non-branch yet.
1980       if (!isa<BranchInst>(ExitingBB->getTerminator()))
1981         continue;
1982 
1983       // If our exitting block exits multiple loops, we can only rewrite the
1984       // innermost one.  Otherwise, we're changing how many times the innermost
1985       // loop runs before it exits.
1986       if (LI->getLoopFor(ExitingBB) != L)
1987         continue;
1988 
1989       if (!needsLFTR(L, ExitingBB))
1990         continue;
1991 
1992       const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
1993       if (isa<SCEVCouldNotCompute>(ExitCount))
1994         continue;
1995 
1996       // This was handled above, but as we form SCEVs, we can sometimes refine
1997       // existing ones; this allows exit counts to be folded to zero which
1998       // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
1999       // until stable to handle cases like this better.
2000       if (ExitCount->isZero())
2001         continue;
2002 
2003       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
2004       if (!IndVar)
2005         continue;
2006 
2007       // Avoid high cost expansions.  Note: This heuristic is questionable in
2008       // that our definition of "high cost" is not exactly principled.
2009       if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
2010                                        TTI, PreHeader->getTerminator()))
2011         continue;
2012 
2013       if (!Rewriter.isSafeToExpand(ExitCount))
2014         continue;
2015 
2016       Changed |= linearFunctionTestReplace(L, ExitingBB,
2017                                            ExitCount, IndVar,
2018                                            Rewriter);
2019     }
2020   }
2021   // Clear the rewriter cache, because values that are in the rewriter's cache
2022   // can be deleted in the loop below, causing the AssertingVH in the cache to
2023   // trigger.
2024   Rewriter.clear();
2025 
2026   // Now that we're done iterating through lists, clean up any instructions
2027   // which are now dead.
2028   while (!DeadInsts.empty()) {
2029     Value *V = DeadInsts.pop_back_val();
2030 
2031     if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
2032       Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
2033     else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
2034       Changed |=
2035           RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
2036   }
2037 
2038   // The Rewriter may not be used from this point on.
2039 
2040   // Loop-invariant instructions in the preheader that aren't used in the
2041   // loop may be sunk below the loop to reduce register pressure.
2042   Changed |= sinkUnusedInvariants(L);
2043 
2044   // rewriteFirstIterationLoopExitValues does not rely on the computation of
2045   // trip count and therefore can further simplify exit values in addition to
2046   // rewriteLoopExitValues.
2047   Changed |= rewriteFirstIterationLoopExitValues(L);
2048 
2049   // Clean up dead instructions.
2050   Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());
2051 
2052   // Check a post-condition.
2053   assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
2054          "Indvars did not preserve LCSSA!");
2055   if (VerifyMemorySSA && MSSAU)
2056     MSSAU->getMemorySSA()->verifyMemorySSA();
2057 
2058   return Changed;
2059 }
2060 
run(Loop & L,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater &)2061 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
2062                                           LoopStandardAnalysisResults &AR,
2063                                           LPMUpdater &) {
2064   Function *F = L.getHeader()->getParent();
2065   const DataLayout &DL = F->getDataLayout();
2066 
2067   IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
2068                      WidenIndVars && AllowIVWidening);
2069   if (!IVS.run(&L))
2070     return PreservedAnalyses::all();
2071 
2072   auto PA = getLoopPassPreservedAnalyses();
2073   PA.preserveSet<CFGAnalyses>();
2074   if (IVS.runUnswitching()) {
2075     AM.getResult<ShouldRunExtraSimpleLoopUnswitch>(L, AR);
2076     PA.preserve<ShouldRunExtraSimpleLoopUnswitch>();
2077   }
2078 
2079   if (AR.MSSA)
2080     PA.preserve<MemorySSAAnalysis>();
2081   return PA;
2082 }
2083