//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened, "Number of indvars widened");
STATISTIC(NumReplaced, "Number of exit values replaced");
STATISTIC(NumLFTR, "Number of loop exit tests replaced");
STATISTIC(NumElimExt, "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV, "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                    cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned or rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
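  /// (Illustrative sketch, not a guarantee of the exact transform: in a loop
  /// that only reads memory, an exit test such as `i == len` may be replaced
  /// by a test computed before the loop, since running extra iterations of a
  /// side-effect-free loop is unobservable.)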
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for (double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for (int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge = IncomingEdge ^ 1;

  // Check the incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp. If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
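  // Illustrative shape of the pattern matched so far (hypothetical IR):
  //   %iv   = phi double [ %init, %preheader ], [ %incr, %backedge ]
  //   %incr = fadd double %iv, 1.0
  //   %cmp  = fcmp olt double %incr, 1.0e4
  //   br i1 %cmp, label %backedge, label %exit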
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false; // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue - InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
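    // (Worked example with hypothetical values: ExitValue = 2147483645 and
    // IncValue = 4 make ExitValue + IncValue wrap to a negative i32, so the
    // check below rejects the transform.)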
    if (Leftover != 0 && int32_t(ExitValue + IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue - ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue + IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName() + ".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName() + ".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something
  // else in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all? (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.) Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
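        // Concretely, that known value is ExitVal's incoming value from the
        // preheader, fetched below.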
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
      : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    LoopPhis.push_back(cast<PHINode>(I));
  }
  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs.
    // This forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extension until needed, and avoid running
    // other SCEV based analysis prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so. Otherwise, return null. Note
/// that this is less general than SCEV's AddRec checking.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    LLVM_FALLTHROUGH;
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value. Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test. This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE.
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS.
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS.
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path
/// to OnPathTo if Root produced a poison result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison. This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist. Note that a false result conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit).
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction *, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. Safe as false is a conservative result.
    if (!propagatesPoison(cast<Operator>(I)) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value *> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth + 1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value *, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test.
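/// (Illustrative: for a counter `%i = phi ...; %i.next = add %i, 1` whose
/// only other use is the exit compare, this returns true.)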
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode *Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add rec w/ step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program. (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison. Our strategy here differs for
    // pointers and integer IVs. For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace. For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter w/ unit stride) after the backedge of loop L
/// is taken ExitCount times.
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip
    // count, which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to
    // zero extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    // We could handle pointer IVs other than i8*, but we need to compensate
    // for gep index scaling.
    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
                             cast<PointerType>(IndVar->getType())
                                 ->getElementType())->isOne() &&
           "unit stride pointer IV must be i8*");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in
    // practice SCEV will fold the pointer arithmetic away as such:
    //   BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType()) >
        SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
        IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable. This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction *const IncVar =
      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose to not strip inbounds which requires us not
    // to add a potentially UB introducing use. We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type. This is safe
  // since we know (from the exit count bitwidth) that we can't self-wrap in
  // the narrower type.
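  // (Hypothetical example: a 64-bit IV with a 32-bit exit count. We prefer
  // comparing trunc(%iv) against the 32-bit limit over expanding a zext of
  // the limit expression in the wider type.)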
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead. We still evaluate the limit
    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop. Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
            PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());

      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  auto *NewCond = ConstantInt::get(OldCond->getType(),
                                   IsTaken ? ExitIfTrue : !ExitIfTrue);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }
}

static void replaceWithInvariantCond(
    const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
    const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
  auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                                     BI->getCondition()->getName());
  replaceExitCond(BI, NewCond, DeadInsts);
}

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB,
    const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
    ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  BasicBlock *TrueSucc, *FalseSucc;
  if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
                      m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
    return false;

  assert((L->contains(TrueSucc) != L->contains(FalseSucc)) &&
         "Not a loop exit!");

  // 'LHS pred RHS' should now mean that we stay in the loop.
  if (L->contains(FalseSucc))
    Pred = CmpInst::getInversePredicate(Pred);

  // If we are proving loop exit, invert the predicate.
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(Pred, LHSS, RHSS, BI)) {
    foldExit(L, ExitingBB, Inverted, DeadInsts);
    return true;
  }
  // Further logic works for the non-inverted condition only.
  if (Inverted)
    return false;

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    const SCEV *One = SE->getOne(MaxIter->getType());
    MaxIter = SE->getMinusSCEV(MaxIter, One);
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return false;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    foldExit(L, ExitingBB, Inverted, DeadInsts);
  else
    replaceWithInvariantCond(L, ExitingBB, LIP->Pred, LIP->LHS, LIP->RHS,
                             Rewriter, DeadInsts);

  return true;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given 'icmp zext(small-iv), C', IVUsers
  // never reaches the icmp: the zext doesn't fold to an AddRec unless it
  // already has flags. The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them, causing SCEV to cache sub-optimal answers. That concern about
  // caching sub-optimal results is why we only query SCEVs of the loop
  // invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results. For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // The same logic applies for the inverse case.
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with its unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed. We've changed the predicate, but
      // have not changed the exit counts or the values produced by the
      // compare.
      continue;
    }
  }

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
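  // As an illustrative sketch of the rotation below (types chosen
  // arbitrarily): given
  //   %wide = zext i16 %iv to i32
  //   %cmp  = icmp ult i32 %wide, %inv   ; %inv loop invariant, fits in i16
  // we can instead compare in the narrow type:
  //   %narrow = trunc i32 %inv to i16    ; inserted in the preheader
  //   %cmp    = icmp ult i16 %iv, %narrow
  // trading a per-iteration zext for a loop-invariant trunc.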
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate.
      continue;
    if (L->isLoopInvariant(LHS)) {
      // The same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle the corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the
    // number of instructions. The exception is when we have a zext(add-rec):
    // we know we need to get rid of the zext for SCEV to be able to compute
    // a trip count for such loops, and we consider the new trip count
    // valuable enough to increase the instruction count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant. This converts
    // loop-varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
          CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                           L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note: we are leaving SCEV in an unfortunately imprecise state here,
      // as rotation tends to reveal information about trip counts that was
      // not previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and executed on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
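    // For example (hypothetical nest): a conditional branch in an inner loop
    // that leaves both the inner and the enclosing loop belongs to the inner
    // loop per LoopInfo; rewriting it while processing the outer loop could
    // change how many inner-loop iterations run before the exit is taken.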
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. Since all exits dominate
  // the latch, there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (A == B) return false;
    if (DT->properlyDominates(A, B))
      return true;
    else {
      assert(DT->properlyDominates(B, A) &&
             "expected total dominance order!");
      return false;
    }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove
      // that it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool Inverted, bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(
            L, BI, ExitingBB, MaxExitCount, Inverted, SkipLastIter, SE,
            Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxExitCount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove
      // that (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxExitCount for this
      // case once SCEV understands that (MaxExitCount - 1) will not overflow
      // here.
      if (OptimizeCond(false, false) || OptimizeCond(true, false))
        Changed = true;
      else if (SkipLastIter)
        if (OptimizeCond(false, true) || OptimizeCond(true, true))
          Changed = true;
      continue;
    }

    if (MaxExitCount == ExitCount)
      // If the loop has more than 1 iteration, all further checks will be
      // executed 1 iteration less.
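      // (Illustrative: if both counts equal some symbolic N, this exit is
      // taken no later than iteration N, so any exit visited after this one
      // in dominance order only needs its condition to hold for iterations
      // 0..N-1.)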
      SkipLastIter = true;

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
      Changed = true;
      continue;
    }

    assert(ExitCount->getType()->isIntegerTy() &&
           MaxExitCount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of the dominating exits seen so far,
    // we could discharge exits with EC >= MDEC. This is less powerful than
    // the existing transform (since later exits aren't considered), but
    // potentially more powerful for any case where SCEV can prove a >=u b,
    // but neither a == b nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // See if we can rewrite our exit conditions into a loop invariant form. If
  // we have a read-only loop, and we can tell that we must exit down a path
  // which does not need any of the values computed within the loop, we can
  // rewrite the loop to exit on the first iteration. Note that this neither
  // a) tells us the loop exits on the first iteration (unless *all* exits
  // are predicateable) nor b) tells us *which* exit might be taken. This
  // transformation looks a lot like a restricted form of dead loop
  // elimination, but restricted to read-only loops and without necessarily
  // needing to kill the loop entirely.
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow. We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
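  // Illustrative sketch (hypothetical loop): in a read-only search such as
  //
  //   for (i = 0; i != n; i++)
  //     if (a[i] == 0)
  //       break;
  //
  // assuming no exit path uses a value computed inside the loop, each
  // predicateable exit test can be rewritten at its branch into the
  // loop-invariant form 'this-exit-count == exact-BTC' without observable
  // effect.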
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them. This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it. Consider
  // two exits (a) and (b) which would both exit on the same iteration. If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path,
  // then we could convert a loop from exiting through (a) to one exiting
  // through (b). Note that this problem exists only for exits with the same
  // exit count, and we could be more aggressive when exit counts are known
  // to be unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse
               // of the normal dominance relation, plus a tie breaker for
               // blocks unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge). If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count. The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable. Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests. Any
  // suggestions on how to improve this? I can obviously bail out for outer
  // loops, but that seems less than ideal. MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks. A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch. Hoisting introduces
  //    additional legality constraints and we leave that to dedicated logic.
  //    We want to predicate even if we can't insert a loop invariant
  //    expression as peeling or unrolling will likely reduce the cost of the
  //    otherwise loop varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //   - LSR currently only supports LoopSimplify-form loops. Indvars'
  //     canonicalization can be a pessimization without LSR to "clean up"
  //     afterwards.
  //   - We depend on having a preheader; in particular,
  //     Loop::getCanonicalInductionVariable only supports loops with
  //     preheaders, and we're in trouble if we can't find the induction
  //     variable even when we've manually inserted one.
  //   - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the
  // order in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop. Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block, so we
    // need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();
    BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one. Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes
      // refine existing ones; this allows exit counts to be folded to zero
      // which weren't when optimizeLoopExits saw them. Arguably, we should
      // iterate until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions. Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeaderBR))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute the trip count.
  // We may have *changed* the exit count, but only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}