//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value. This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable. If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened  , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR     , "Number of loop exit tests replaced");
STATISTIC(NumElimExt  , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV   , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
    "indvars-post-increment-ranges", cl::Hidden,
    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
    cl::init(true));

static cl::opt<bool>
    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
                cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                    cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR. Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// is converted into
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check IV increment. Reject this PN if the increment operation is not an
  // add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop. If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can. This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations. Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
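    // Illustrative worked example (not part of the original source): with
    // InitValue = 0, ExitValue = 10, IncValue = 3 and an equality predicate,
    // Leftover = 10 % 3 = 1, so the integer IV would step past the exit value
    // and wrap before the compare ever fired; the checks below reject that.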
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
      BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                                Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison. The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value. In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
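  // Illustrative IR shape after the rewrite (a sketch, not from the original
  // source): the loop now carries
  //   %i.int = phi i32 [ <init>, %preheader ], [ %i.next.int, %latch ]
  // and any remaining FP users of the old PHI are fed by
  //   %indvar.conv = sitofp i32 %i.int to double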
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step. Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;

  // Ith incoming value.
  unsigned Ith;

  // Exit value after expansion.
  Value *Val;

  // High cost when expanded.
  bool HighCost;

  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
      : PN(P), Ith(I), Val(V), HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not integer or is not defined
        // in the loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop. Evaluate the value it
        // contains when the loop exits, if possible. We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which can not be optimized
        // away. Avoid doing so unless we know we have a value which computes
        // the ExitValue already. TODO: This should be merged into SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);

        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
                          << '\n'
                          << "  LoopVal = " << *Inst << "\n");

        if (!isValidRewrite(Inst, ExitVal)) {
          DeadInsts.push_back(ExitVal);
          continue;
        }

#ifndef NDEBUG
        // If we reuse an instruction from a loop which is neither L nor one of
        // its containing loops, we end up breaking LCSSA form for this loop by
        // creating a new use of its instruction.
        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
            if (EVL != L)
              assert(EVL->contains(L) && "LCSSA breach detected!");
#endif

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
      }
    }
  }

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Val;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, delete it. Don't do it now to avoid
    // invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// loop header.
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting exit
/// value. If it is possible, ignore ReplaceExitValue and do rewriting
/// aggressively.
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // In the LoopDeletion pass, a loop can be deleted even when
  // ExitingBlocks.size() > 1. For simplicity, we do not handle the
  // multiple-ExitingBlocks case here.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can be
  // deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and provides
// the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later). This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetics on the wider indvar are more
  // expensive than those on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable. We
  // could compute more comprehensively the cost of all instructions on the
  // induction variable when necessary.
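  // Illustrative example (not from the original source): on a target where a
  // 64-bit add is costlier than a 32-bit add, an i32 IV that only feeds a
  // 'sext ... to i64' is deliberately left narrow by the check below.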
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def. This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative. Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
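    // Illustrative example (not from the original source): for
    //   %use = add nsw i32 %narrow.iv, %n
    // we guess X = sext(%n) or X = zext(%n), starting with the extension that
    // matches how the IV was widened; if the resulting SCEV does not equal
    // WideAR, the caller retries with the other extension.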
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
      cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
        SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the no-wrap
  // behavior depends on. Non-control-equivalent instructions can be mapped to
  // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
  // semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return;
  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
                    << *DU.NarrowUse << "\n");
  IRBuilder<> Builder(InsertPt);
  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
}

/// If the narrow use is a compare instruction, then widen the compare
/// (and possibly the other operand). The extend operation is hoisted into the
/// loop preheader as far as possible.
bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
  if (!Cmp)
    return false;

  // We can legally widen the comparison in the following two cases:
  //
  //  - The signedness of the IV extension and comparison match
  //
  //  - The narrow IV is always positive (and thus its sign extension is equal
  //    to its zero extension).  For instance, let's say we're zero extending
  //    %narrow for the following use
  //
  //      icmp slt i32 %narrow, %val   ... (A)
  //
  //    and %narrow is always positive.  Then
  //
  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
  //          == icmp slt i32 zext(%narrow), sext(%val)
  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
    return false;

  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");

  // Widen the compare instruction.
  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
  if (!InsertPt)
    return false;
  IRBuilder<> Builder(InsertPt);
  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);

  // Widen the other operand of the compare, if necessary.
  if (CastWidth < IVWidth) {
    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
  }
  return true;
}

/// If the narrow use is an instruction whose two operands are the defining
/// instruction of DU and a load instruction, then we have the following:
/// if the load is hoisted outside the loop, then we do not reach this function
/// as scalar evolution analysis works fine in widenIVUse with variables
/// hoisted outside the loop and efficient code is subsequently generated by
/// not emitting truncate instructions. But when the load is not hoisted
/// (whether due to a limitation in alias analysis or due to a true legality
/// constraint), then scalar evolution cannot proceed with loop variant values
/// and inefficient code is generated. This function handles the non-hoisted
/// load special case by making the optimization generate the same type of code
/// for hoisted and non-hoisted load (widen use and eliminate sign extend
/// instruction). This special case is important especially when the induction
/// variables are affecting addressing mode in code generation.
1415 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { 1416 Instruction *NarrowUse = DU.NarrowUse; 1417 Instruction *NarrowDef = DU.NarrowDef; 1418 Instruction *WideDef = DU.WideDef; 1419 1420 // Handle the common case of add<nsw/nuw> 1421 const unsigned OpCode = NarrowUse->getOpcode(); 1422 // Only Add/Sub/Mul instructions are supported. 1423 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1424 OpCode != Instruction::Mul) 1425 return false; 1426 1427 // The operand that is not defined by NarrowDef of DU. Let's call it the 1428 // other operand. 1429 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1430 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1431 "bad DU"); 1432 1433 const SCEV *ExtendOperExpr = nullptr; 1434 const OverflowingBinaryOperator *OBO = 1435 cast<OverflowingBinaryOperator>(NarrowUse); 1436 ExtendKind ExtKind = getExtendKind(NarrowDef); 1437 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1438 ExtendOperExpr = SE->getSignExtendExpr( 1439 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1440 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1441 ExtendOperExpr = SE->getZeroExtendExpr( 1442 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1443 else 1444 return false; 1445 1446 // We are interested in the other operand being a load instruction. 1447 // But, we should look into relaxing this restriction later on. 1448 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1449 if (I && I->getOpcode() != Instruction::Load) 1450 return false; 1451 1452 // Verifying that Defining operand is an AddRec 1453 const SCEV *Op1 = SE->getSCEV(WideDef); 1454 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1455 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1456 return false; 1457 // Verifying that other operand is an Extend. 1458 if (ExtKind == SignExtended) { 1459 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1460 return false; 1461 } else { 1462 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1463 return false; 1464 } 1465 1466 if (ExtKind == SignExtended) { 1467 for (Use &U : NarrowUse->uses()) { 1468 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1469 if (!User || User->getType() != WideType) 1470 return false; 1471 } 1472 } else { // ExtKind == ZeroExtended 1473 for (Use &U : NarrowUse->uses()) { 1474 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1475 if (!User || User->getType() != WideType) 1476 return false; 1477 } 1478 } 1479 1480 return true; 1481 } 1482 1483 /// Special Case for widening with variant Loads (see 1484 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1485 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1486 Instruction *NarrowUse = DU.NarrowUse; 1487 Instruction *NarrowDef = DU.NarrowDef; 1488 Instruction *WideDef = DU.WideDef; 1489 1490 ExtendKind ExtKind = getExtendKind(NarrowDef); 1491 1492 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1493 1494 // Generating a widening use instruction. 1495 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1496 ? WideDef 1497 : createExtendInst(NarrowUse->getOperand(0), WideType, 1498 ExtKind, NarrowUse); 1499 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1500 ? 
WideDef 1501 : createExtendInst(NarrowUse->getOperand(1), WideType, 1502 ExtKind, NarrowUse); 1503 1504 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1505 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1506 NarrowBO->getName()); 1507 IRBuilder<> Builder(NarrowUse); 1508 Builder.Insert(WideBO); 1509 WideBO->copyIRFlags(NarrowBO); 1510 1511 if (ExtKind == SignExtended) 1512 ExtendKindMap[NarrowUse] = SignExtended; 1513 else 1514 ExtendKindMap[NarrowUse] = ZeroExtended; 1515 1516 // Update the Use. 1517 if (ExtKind == SignExtended) { 1518 for (Use &U : NarrowUse->uses()) { 1519 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1520 if (User && User->getType() == WideType) { 1521 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1522 << *WideBO << "\n"); 1523 ++NumElimExt; 1524 User->replaceAllUsesWith(WideBO); 1525 DeadInsts.emplace_back(User); 1526 } 1527 } 1528 } else { // ExtKind == ZeroExtended 1529 for (Use &U : NarrowUse->uses()) { 1530 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1531 if (User && User->getType() == WideType) { 1532 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1533 << *WideBO << "\n"); 1534 ++NumElimExt; 1535 User->replaceAllUsesWith(WideBO); 1536 DeadInsts.emplace_back(User); 1537 } 1538 } 1539 } 1540 } 1541 1542 /// Determine whether an individual user of the narrow IV can be widened. If so, 1543 /// return the wide clone of the user. 1544 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1545 assert(ExtendKindMap.count(DU.NarrowDef) && 1546 "Should already know the kind of extension used to widen NarrowDef"); 1547 1548 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1549 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1550 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1551 // For LCSSA phis, sink the truncate outside the loop. 1552 // After SimplifyCFG most loop exit targets have a single predecessor. 1553 // Otherwise fall back to a truncate within the loop. 1554 if (UsePhi->getNumOperands() != 1) 1555 truncateIVUse(DU, DT, LI); 1556 else { 1557 // Widening the PHI requires us to insert a trunc. The logical place 1558 // for this trunc is in the same BB as the PHI. This is not possible if 1559 // the BB is terminated by a catchswitch. 1560 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1561 return nullptr; 1562 1563 PHINode *WidePhi = 1564 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1565 UsePhi); 1566 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1567 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1568 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1569 UsePhi->replaceAllUsesWith(Trunc); 1570 DeadInsts.emplace_back(UsePhi); 1571 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1572 << *WidePhi << "\n"); 1573 } 1574 return nullptr; 1575 } 1576 } 1577 1578 // This narrow use can be widened by a sext if it's non-negative or its narrow 1579 // def was widended by a sext. Same for zext. 1580 auto canWidenBySExt = [&]() { 1581 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1582 }; 1583 auto canWidenByZExt = [&]() { 1584 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1585 }; 1586 1587 // Our raison d'etre! Eliminate sign and zero extension. 
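// As an illustrative sketch (hypothetical names): if the new wide IV is
// %iv.wide : i64 and the narrow def feeds
//   %ext = sext i32 %iv.narrow to i64
// then %ext is simply replaced by %iv.wide below, with no new instructions,
// provided the extension kind matches how the IV was widened (or the narrow
// value is known to be non-negative).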
1588 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1589 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1590 Value *NewDef = DU.WideDef;
1591 if (DU.NarrowUse->getType() != WideType) {
1592 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1593 unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1594 if (CastWidth < IVWidth) {
1595 // The cast isn't as wide as the IV, so insert a Trunc.
1596 IRBuilder<> Builder(DU.NarrowUse);
1597 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1598 }
1599 else {
1600 // A wider extend was hidden behind a narrower one. This may induce
1601 // another round of IV widening in which the intermediate IV becomes
1602 // dead. It should be very rare.
1603 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1604 << " not wide enough to subsume " << *DU.NarrowUse
1605 << "\n");
1606 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1607 NewDef = DU.NarrowUse;
1608 }
1609 }
1610 if (NewDef != DU.NarrowUse) {
1611 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1612 << " replaced by " << *DU.WideDef << "\n");
1613 ++NumElimExt;
1614 DU.NarrowUse->replaceAllUsesWith(NewDef);
1615 DeadInsts.emplace_back(DU.NarrowUse);
1616 }
1617 // Now that the extend is gone, we want to expose its uses for potential
1618 // further simplification. We don't need to directly inform SimplifyIVUsers
1619 // of the new users, because their parent IV will be processed later as a
1620 // new loop phi. If we preserved IVUsers analysis, we would also want to
1621 // push the uses of WideDef here.
1622 
1623 // No further widening is needed. The deceased [sz]ext had done it for us.
1624 return nullptr;
1625 }
1626 
1627 // Does this user itself evaluate to a recurrence after widening?
1628 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1629 if (!WideAddRec.first)
1630 WideAddRec = getWideRecurrence(DU);
1631 
1632 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1633 if (!WideAddRec.first) {
1634 // If use is a loop condition, try to promote the condition instead of
1635 // truncating the IV first.
1636 if (widenLoopCompare(DU))
1637 return nullptr;
1638 
1639 // We are about to generate a truncate instruction that may hurt
1640 // performance because the scalar evolution expression computed earlier
1641 // in WideAddRec.first does not indicate a polynomial induction expression.
1642 // In that case, look at the operands of the use instruction to determine
1643 // if we can still widen the use instead of truncating its operand.
1644 if (widenWithVariantLoadUse(DU)) {
1645 widenWithVariantLoadUseCodegen(DU);
1646 return nullptr;
1647 }
1648 
1649 // This user does not evaluate to a recurrence after widening, so don't
1650 // follow it. Instead insert a Trunc to kill off the original use,
1651 // eventually isolating the original narrow IV so it can be removed.
1652 truncateIVUse(DU, DT, LI);
1653 return nullptr;
1654 }
1655 // Assume block terminators cannot evaluate to a recurrence. We can't
1656 // insert a Trunc after a terminator if there happens to be a critical edge.
1657 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1658 "SCEV is not expected to evaluate a block terminator");
1659 
1660 // Reuse the IV increment that SCEVExpander created as long as it dominates
1661 // NarrowUse.
1662 Instruction *WideUse = nullptr; 1663 if (WideAddRec.first == WideIncExpr && 1664 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1665 WideUse = WideInc; 1666 else { 1667 WideUse = cloneIVUser(DU, WideAddRec.first); 1668 if (!WideUse) 1669 return nullptr; 1670 } 1671 // Evaluation of WideAddRec ensured that the narrow expression could be 1672 // extended outside the loop without overflow. This suggests that the wide use 1673 // evaluates to the same expression as the extended narrow use, but doesn't 1674 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1675 // where it fails, we simply throw away the newly created wide use. 1676 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1677 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1678 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1679 << "\n"); 1680 DeadInsts.emplace_back(WideUse); 1681 return nullptr; 1682 } 1683 1684 // if we reached this point then we are going to replace 1685 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1686 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1687 1688 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1689 // Returning WideUse pushes it on the worklist. 1690 return WideUse; 1691 } 1692 1693 /// Add eligible users of NarrowDef to NarrowIVUsers. 1694 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1695 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1696 bool NonNegativeDef = 1697 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1698 SE->getConstant(NarrowSCEV->getType(), 0)); 1699 for (User *U : NarrowDef->users()) { 1700 Instruction *NarrowUser = cast<Instruction>(U); 1701 1702 // Handle data flow merges and bizarre phi cycles. 1703 if (!Widened.insert(NarrowUser).second) 1704 continue; 1705 1706 bool NonNegativeUse = false; 1707 if (!NonNegativeDef) { 1708 // We might have a control-dependent range information for this context. 1709 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1710 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1711 } 1712 1713 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1714 NonNegativeDef || NonNegativeUse); 1715 } 1716 } 1717 1718 /// Process a single induction variable. First use the SCEVExpander to create a 1719 /// wide induction variable that evaluates to the same recurrence as the 1720 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1721 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1722 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1723 /// 1724 /// It would be simpler to delete uses as they are processed, but we must avoid 1725 /// invalidating SCEV expressions. 1726 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1727 // Is this phi an induction variable? 1728 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1729 if (!AddRec) 1730 return nullptr; 1731 1732 // Widen the induction variable expression. 1733 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1734 ? SE->getSignExtendExpr(AddRec, WideType) 1735 : SE->getZeroExtendExpr(AddRec, WideType); 1736 1737 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1738 "Expect the new IV expression to preserve its type"); 1739 1740 // Can the IV be extended outside the loop without overflow? 
1741 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1742 if (!AddRec || AddRec->getLoop() != L) 1743 return nullptr; 1744 1745 // An AddRec must have loop-invariant operands. Since this AddRec is 1746 // materialized by a loop header phi, the expression cannot have any post-loop 1747 // operands, so they must dominate the loop header. 1748 assert( 1749 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1750 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1751 "Loop header phi recurrence inputs do not dominate the loop"); 1752 1753 // Iterate over IV uses (including transitive ones) looking for IV increments 1754 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1755 // the increment calculate control-dependent range information basing on 1756 // dominating conditions inside of the loop (e.g. a range check inside of the 1757 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1758 // 1759 // Control-dependent range information is later used to prove that a narrow 1760 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1761 // this on demand because when pushNarrowIVUsers needs this information some 1762 // of the dominating conditions might be already widened. 1763 if (UsePostIncrementRanges) 1764 calculatePostIncRanges(OrigPhi); 1765 1766 // The rewriter provides a value for the desired IV expression. This may 1767 // either find an existing phi or materialize a new one. Either way, we 1768 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1769 // of the phi-SCC dominates the loop entry. 1770 Instruction *InsertPt = &L->getHeader()->front(); 1771 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1772 1773 // Remembering the WideIV increment generated by SCEVExpander allows 1774 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1775 // employ a general reuse mechanism because the call above is the only call to 1776 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1777 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1778 WideInc = 1779 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1780 WideIncExpr = SE->getSCEV(WideInc); 1781 // Propagate the debug location associated with the original loop increment 1782 // to the new (widened) increment. 1783 auto *OrigInc = 1784 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1785 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1786 } 1787 1788 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1789 ++NumWidened; 1790 1791 // Traverse the def-use chain using a worklist starting at the original IV. 1792 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1793 1794 Widened.insert(OrigPhi); 1795 pushNarrowIVUsers(OrigPhi, WidePhi); 1796 1797 while (!NarrowIVUsers.empty()) { 1798 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1799 1800 // Process a def-use edge. This may replace the use, so don't hold a 1801 // use_iterator across it. 1802 Instruction *WideUse = widenIVUse(DU, Rewriter); 1803 1804 // Follow all def-use edges from the previous narrow use. 1805 if (WideUse) 1806 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1807 1808 // widenIVUse may have removed the def-use edge. 1809 if (DU.NarrowDef->use_empty()) 1810 DeadInsts.emplace_back(DU.NarrowDef); 1811 } 1812 1813 // Attach any debug information to the new PHI. 
1814 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1815 1816 return WidePhi; 1817 } 1818 1819 /// Calculates control-dependent range for the given def at the given context 1820 /// by looking at dominating conditions inside of the loop 1821 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1822 Instruction *NarrowUser) { 1823 using namespace llvm::PatternMatch; 1824 1825 Value *NarrowDefLHS; 1826 const APInt *NarrowDefRHS; 1827 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1828 m_APInt(NarrowDefRHS))) || 1829 !NarrowDefRHS->isNonNegative()) 1830 return; 1831 1832 auto UpdateRangeFromCondition = [&] (Value *Condition, 1833 bool TrueDest) { 1834 CmpInst::Predicate Pred; 1835 Value *CmpRHS; 1836 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1837 m_Value(CmpRHS)))) 1838 return; 1839 1840 CmpInst::Predicate P = 1841 TrueDest ? Pred : CmpInst::getInversePredicate(Pred); 1842 1843 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1844 auto CmpConstrainedLHSRange = 1845 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1846 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1847 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1848 1849 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1850 }; 1851 1852 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1853 if (!HasGuards) 1854 return; 1855 1856 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1857 Ctx->getParent()->rend())) { 1858 Value *C = nullptr; 1859 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1860 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1861 } 1862 }; 1863 1864 UpdateRangeFromGuards(NarrowUser); 1865 1866 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1867 // If NarrowUserBB is statically unreachable asking dominator queries may 1868 // yield surprising results. (e.g. the block may not have a dom tree node) 1869 if (!DT->isReachableFromEntry(NarrowUserBB)) 1870 return; 1871 1872 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1873 L->contains(DTB->getBlock()); 1874 DTB = DTB->getIDom()) { 1875 auto *BB = DTB->getBlock(); 1876 auto *TI = BB->getTerminator(); 1877 UpdateRangeFromGuards(TI); 1878 1879 auto *BI = dyn_cast<BranchInst>(TI); 1880 if (!BI || !BI->isConditional()) 1881 continue; 1882 1883 auto *TrueSuccessor = BI->getSuccessor(0); 1884 auto *FalseSuccessor = BI->getSuccessor(1); 1885 1886 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1887 return BBE.isSingleEdge() && 1888 DT->dominates(BBE, NarrowUser->getParent()); 1889 }; 1890 1891 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1892 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1893 1894 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1895 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1896 } 1897 } 1898 1899 /// Calculates PostIncRangeInfos map for the given IV 1900 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1901 SmallPtrSet<Instruction *, 16> Visited; 1902 SmallVector<Instruction *, 6> Worklist; 1903 Worklist.push_back(OrigPhi); 1904 Visited.insert(OrigPhi); 1905 1906 while (!Worklist.empty()) { 1907 Instruction *NarrowDef = Worklist.pop_back_val(); 1908 1909 for (Use &U : NarrowDef->uses()) { 1910 auto *NarrowUser = cast<Instruction>(U.getUser()); 1911 1912 // Don't go looking outside the current loop. 
1913 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1914 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1915 continue; 1916 1917 if (!Visited.insert(NarrowUser).second) 1918 continue; 1919 1920 Worklist.push_back(NarrowUser); 1921 1922 calculatePostIncRange(NarrowDef, NarrowUser); 1923 } 1924 } 1925 } 1926 1927 //===----------------------------------------------------------------------===// 1928 // Live IV Reduction - Minimize IVs live across the loop. 1929 //===----------------------------------------------------------------------===// 1930 1931 //===----------------------------------------------------------------------===// 1932 // Simplification of IV users based on SCEV evaluation. 1933 //===----------------------------------------------------------------------===// 1934 1935 namespace { 1936 1937 class IndVarSimplifyVisitor : public IVVisitor { 1938 ScalarEvolution *SE; 1939 const TargetTransformInfo *TTI; 1940 PHINode *IVPhi; 1941 1942 public: 1943 WideIVInfo WI; 1944 1945 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1946 const TargetTransformInfo *TTI, 1947 const DominatorTree *DTree) 1948 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1949 DT = DTree; 1950 WI.NarrowIV = IVPhi; 1951 } 1952 1953 // Implement the interface used by simplifyUsersOfIV. 1954 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1955 }; 1956 1957 } // end anonymous namespace 1958 1959 /// Iteratively perform simplification on a worklist of IV users. Each 1960 /// successive simplification may push more users which may themselves be 1961 /// candidates for simplification. 1962 /// 1963 /// Sign/Zero extend elimination is interleaved with IV simplification. 1964 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1965 SCEVExpander &Rewriter, 1966 LoopInfo *LI) { 1967 SmallVector<WideIVInfo, 8> WideIVs; 1968 1969 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1970 Intrinsic::getName(Intrinsic::experimental_guard)); 1971 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1972 1973 SmallVector<PHINode*, 8> LoopPhis; 1974 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1975 LoopPhis.push_back(cast<PHINode>(I)); 1976 } 1977 // Each round of simplification iterates through the SimplifyIVUsers worklist 1978 // for all current phis, then determines whether any IVs can be 1979 // widened. Widening adds new phis to LoopPhis, inducing another round of 1980 // simplification on the wide IVs. 1981 bool Changed = false; 1982 while (!LoopPhis.empty()) { 1983 // Evaluate as many IV expressions as possible before widening any IVs. This 1984 // forces SCEV to set no-wrap flags before evaluating sign/zero 1985 // extension. The first time SCEV attempts to normalize sign/zero extension, 1986 // the result becomes final. So for the most predictable results, we delay 1987 // evaluation of sign/zero extend evaluation until needed, and avoid running 1988 // other SCEV based analysis prior to simplifyAndExtend. 1989 do { 1990 PHINode *CurrIV = LoopPhis.pop_back_val(); 1991 1992 // Information about sign/zero extensions of CurrIV. 
1993 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
1994 
1995 Changed |=
1996 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
1997 
1998 if (Visitor.WI.WidestNativeType) {
1999 WideIVs.push_back(Visitor.WI);
2000 }
2001 } while(!LoopPhis.empty());
2002 
2003 for (; !WideIVs.empty(); WideIVs.pop_back()) {
2004 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2005 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2006 Changed = true;
2007 LoopPhis.push_back(WidePhi);
2008 }
2009 }
2010 }
2011 return Changed;
2012 }
2013 
2014 //===----------------------------------------------------------------------===//
2015 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2016 //===----------------------------------------------------------------------===//
2017 
2018 /// Given a Value which is hoped to be part of an add recurrence in the given
2019 /// loop, return the associated Phi node if so. Otherwise, return null. Note
2020 /// that this is less general than SCEV's AddRec checking.
2021 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2022 Instruction *IncI = dyn_cast<Instruction>(IncV);
2023 if (!IncI)
2024 return nullptr;
2025 
2026 switch (IncI->getOpcode()) {
2027 case Instruction::Add:
2028 case Instruction::Sub:
2029 break;
2030 case Instruction::GetElementPtr:
2031 // An IV counter must preserve its type.
2032 if (IncI->getNumOperands() == 2)
2033 break;
2034 LLVM_FALLTHROUGH;
2035 default:
2036 return nullptr;
2037 }
2038 
2039 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2040 if (Phi && Phi->getParent() == L->getHeader()) {
2041 if (L->isLoopInvariant(IncI->getOperand(1)))
2042 return Phi;
2043 return nullptr;
2044 }
2045 if (IncI->getOpcode() == Instruction::GetElementPtr)
2046 return nullptr;
2047 
2048 // Allow add/sub to be commuted.
2049 Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2050 if (Phi && Phi->getParent() == L->getHeader()) {
2051 if (L->isLoopInvariant(IncI->getOperand(0)))
2052 return Phi;
2053 }
2054 return nullptr;
2055 }
2056 
2057 /// Whether the current loop exit test is based on this value. Currently this
2058 /// is limited to a direct use in the loop condition.
2059 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2060 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2061 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2062 // TODO: Allow non-icmp loop test.
2063 if (!ICmp)
2064 return false;
2065 
2066 // TODO: Allow indirect use.
2067 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2068 }
2069 
2070 /// linearFunctionTestReplace policy. Return true unless we can show that the
2071 /// current exit test is already sufficiently canonical.
2072 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2073 assert(L->getLoopLatch() && "Must be in simplified form");
2074 
2075 // Avoid converting a constant or loop invariant test back to a runtime
2076 // test. This is critical for when SCEV's cached ExitCount is less precise
2077 // than the current IR (such as after we've proven a particular exit is
2078 // actually dead and thus the BE count never reaches our ExitCount.)
2079 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2080 if (L->isLoopInvariant(BI->getCondition()))
2081 return false;
2082 
2083 // Do LFTR to simplify the exit condition to an ICMP.
2084 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2085 if (!Cond)
2086 return true;
2087 
2088 // Do LFTR to simplify the exit ICMP to EQ/NE
2089 ICmpInst::Predicate Pred = Cond->getPredicate();
2090 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2091 return true;
2092 
2093 // Look for a loop invariant RHS
2094 Value *LHS = Cond->getOperand(0);
2095 Value *RHS = Cond->getOperand(1);
2096 if (!L->isLoopInvariant(RHS)) {
2097 if (!L->isLoopInvariant(LHS))
2098 return true;
2099 std::swap(LHS, RHS);
2100 }
2101 // Look for a simple IV counter LHS
2102 PHINode *Phi = dyn_cast<PHINode>(LHS);
2103 if (!Phi)
2104 Phi = getLoopPhiForCounter(LHS, L);
2105 
2106 if (!Phi)
2107 return true;
2108 
2109 // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2110 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2111 if (Idx < 0)
2112 return true;
2113 
2114 // Do LFTR if the exit condition's IV is *not* a simple counter.
2115 Value *IncV = Phi->getIncomingValue(Idx);
2116 return Phi != getLoopPhiForCounter(IncV, L);
2117 }
2118 
2119 /// Return true if undefined behavior would provably be executed on the path to
2120 /// OnPathTo if Root produced a poison result. Note that this doesn't say
2121 /// anything about whether OnPathTo is actually executed or whether Root is
2122 /// actually poison. This can be used to assess whether a new use of Root can
2123 /// be added at a location which is control equivalent with OnPathTo (such as
2124 /// immediately before it) without introducing UB which didn't previously
2125 /// exist. Note that a false result conveys no information.
2126 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2127 Instruction *OnPathTo,
2128 DominatorTree *DT) {
2129 // Basic approach is to assume Root is poison, propagate poison forward
2130 // through all users we can easily track, and then check whether any of those
2131 // users are provably UB and must execute before our exiting block might
2132 // exit.
2133 
2134 // The set of all recursive users we've visited (which are assumed to all be
2135 // poison because of said visit)
2136 SmallSet<const Value *, 16> KnownPoison;
2137 SmallVector<const Instruction*, 16> Worklist;
2138 Worklist.push_back(Root);
2139 while (!Worklist.empty()) {
2140 const Instruction *I = Worklist.pop_back_val();
2141 
2142 // If we know this must trigger UB on a path leading to our target.
2143 if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2144 return true;
2145 
2146 // If we can't analyze propagation through this instruction, just skip it
2147 // and transitive users. Safe as false is a conservative result.
2148 if (!propagatesFullPoison(I) && I != Root)
2149 continue;
2150 
2151 if (KnownPoison.insert(I).second)
2152 for (const User *User : I->users())
2153 Worklist.push_back(cast<Instruction>(User));
2154 }
2155 
2156 // Might be non-UB, or might have a path we couldn't prove must execute on
2157 // the way to the exiting bb.
2158 return false;
2159 }
2160 
2161 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2162 /// down to checking that all operands are constant and listing instructions
2163 /// that may hide undef.
2164 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2165 unsigned Depth) {
2166 if (isa<Constant>(V))
2167 return !isa<UndefValue>(V);
2168 
2169 if (Depth >= 6)
2170 return false;
2171 
2172 // Conservatively handle non-constant non-instructions. For example, Arguments
2173 // may be undef.
2174 Instruction *I = dyn_cast<Instruction>(V);
2175 if (!I)
2176 return false;
2177 
2178 // Load and return values may be undef.
2179 if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2180 return false;
2181 
2182 // Optimistically handle other instructions.
2183 for (Value *Op : I->operands()) {
2184 if (!Visited.insert(Op).second)
2185 continue;
2186 if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2187 return false;
2188 }
2189 return true;
2190 }
2191 
2192 /// Return true if the given value is concrete. We must prove that undef can
2193 /// never reach it.
2194 ///
2195 /// TODO: If we decide that this is a good approach to checking for undef, we
2196 /// may factor it into a common location.
2197 static bool hasConcreteDef(Value *V) {
2198 SmallPtrSet<Value*, 8> Visited;
2199 Visited.insert(V);
2200 return hasConcreteDefImpl(V, Visited, 0);
2201 }
2202 
2203 /// Return true if this IV has any uses other than the (soon to be rewritten)
2204 /// loop exit test.
2205 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2206 int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2207 Value *IncV = Phi->getIncomingValue(LatchIdx);
2208 
2209 for (User *U : Phi->users())
2210 if (U != Cond && U != IncV) return false;
2211 
2212 for (User *U : IncV->users())
2213 if (U != Cond && U != Phi) return false;
2214 return true;
2215 }
2216 
2217 /// Return true if the given phi is a "counter" in L. A counter is an
2218 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2219 /// step of 1. Note that L must have exactly one latch.
2220 static bool isLoopCounter(PHINode* Phi, Loop *L,
2221 ScalarEvolution *SE) {
2222 assert(Phi->getParent() == L->getHeader());
2223 assert(L->getLoopLatch());
2224 
2225 if (!SE->isSCEVable(Phi->getType()))
2226 return false;
2227 
2228 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2229 if (!AR || AR->getLoop() != L || !AR->isAffine())
2230 return false;
2231 
2232 const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2233 if (!Step || !Step->isOne())
2234 return false;
2235 
2236 int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2237 Value *IncV = Phi->getIncomingValue(LatchIdx);
2238 return (getLoopPhiForCounter(IncV, L) == Phi);
2239 }
2240 
2241 /// Search the loop header for a loop counter (an add rec with a step of one)
2242 /// suitable for use by LFTR. If multiple counters are available, select the
2243 /// "best" one based on profitability heuristics.
2244 ///
2245 /// BECount may be an i8* pointer type. The pointer difference is already
2246 /// a valid count without scaling the address stride, so it remains a pointer
2247 /// expression as far as SCEV is concerned.
2248 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2249 const SCEV *BECount,
2250 ScalarEvolution *SE, DominatorTree *DT) {
2251 uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2252 
2253 Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2254 
2255 // Loop over all of the PHI nodes, looking for a simple counter.
2256 PHINode *BestPhi = nullptr; 2257 const SCEV *BestInit = nullptr; 2258 BasicBlock *LatchBlock = L->getLoopLatch(); 2259 assert(LatchBlock && "Must be in simplified form"); 2260 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2261 2262 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 2263 PHINode *Phi = cast<PHINode>(I); 2264 if (!isLoopCounter(Phi, L, SE)) 2265 continue; 2266 2267 // Avoid comparing an integer IV against a pointer Limit. 2268 if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) 2269 continue; 2270 2271 const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi)); 2272 2273 // AR may be a pointer type, while BECount is an integer type. 2274 // AR may be wider than BECount. With eq/ne tests overflow is immaterial. 2275 // AR may not be a narrower type, or we may never exit. 2276 uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType()); 2277 if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth)) 2278 continue; 2279 2280 // Avoid reusing a potentially undef value to compute other values that may 2281 // have originally had a concrete definition. 2282 if (!hasConcreteDef(Phi)) { 2283 // We explicitly allow unknown phis as long as they are already used by 2284 // the loop exit test. This is legal since performing LFTR could not 2285 // increase the number of undef users. 2286 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2287 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2288 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2289 continue; 2290 } 2291 2292 // Avoid introducing undefined behavior due to poison which didn't exist in 2293 // the original program. (Annoyingly, the rules for poison and undef 2294 // propagation are distinct, so this does NOT cover the undef case above.) 2295 // We have to ensure that we don't introduce UB by introducing a use on an 2296 // iteration where said IV produces poison. Our strategy here differs for 2297 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2298 // see code in linearFunctionTestReplace. For pointers, we restrict 2299 // transforms as there is no good way to reinfer inbounds once lost. 2300 if (!Phi->getType()->isIntegerTy() && 2301 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2302 continue; 2303 2304 const SCEV *Init = AR->getStart(); 2305 2306 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2307 // Don't force a live loop counter if another IV can be used. 2308 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2309 continue; 2310 2311 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2312 // also prefers integer to pointer IVs. 2313 if (BestInit->isZero() != Init->isZero()) { 2314 if (BestInit->isZero()) 2315 continue; 2316 } 2317 // If two IVs both count from zero or both count from nonzero then the 2318 // narrower is likely a dead phi that has been widened. Use the wider phi 2319 // to allow the other to be eliminated. 2320 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2321 continue; 2322 } 2323 BestPhi = Phi; 2324 BestInit = Init; 2325 } 2326 return BestPhi; 2327 } 2328 2329 /// Insert an IR expression which computes the value held by the IV IndVar 2330 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2331 /// is taken ExitCount times. 
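/// Illustrative sketch only (names are hypothetical): for an integer counter
/// starting at %init with a unit stride and an exit count %ec, the limit
/// materialized here is conceptually %init + %ec, or %init + %ec + 1 when the
/// post-incremented value is being compared; linearFunctionTestReplace then
/// uses this value as the RHS of the canonical icmp eq/ne exit test.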
2332 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2333 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2334 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2335 assert(isLoopCounter(IndVar, L, SE)); 2336 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2337 const SCEV *IVInit = AR->getStart(); 2338 2339 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2340 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2341 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2342 // the existing GEPs whenever possible. 2343 if (IndVar->getType()->isPointerTy() && 2344 !ExitCount->getType()->isPointerTy()) { 2345 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2346 // signed value. ExitCount on the other hand represents the loop trip count, 2347 // which is an unsigned value. FindLoopCounter only allows induction 2348 // variables that have a positive unit stride of one. This means we don't 2349 // have to handle the case of negative offsets (yet) and just need to zero 2350 // extend ExitCount. 2351 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2352 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2353 if (UsePostInc) 2354 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2355 2356 // Expand the code for the iteration count. 2357 assert(SE->isLoopInvariant(IVOffset, L) && 2358 "Computed iteration count is not loop invariant!"); 2359 2360 // We could handle pointer IVs other than i8*, but we need to compensate for 2361 // gep index scaling. 2362 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2363 cast<PointerType>(IndVar->getType()) 2364 ->getElementType())->isOne() && 2365 "unit stride pointer IV must be i8*"); 2366 2367 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2368 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2369 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2370 } else { 2371 // In any other case, convert both IVInit and ExitCount to integers before 2372 // comparing. This may result in SCEV expansion of pointers, but in practice 2373 // SCEV will fold the pointer arithmetic away as such: 2374 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2375 // 2376 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2377 // for simple memset-style loops. 2378 // 2379 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2380 // were generated on top of case #2, which is not expected. 2381 2382 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2383 // For unit stride, IVCount = Start + ExitCount with 2's complement 2384 // overflow. 2385 2386 // For integer IVs, truncate the IV before computing IVInit + BECount, 2387 // unless we know apriori that the limit must be a constant when evaluated 2388 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2389 // of the IV in the loop over a (potentially) expensive expansion of the 2390 // widened exit count add(zext(add)) expression. 
2391 if (SE->getTypeSizeInBits(IVInit->getType()) 2392 > SE->getTypeSizeInBits(ExitCount->getType())) { 2393 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2394 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2395 else 2396 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2397 } 2398 2399 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2400 2401 if (UsePostInc) 2402 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2403 2404 // Expand the code for the iteration count. 2405 assert(SE->isLoopInvariant(IVLimit, L) && 2406 "Computed iteration count is not loop invariant!"); 2407 // Ensure that we generate the same type as IndVar, or a smaller integer 2408 // type. In the presence of null pointer values, we have an integer type 2409 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2410 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2411 IndVar->getType() : ExitCount->getType(); 2412 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2413 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2414 } 2415 } 2416 2417 /// This method rewrites the exit condition of the loop to be a canonical != 2418 /// comparison against the incremented loop induction variable. This pass is 2419 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2420 /// determine a loop-invariant trip count of the loop, which is actually a much 2421 /// broader range than just linear tests. 2422 bool IndVarSimplify:: 2423 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2424 const SCEV *ExitCount, 2425 PHINode *IndVar, SCEVExpander &Rewriter) { 2426 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2427 assert(isLoopCounter(IndVar, L, SE)); 2428 Instruction * const IncVar = 2429 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2430 2431 // Initialize CmpIndVar to the preincremented IV. 2432 Value *CmpIndVar = IndVar; 2433 bool UsePostInc = false; 2434 2435 // If the exiting block is the same as the backedge block, we prefer to 2436 // compare against the post-incremented value, otherwise we must compare 2437 // against the preincremented value. 2438 if (ExitingBB == L->getLoopLatch()) { 2439 // For pointer IVs, we chose to not strip inbounds which requires us not 2440 // to add a potentially UB introducing use. We need to either a) show 2441 // the loop test we're modifying is already in post-inc form, or b) show 2442 // that adding a use must not introduce UB. 2443 bool SafeToPostInc = 2444 IndVar->getType()->isIntegerTy() || 2445 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2446 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2447 if (SafeToPostInc) { 2448 UsePostInc = true; 2449 CmpIndVar = IncVar; 2450 } 2451 } 2452 2453 // It may be necessary to drop nowrap flags on the incrementing instruction 2454 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2455 // case the increment might have previously been poison on the last iteration 2456 // only) or if LFTR switches to a different IV that was previously dynamically 2457 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2458 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2459 // check), because the pre-inc addrec flags may be adopted from the original 2460 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 
2461 // TODO: This handling is inaccurate for one case: If we switch to a 2462 // dynamically dead IV that wraps on the first loop iteration only, which is 2463 // not covered by the post-inc addrec. (If the new IV was not dynamically 2464 // dead, it could not be poison on the first iteration in the first place.) 2465 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2466 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2467 if (BO->hasNoUnsignedWrap()) 2468 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2469 if (BO->hasNoSignedWrap()) 2470 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2471 } 2472 2473 Value *ExitCnt = genLoopLimit( 2474 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2475 assert(ExitCnt->getType()->isPointerTy() == 2476 IndVar->getType()->isPointerTy() && 2477 "genLoopLimit missed a cast"); 2478 2479 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2480 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2481 ICmpInst::Predicate P; 2482 if (L->contains(BI->getSuccessor(0))) 2483 P = ICmpInst::ICMP_NE; 2484 else 2485 P = ICmpInst::ICMP_EQ; 2486 2487 IRBuilder<> Builder(BI); 2488 2489 // The new loop exit condition should reuse the debug location of the 2490 // original loop exit condition. 2491 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2492 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2493 2494 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2495 // avoid the expensive expansion of the limit expression in the wider type, 2496 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2497 // since we know (from the exit count bitwidth), that we can't self-wrap in 2498 // the narrower type. 2499 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2500 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2501 if (CmpIndVarSize > ExitCntSize) { 2502 assert(!CmpIndVar->getType()->isPointerTy() && 2503 !ExitCnt->getType()->isPointerTy()); 2504 2505 // Before resorting to actually inserting the truncate, use the same 2506 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2507 // the other side of the comparison instead. We still evaluate the limit 2508 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2509 // a truncate within in. 2510 bool Extended = false; 2511 const SCEV *IV = SE->getSCEV(CmpIndVar); 2512 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2513 ExitCnt->getType()); 2514 const SCEV *ZExtTrunc = 2515 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2516 2517 if (ZExtTrunc == IV) { 2518 Extended = true; 2519 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2520 "wide.trip.count"); 2521 } else { 2522 const SCEV *SExtTrunc = 2523 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2524 if (SExtTrunc == IV) { 2525 Extended = true; 2526 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2527 "wide.trip.count"); 2528 } 2529 } 2530 2531 if (Extended) { 2532 bool Discard; 2533 L->makeLoopInvariant(ExitCnt, Discard); 2534 } else 2535 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2536 "lftr.wideiv"); 2537 } 2538 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2539 << " LHS:" << *CmpIndVar << '\n' 2540 << " op:\t" << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") 2541 << "\n" 2542 << " RHS:\t" << *ExitCnt << "\n" 2543 << "ExitCount:\t" << *ExitCount << "\n" 2544 << " was: " << *BI->getCondition() << "\n"); 2545 2546 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2547 Value *OrigCond = BI->getCondition(); 2548 // It's tempting to use replaceAllUsesWith here to fully replace the old 2549 // comparison, but that's not immediately safe, since users of the old 2550 // comparison may not be dominated by the new comparison. Instead, just 2551 // update the branch to use the new comparison; in the common case this 2552 // will make old comparison dead. 2553 BI->setCondition(Cond); 2554 DeadInsts.push_back(OrigCond); 2555 2556 ++NumLFTR; 2557 return true; 2558 } 2559 2560 //===----------------------------------------------------------------------===// 2561 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2562 //===----------------------------------------------------------------------===// 2563 2564 /// If there's a single exit block, sink any loop-invariant values that 2565 /// were defined in the preheader but not used inside the loop into the 2566 /// exit block to reduce register pressure in the loop. 2567 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2568 BasicBlock *ExitBlock = L->getExitBlock(); 2569 if (!ExitBlock) return false; 2570 2571 BasicBlock *Preheader = L->getLoopPreheader(); 2572 if (!Preheader) return false; 2573 2574 bool MadeAnyChanges = false; 2575 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2576 BasicBlock::iterator I(Preheader->getTerminator()); 2577 while (I != Preheader->begin()) { 2578 --I; 2579 // New instructions were inserted at the end of the preheader. 2580 if (isa<PHINode>(I)) 2581 break; 2582 2583 // Don't move instructions which might have side effects, since the side 2584 // effects need to complete before instructions inside the loop. Also don't 2585 // move instructions which might read memory, since the loop may modify 2586 // memory. Note that it's okay if the instruction might have undefined 2587 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2588 // block. 2589 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2590 continue; 2591 2592 // Skip debug info intrinsics. 2593 if (isa<DbgInfoIntrinsic>(I)) 2594 continue; 2595 2596 // Skip eh pad instructions. 2597 if (I->isEHPad()) 2598 continue; 2599 2600 // Don't sink alloca: we never want to sink static alloca's out of the 2601 // entry block, and correctly sinking dynamic alloca's requires 2602 // checks for stacksave/stackrestore intrinsics. 2603 // FIXME: Refactor this check somehow? 2604 if (isa<AllocaInst>(I)) 2605 continue; 2606 2607 // Determine if there is a use in or before the loop (direct or 2608 // otherwise). 2609 bool UsedInLoop = false; 2610 for (Use &U : I->uses()) { 2611 Instruction *User = cast<Instruction>(U.getUser()); 2612 BasicBlock *UseBB = User->getParent(); 2613 if (PHINode *P = dyn_cast<PHINode>(User)) { 2614 unsigned i = 2615 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2616 UseBB = P->getIncomingBlock(i); 2617 } 2618 if (UseBB == Preheader || L->contains(UseBB)) { 2619 UsedInLoop = true; 2620 break; 2621 } 2622 } 2623 2624 // If there is, the def must remain in the preheader. 2625 if (UsedInLoop) 2626 continue; 2627 2628 // Otherwise, sink it to the exit block. 2629 Instruction *ToMove = &*I; 2630 bool Done = false; 2631 2632 if (I != Preheader->begin()) { 2633 // Skip debug info intrinsics. 
2634 do {
2635 --I;
2636 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
2637 
2638 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
2639 Done = true;
2640 } else {
2641 Done = true;
2642 }
2643 
2644 MadeAnyChanges = true;
2645 ToMove->moveBefore(*ExitBlock, InsertPt);
2646 if (Done) break;
2647 InsertPt = ToMove->getIterator();
2648 }
2649 
2650 return MadeAnyChanges;
2651 }
2652 
2653 /// Return a symbolic upper bound for the backedge taken count of the loop.
2654 /// This is more general than getConstantMaxBackedgeTakenCount as it returns
2655 /// an arbitrary expression as opposed to only constants.
2656 /// TODO: Move into the ScalarEvolution class.
2657 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
2658 DominatorTree &DT, Loop *L) {
2659 SmallVector<BasicBlock*, 16> ExitingBlocks;
2660 L->getExitingBlocks(ExitingBlocks);
2661 
2662 // Form an expression for the maximum exit count possible for this loop. We
2663 // merge the max and exact information to approximate a version of
2664 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
2665 SmallVector<const SCEV*, 4> ExitCounts;
2666 for (BasicBlock *ExitingBB : ExitingBlocks) {
2667 const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
2668 if (isa<SCEVCouldNotCompute>(ExitCount))
2669 ExitCount = SE.getExitCount(L, ExitingBB,
2670 ScalarEvolution::ConstantMaximum);
2671 if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2672 assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
2673 "We should only have known counts for exiting blocks that "
2674 "dominate latch!");
2675 ExitCounts.push_back(ExitCount);
2676 }
2677 }
2678 if (ExitCounts.empty())
2679 return SE.getCouldNotCompute();
2680 return SE.getUMinFromMismatchedTypes(ExitCounts);
2681 }
2682 
2683 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
2684 SmallVector<BasicBlock*, 16> ExitingBlocks;
2685 L->getExitingBlocks(ExitingBlocks);
2686 
2687 // Remove all exits which aren't both rewritable and analyzable.
2688 auto NewEnd = llvm::remove_if(ExitingBlocks,
2689 [&](BasicBlock *ExitingBB) {
2690 // If our exiting block exits multiple loops, we can only rewrite the
2691 // innermost one. Otherwise, we're changing how many times the innermost
2692 // loop runs before it exits.
2693 if (LI->getLoopFor(ExitingBB) != L)
2694 return true;
2695 
2696 // Can't rewrite non-branch yet.
2697 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2698 if (!BI)
2699 return true;
2700 
2701 // If already constant, nothing to do.
2702 if (isa<Constant>(BI->getCondition()))
2703 return true;
2704 
2705 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2706 if (isa<SCEVCouldNotCompute>(ExitCount))
2707 return true;
2708 return false;
2709 });
2710 ExitingBlocks.erase(NewEnd, ExitingBlocks.end());
2711 
2712 if (ExitingBlocks.empty())
2713 return false;
2714 
2715 // Get a symbolic upper bound on the loop backedge taken count.
2716 const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
2717 if (isa<SCEVCouldNotCompute>(MaxExitCount))
2718 return false;
2719 
2720 // Visit our exit blocks in order of dominance. We know from the fact that
2721 // all remaining exits are analyzable that there must be a total dominance
2722 // order between them, as each must dominate the latch. The visit order only
2723 // matters for the provably equal case.
2724 llvm::sort(ExitingBlocks,
2725 [&](BasicBlock *A, BasicBlock *B) {
2726 // std::sort sorts in ascending order, so we want the inverse of
2727 // the normal dominance relation.
2728 if (DT->properlyDominates(A, B)) return true;
2729 if (DT->properlyDominates(B, A)) return false;
2730 llvm_unreachable("expected total dominance order!");
2731 });
2732 #ifndef NDEBUG
2733 for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
2734 assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
2735 }
2736 #endif
2737 
2738 auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
2739 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2740 bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2741 auto *OldCond = BI->getCondition();
2742 auto *NewCond = ConstantInt::get(OldCond->getType(),
2743 IsTaken ? ExitIfTrue : !ExitIfTrue);
2744 BI->setCondition(NewCond);
2745 if (OldCond->use_empty())
2746 DeadInsts.push_back(OldCond);
2747 };
2748 
2749 bool Changed = false;
2750 SmallSet<const SCEV*, 8> DominatingExitCounts;
2751 for (BasicBlock *ExitingBB : ExitingBlocks) {
2752 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2753 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");
2754 
2755 // If we know we'd exit on the first iteration, rewrite the exit to
2756 // reflect this. This does not imply the loop must exit through this
2757 // exit; there may be an earlier one taken on the first iteration.
2758 // TODO: Given we know the backedge can't be taken, we should go ahead
2759 // and break it. Or at least, kill all the header phis and simplify.
2760 if (ExitCount->isZero()) {
2761 FoldExit(ExitingBB, true);
2762 Changed = true;
2763 continue;
2764 }
2765 
2766 // If we end up with a pointer exit count, bail. Note that we can end up
2767 // with a pointer exit count for one exiting block, and not for another in
2768 // the same loop.
2769 if (!ExitCount->getType()->isIntegerTy() ||
2770 !MaxExitCount->getType()->isIntegerTy())
2771 continue;
2772 
2773 Type *WiderType =
2774 SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2775 ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2776 MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2777 assert(MaxExitCount->getType() == ExitCount->getType());
2778 
2779 // Can we prove that some other exit must be taken strictly before this
2780 // one?
2781 if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2782 MaxExitCount, ExitCount)) {
2783 FoldExit(ExitingBB, false);
2784 Changed = true;
2785 continue;
2786 }
2787 
2788 // As we run, keep track of which exit counts we've encountered. If we
2789 // find a duplicate, we've found an exit which would have exited on the
2790 // exiting iteration, but (from the visit order) strictly follows another
2791 // which does the same and is thus dead.
2792 if (!DominatingExitCounts.insert(ExitCount).second) {
2793 FoldExit(ExitingBB, false);
2794 Changed = true;
2795 continue;
2796 }
2797 
2798 // TODO: There might be another opportunity to leverage SCEV's reasoning
2799 // here. If we kept track of the min of dominating exits so far, we could
2800 // discharge exits with EC >= MDEC. This is less powerful than the existing
2801 // transform (since later exits aren't considered), but potentially more
2802 // powerful for any case where SCEV can prove a >=u b, but neither a == b
2803 // nor a >u b. Such a case is not currently known.
2804 }
2805 return Changed;
2806 }
2807 
2808 bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
2809 SmallVector<BasicBlock*, 16> ExitingBlocks;
2810 L->getExitingBlocks(ExitingBlocks);
2811 
2812 bool Changed = false;
2813 
2814 // Finally, see if we can rewrite our exit conditions into a loop invariant
2815 // form. If we have a read-only loop, and we can tell that we must exit down
2816 // a path which does not need any of the values computed within the loop, we
2817 // can rewrite the loop to exit on the first iteration. Note that this
2818 // doesn't either a) tell us the loop exits on the first iteration (unless
2819 // *all* exits are predicatable) or b) tell us *which* exit might be taken.
2820 // This transformation looks a lot like a restricted form of dead loop
2821 // elimination, but restricted to read-only loops and without necessarily
2822 // needing to kill the loop entirely.
2823 if (!LoopPredication)
2824 return Changed;
2825 
2826 if (!SE->hasLoopInvariantBackedgeTakenCount(L))
2827 return Changed;
2828 
2829 // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
2830 // through *explicit* control flow. We have to eliminate the possibility of
2831 // implicit exits (see below) before we know it's truly exact.
2832 const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
2833 if (isa<SCEVCouldNotCompute>(ExactBTC) ||
2834 !SE->isLoopInvariant(ExactBTC, L) ||
2835 !isSafeToExpand(ExactBTC, *SE))
2836 return Changed;
2837 
2838 // If we end up with a pointer exit count, bail. It may be unsized.
2839 if (!ExactBTC->getType()->isIntegerTy())
2840 return Changed;
2841 
2842 auto BadExit = [&](BasicBlock *ExitingBB) {
2843 // If our exiting block exits multiple loops, we can only rewrite the
2844 // innermost one. Otherwise, we're changing how many times the innermost
2845 // loop runs before it exits.
2846 if (LI->getLoopFor(ExitingBB) != L)
2847 return true;
2848 
2849 // Can't rewrite non-branch yet.
2850 BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2851 if (!BI)
2852 return true;
2853 
2854 // If already constant, nothing to do.
2855 if (isa<Constant>(BI->getCondition()))
2856 return true;
2857 
2858 // If the exit block has phis, we need to be able to compute the values
2859 // within the loop which contains them. This assumes trivially lcssa phis
2860 // have already been removed; TODO: generalize
2861 BasicBlock *ExitBlock =
2862 BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
2863 if (!ExitBlock->phis().empty())
2864 return true;
2865 
2866 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2867 assert(!isa<SCEVCouldNotCompute>(ExactBTC) && "implied by having exact trip count");
2868 if (!SE->isLoopInvariant(ExitCount, L) ||
2869 !isSafeToExpand(ExitCount, *SE))
2870 return true;
2871 
2872 // If we end up with a pointer exit count, bail. It may be unsized.
2873 if (!ExitCount->getType()->isIntegerTy())
2874 return true;
2875 
2876 return false;
2877 };
2878 
2879 // If we have any exits which can't be predicated themselves, then we can't
2880 // predicate any exit which isn't guaranteed to execute before it. Consider
2881 // two exits (a) and (b) which would both exit on the same iteration. If we
2882 // can predicate (b), but not (a), and (a) precedes (b) along some path, then
2883 // we could convert a loop from exiting through (a) to one exiting through
2884 // (b).
  // Note that this problem exists only for exits with the same exit count,
  // and we could be more aggressive when exit counts are known unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    return DT->dominates(ExitingBB, L->getLoopLatch());
  }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
          ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with
  //    preheaders, and we're in trouble if we can't find the induction
  //    variable even when we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
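  // As a rough illustration (an assumed example, not taken from the original
  // source): two header phis that describe the same recurrence, e.g.
  //
  //   %i = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
  //   %j = phi i64 [ 0, %preheader ], [ %j.next, %latch ]
  //   %i.next = add nuw nsw i64 %i, 1
  //   %j.next = add nuw nsw i64 %j, 1
  //
  // are both {0,+,1} in SCEV terms; replaceCongruentIVs rewrites the users of
  // one in terms of the other, and the now-unused recurrence is queued on
  // DeadInsts for deletion below.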
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
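  // Rough illustration (an assumed example, not taken from the original
  // source): a preheader computation such as
  //
  //   %x = mul i64 %a, %b     ; no uses inside the loop, one use after it
  //
  // can be sunk into the exit block so that %x is not live (and holding a
  // register) across the entire loop body.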
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ?
        &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
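
// For reference, a typical way to exercise this pass in isolation (assuming a
// standard 'opt' build; commands shown as an illustration, not taken from the
// original source):
//
//   opt -passes=indvars -S input.ll     ; new pass manager
//   opt -indvars -S input.ll            ; legacy pass manager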