1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallSet.h" 35 #include "llvm/ADT/SmallPtrSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/ScalarEvolution.h" 42 #include "llvm/Analysis/ScalarEvolutionExpander.h" 43 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 44 #include "llvm/Analysis/TargetLibraryInfo.h" 45 #include "llvm/Analysis/TargetTransformInfo.h" 46 #include "llvm/Analysis/ValueTracking.h" 47 #include "llvm/Transforms/Utils/Local.h" 48 #include "llvm/IR/BasicBlock.h" 49 #include "llvm/IR/Constant.h" 50 #include "llvm/IR/ConstantRange.h" 51 #include "llvm/IR/Constants.h" 52 #include "llvm/IR/DataLayout.h" 53 #include "llvm/IR/DerivedTypes.h" 54 #include "llvm/IR/Dominators.h" 55 #include "llvm/IR/Function.h" 56 #include "llvm/IR/IRBuilder.h" 57 #include "llvm/IR/InstrTypes.h" 58 #include "llvm/IR/Instruction.h" 59 #include "llvm/IR/Instructions.h" 60 #include "llvm/IR/IntrinsicInst.h" 61 #include "llvm/IR/Intrinsics.h" 62 #include "llvm/IR/Module.h" 63 #include "llvm/IR/Operator.h" 64 #include "llvm/IR/PassManager.h" 65 #include "llvm/IR/PatternMatch.h" 66 #include "llvm/IR/Type.h" 67 #include "llvm/IR/Use.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/IR/Value.h" 70 #include "llvm/IR/ValueHandle.h" 71 #include "llvm/Pass.h" 72 #include "llvm/Support/Casting.h" 73 #include "llvm/Support/CommandLine.h" 74 #include "llvm/Support/Compiler.h" 75 #include "llvm/Support/Debug.h" 76 #include "llvm/Support/ErrorHandling.h" 77 #include "llvm/Support/MathExtras.h" 78 #include "llvm/Support/raw_ostream.h" 79 #include "llvm/Transforms/Scalar.h" 80 #include "llvm/Transforms/Scalar/LoopPassManager.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include "llvm/Transforms/Utils/LoopUtils.h" 83 #include 
"llvm/Transforms/Utils/SimplifyIndVar.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <utility> 87 88 using namespace llvm; 89 90 #define DEBUG_TYPE "indvars" 91 92 STATISTIC(NumWidened , "Number of indvars widened"); 93 STATISTIC(NumReplaced , "Number of exit values replaced"); 94 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 95 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 96 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 97 98 // Trip count verification can be enabled by default under NDEBUG if we 99 // implement a strong expression equivalence checker in SCEV. Until then, we 100 // use the verify-indvars flag, which may assert in some cases. 101 static cl::opt<bool> VerifyIndvars( 102 "verify-indvars", cl::Hidden, 103 cl::desc("Verify the ScalarEvolution result after running indvars")); 104 105 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 106 107 static cl::opt<ReplaceExitVal> ReplaceExitValue( 108 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 109 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 110 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 111 clEnumValN(OnlyCheapRepl, "cheap", 112 "only replace exit value when the cost is cheap"), 113 clEnumValN(NoHardUse, "noharduse", 114 "only replace exit values when loop def likely dead"), 115 clEnumValN(AlwaysRepl, "always", 116 "always replace exit value whenever possible"))); 117 118 static cl::opt<bool> UsePostIncrementRanges( 119 "indvars-post-increment-ranges", cl::Hidden, 120 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 121 cl::init(true)); 122 123 static cl::opt<bool> 124 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 125 cl::desc("Disable Linear Function Test Replace optimization")); 126 127 namespace { 128 129 struct RewritePhi; 130 131 class IndVarSimplify { 132 LoopInfo *LI; 133 ScalarEvolution *SE; 134 DominatorTree *DT; 135 const DataLayout &DL; 136 TargetLibraryInfo *TLI; 137 const TargetTransformInfo *TTI; 138 139 SmallVector<WeakTrackingVH, 16> DeadInsts; 140 141 bool isValidRewrite(Value *FromVal, Value *ToVal); 142 143 bool handleFloatingPointIV(Loop *L, PHINode *PH); 144 bool rewriteNonIntegerIVs(Loop *L); 145 146 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 147 bool optimizeLoopExits(Loop *L); 148 149 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 150 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 151 bool rewriteFirstIterationLoopExitValues(Loop *L); 152 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 153 154 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 155 const SCEV *ExitCount, 156 PHINode *IndVar, SCEVExpander &Rewriter); 157 158 bool sinkUnusedInvariants(Loop *L); 159 160 public: 161 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 162 const DataLayout &DL, TargetLibraryInfo *TLI, 163 TargetTransformInfo *TTI) 164 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 165 166 bool run(Loop *L); 167 }; 168 169 } // end anonymous namespace 170 171 /// Return true if the SCEV expansion generated by the rewriter can replace the 172 /// original value. SCEV guarantees that it produces the same value, but the way 173 /// it is produced may be illegal IR. Ideally, this function will only be 174 /// called for verification. 
175 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { 176 // If an SCEV expression subsumed multiple pointers, its expansion could 177 // reassociate the GEP changing the base pointer. This is illegal because the 178 // final address produced by a GEP chain must be inbounds relative to its 179 // underlying object. Otherwise basic alias analysis, among other things, 180 // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 181 // producing an expression involving multiple pointers. Until then, we must 182 // bail out here. 183 // 184 // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject 185 // because it understands lcssa phis while SCEV does not. 186 Value *FromPtr = FromVal; 187 Value *ToPtr = ToVal; 188 if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) { 189 FromPtr = GEP->getPointerOperand(); 190 } 191 if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) { 192 ToPtr = GEP->getPointerOperand(); 193 } 194 if (FromPtr != FromVal || ToPtr != ToVal) { 195 // Quickly check the common case 196 if (FromPtr == ToPtr) 197 return true; 198 199 // SCEV may have rewritten an expression that produces the GEP's pointer 200 // operand. That's ok as long as the pointer operand has the same base 201 // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the 202 // base of a recurrence. This handles the case in which SCEV expansion 203 // converts a pointer type recurrence into a nonrecurrent pointer base 204 // indexed by an integer recurrence. 205 206 // If the GEP base pointer is a vector of pointers, abort. 207 if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) 208 return false; 209 210 const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 211 const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 212 if (FromBase == ToBase) 213 return true; 214 215 LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase 216 << " != " << *ToBase << "\n"); 217 218 return false; 219 } 220 return true; 221 } 222 223 /// Determine the insertion point for this user. By default, insert immediately 224 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 225 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 226 /// common dominator for the incoming blocks. A nullptr can be returned if no 227 /// viable location is found: it may happen if User is a PHI and Def only comes 228 /// to this PHI from unreachable blocks. 229 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 230 DominatorTree *DT, LoopInfo *LI) { 231 PHINode *PHI = dyn_cast<PHINode>(User); 232 if (!PHI) 233 return User; 234 235 Instruction *InsertPt = nullptr; 236 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 237 if (PHI->getIncomingValue(i) != Def) 238 continue; 239 240 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 241 242 if (!DT->isReachableFromEntry(InsertBB)) 243 continue; 244 245 if (!InsertPt) { 246 InsertPt = InsertBB->getTerminator(); 247 continue; 248 } 249 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 250 InsertPt = InsertBB->getTerminator(); 251 } 252 253 // If we have skipped all inputs, it means that Def only comes to Phi from 254 // unreachable blocks. 
255 if (!InsertPt) 256 return nullptr; 257 258 auto *DefI = dyn_cast<Instruction>(Def); 259 if (!DefI) 260 return InsertPt; 261 262 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 263 264 auto *L = LI->getLoopFor(DefI->getParent()); 265 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 266 267 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 268 if (LI->getLoopFor(DTN->getBlock()) == L) 269 return DTN->getBlock()->getTerminator(); 270 271 llvm_unreachable("DefI dominates InsertPt!"); 272 } 273 274 //===----------------------------------------------------------------------===// 275 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 276 //===----------------------------------------------------------------------===// 277 278 /// Convert APF to an integer, if possible. 279 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 280 bool isExact = false; 281 // See if we can convert this to an int64_t 282 uint64_t UIntVal; 283 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 284 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 285 !isExact) 286 return false; 287 IntVal = UIntVal; 288 return true; 289 } 290 291 /// If the loop has floating induction variable then insert corresponding 292 /// integer induction variable if possible. 293 /// For example, 294 /// for(double i = 0; i < 10000; ++i) 295 /// bar(i) 296 /// is converted into 297 /// for(int i = 0; i < 10000; ++i) 298 /// bar((double)i); 299 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 300 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 301 unsigned BackEdge = IncomingEdge^1; 302 303 // Check incoming value. 304 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 305 306 int64_t InitValue; 307 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 308 return false; 309 310 // Check IV increment. Reject this PN if increment operation is not 311 // an add or increment value can not be represented by an integer. 312 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 313 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 314 315 // If this is not an add of the PHI with a constantfp, or if the constant fp 316 // is not an integer, bail out. 317 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 318 int64_t IncValue; 319 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 320 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 321 return false; 322 323 // Check Incr uses. One user is PN and the other user is an exit condition 324 // used by the conditional terminator. 325 Value::user_iterator IncrUse = Incr->user_begin(); 326 Instruction *U1 = cast<Instruction>(*IncrUse++); 327 if (IncrUse == Incr->user_end()) return false; 328 Instruction *U2 = cast<Instruction>(*IncrUse++); 329 if (IncrUse != Incr->user_end()) return false; 330 331 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 332 // only used by a branch, we can't transform it. 333 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 334 if (!Compare) 335 Compare = dyn_cast<FCmpInst>(U2); 336 if (!Compare || !Compare->hasOneUse() || 337 !isa<BranchInst>(Compare->user_back())) 338 return false; 339 340 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 341 342 // We need to verify that the branch actually controls the iteration count 343 // of the loop. 
If not, the new IV can overflow and no one will notice. 344 // The branch block must be in the loop and one of the successors must be out 345 // of the loop. 346 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 347 if (!L->contains(TheBr->getParent()) || 348 (L->contains(TheBr->getSuccessor(0)) && 349 L->contains(TheBr->getSuccessor(1)))) 350 return false; 351 352 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 353 // transform it. 354 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 355 int64_t ExitValue; 356 if (ExitValueVal == nullptr || 357 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 358 return false; 359 360 // Find new predicate for integer comparison. 361 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 362 switch (Compare->getPredicate()) { 363 default: return false; // Unknown comparison. 364 case CmpInst::FCMP_OEQ: 365 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 366 case CmpInst::FCMP_ONE: 367 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 368 case CmpInst::FCMP_OGT: 369 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 370 case CmpInst::FCMP_OGE: 371 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 372 case CmpInst::FCMP_OLT: 373 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 374 case CmpInst::FCMP_OLE: 375 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 376 } 377 378 // We convert the floating point induction variable to a signed i32 value if 379 // we can. This is only safe if the comparison will not overflow in a way 380 // that won't be trapped by the integer equivalent operations. Check for this 381 // now. 382 // TODO: We could use i64 if it is native and the range requires it. 383 384 // The start/stride/exit values must all fit in signed i32. 385 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 386 return false; 387 388 // If not actually striding (add x, 0.0), avoid touching the code. 389 if (IncValue == 0) 390 return false; 391 392 // Positive and negative strides have different safety conditions. 393 if (IncValue > 0) { 394 // If we have a positive stride, we require the init to be less than the 395 // exit value. 396 if (InitValue >= ExitValue) 397 return false; 398 399 uint32_t Range = uint32_t(ExitValue-InitValue); 400 // Check for infinite loop, either: 401 // while (i <= Exit) or until (i > Exit) 402 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 403 if (++Range == 0) return false; // Range overflows. 404 } 405 406 unsigned Leftover = Range % uint32_t(IncValue); 407 408 // If this is an equality comparison, we require that the strided value 409 // exactly land on the exit value, otherwise the IV condition will wrap 410 // around and do things the fp IV wouldn't. 411 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 412 Leftover != 0) 413 return false; 414 415 // If the stride would wrap around the i32 before exiting, we can't 416 // transform the IV. 417 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 418 return false; 419 } else { 420 // If we have a negative stride, we require the init to be greater than the 421 // exit value. 
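    // Illustrative (hypothetical) numbers: init = 10, exit = 0, stride = -3
    // with an equality exit predicate is rejected below, since Range = 10 and
    // Leftover = 10 % 3 != 0, meaning the integer IV would step past the exit
    // value instead of landing on it.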
422 if (InitValue <= ExitValue) 423 return false; 424 425 uint32_t Range = uint32_t(InitValue-ExitValue); 426 // Check for infinite loop, either: 427 // while (i >= Exit) or until (i < Exit) 428 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 429 if (++Range == 0) return false; // Range overflows. 430 } 431 432 unsigned Leftover = Range % uint32_t(-IncValue); 433 434 // If this is an equality comparison, we require that the strided value 435 // exactly land on the exit value, otherwise the IV condition will wrap 436 // around and do things the fp IV wouldn't. 437 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 438 Leftover != 0) 439 return false; 440 441 // If the stride would wrap around the i32 before exiting, we can't 442 // transform the IV. 443 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 444 return false; 445 } 446 447 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 448 449 // Insert new integer induction variable. 450 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 451 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 452 PN->getIncomingBlock(IncomingEdge)); 453 454 Value *NewAdd = 455 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 456 Incr->getName()+".int", Incr); 457 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 458 459 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 460 ConstantInt::get(Int32Ty, ExitValue), 461 Compare->getName()); 462 463 // In the following deletions, PN may become dead and may be deleted. 464 // Use a WeakTrackingVH to observe whether this happens. 465 WeakTrackingVH WeakPH = PN; 466 467 // Delete the old floating point exit comparison. The branch starts using the 468 // new comparison. 469 NewCompare->takeName(Compare); 470 Compare->replaceAllUsesWith(NewCompare); 471 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); 472 473 // Delete the old floating point increment. 474 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 475 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); 476 477 // If the FP induction variable still has uses, this is because something else 478 // in the loop uses its value. In order to canonicalize the induction 479 // variable, we chose to eliminate the IV and rewrite it in terms of an 480 // int->fp cast. 481 // 482 // We give preference to sitofp over uitofp because it is faster on most 483 // platforms. 484 if (WeakPH) { 485 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 486 &*PN->getParent()->getFirstInsertionPt()); 487 PN->replaceAllUsesWith(Conv); 488 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); 489 } 490 return true; 491 } 492 493 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 494 // First step. Check to see if there are any floating-point recurrences. 495 // If there are, change them into integer recurrences, permitting analysis by 496 // the SCEV routines. 497 BasicBlock *Header = L->getHeader(); 498 499 SmallVector<WeakTrackingVH, 8> PHIs; 500 for (PHINode &PN : Header->phis()) 501 PHIs.push_back(&PN); 502 503 bool Changed = false; 504 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 505 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 506 Changed |= handleFloatingPointIV(L, PN); 507 508 // If the loop previously had floating-point IV, ScalarEvolution 509 // may not have been able to compute a trip count. Now that we've done some 510 // re-writing, the trip count may be computable. 
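  // (forgetLoop below simply discards ScalarEvolution's cached results for
  // this loop, including any backedge-taken count, so later queries are
  // answered against the new integer IVs.)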
511 if (Changed) 512 SE->forgetLoop(L); 513 return Changed; 514 } 515 516 namespace { 517 518 // Collect information about PHI nodes which can be transformed in 519 // rewriteLoopExitValues. 520 struct RewritePhi { 521 PHINode *PN; 522 523 // Ith incoming value. 524 unsigned Ith; 525 526 // Exit value after expansion. 527 Value *Val; 528 529 // High Cost when expansion. 530 bool HighCost; 531 532 RewritePhi(PHINode *P, unsigned I, Value *V, bool H) 533 : PN(P), Ith(I), Val(V), HighCost(H) {} 534 }; 535 536 } // end anonymous namespace 537 538 //===----------------------------------------------------------------------===// 539 // rewriteLoopExitValues - Optimize IV users outside the loop. 540 // As a side effect, reduces the amount of IV processing within the loop. 541 //===----------------------------------------------------------------------===// 542 543 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { 544 SmallPtrSet<const Instruction *, 8> Visited; 545 SmallVector<const Instruction *, 8> WorkList; 546 Visited.insert(I); 547 WorkList.push_back(I); 548 while (!WorkList.empty()) { 549 const Instruction *Curr = WorkList.pop_back_val(); 550 // This use is outside the loop, nothing to do. 551 if (!L->contains(Curr)) 552 continue; 553 // Do we assume it is a "hard" use which will not be eliminated easily? 554 if (Curr->mayHaveSideEffects()) 555 return true; 556 // Otherwise, add all its users to worklist. 557 for (auto U : Curr->users()) { 558 auto *UI = cast<Instruction>(U); 559 if (Visited.insert(UI).second) 560 WorkList.push_back(UI); 561 } 562 } 563 return false; 564 } 565 566 /// Check to see if this loop has a computable loop-invariant execution count. 567 /// If so, this means that we can compute the final value of any expressions 568 /// that are recurrent in the loop, and substitute the exit values from the loop 569 /// into any instructions outside of the loop that use the final values of the 570 /// current expressions. 571 /// 572 /// This is mostly redundant with the regular IndVarSimplify activities that 573 /// happen later, except that it's more powerful in some cases, because it's 574 /// able to brute-force evaluate arbitrary instructions as long as they have 575 /// constant operands at the beginning of the loop. 576 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { 577 // Check a pre-condition. 578 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 579 "Indvars did not preserve LCSSA!"); 580 581 SmallVector<BasicBlock*, 8> ExitBlocks; 582 L->getUniqueExitBlocks(ExitBlocks); 583 584 SmallVector<RewritePhi, 8> RewritePhiSet; 585 // Find all values that are computed inside the loop, but used outside of it. 586 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 587 // the exit blocks of the loop to find them. 588 for (BasicBlock *ExitBB : ExitBlocks) { 589 // If there are no PHI nodes in this exit block, then no values defined 590 // inside the loop are used on this path, skip it. 591 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 592 if (!PN) continue; 593 594 unsigned NumPreds = PN->getNumIncomingValues(); 595 596 // Iterate over all of the PHI nodes. 
597 BasicBlock::iterator BBI = ExitBB->begin(); 598 while ((PN = dyn_cast<PHINode>(BBI++))) { 599 if (PN->use_empty()) 600 continue; // dead use, don't replace it 601 602 if (!SE->isSCEVable(PN->getType())) 603 continue; 604 605 // It's necessary to tell ScalarEvolution about this explicitly so that 606 // it can walk the def-use list and forget all SCEVs, as it may not be 607 // watching the PHI itself. Once the new exit value is in place, there 608 // may not be a def-use connection between the loop and every instruction 609 // which got a SCEVAddRecExpr for that loop. 610 SE->forgetValue(PN); 611 612 // Iterate over all of the values in all the PHI nodes. 613 for (unsigned i = 0; i != NumPreds; ++i) { 614 // If the value being merged in is not integer or is not defined 615 // in the loop, skip it. 616 Value *InVal = PN->getIncomingValue(i); 617 if (!isa<Instruction>(InVal)) 618 continue; 619 620 // If this pred is for a subloop, not L itself, skip it. 621 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 622 continue; // The Block is in a subloop, skip it. 623 624 // Check that InVal is defined in the loop. 625 Instruction *Inst = cast<Instruction>(InVal); 626 if (!L->contains(Inst)) 627 continue; 628 629 // Okay, this instruction has a user outside of the current loop 630 // and varies predictably *inside* the loop. Evaluate the value it 631 // contains when the loop exits, if possible. 632 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 633 if (!SE->isLoopInvariant(ExitValue, L) || 634 !isSafeToExpand(ExitValue, *SE)) 635 continue; 636 637 // Computing the value outside of the loop brings no benefit if it is 638 // definitely used inside the loop in a way which can not be optimized 639 // away. Avoid doing so unless we know we have a value which computes 640 // the ExitValue already. TODO: This should be merged into SCEV 641 // expander to leverage its knowledge of existing expressions. 642 if (ReplaceExitValue != AlwaysRepl && 643 !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) && 644 hasHardUserWithinLoop(L, Inst)) 645 continue; 646 647 bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); 648 Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); 649 650 LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal 651 << '\n' 652 << " LoopVal = " << *Inst << "\n"); 653 654 if (!isValidRewrite(Inst, ExitVal)) { 655 DeadInsts.push_back(ExitVal); 656 continue; 657 } 658 659 #ifndef NDEBUG 660 // If we reuse an instruction from a loop which is neither L nor one of 661 // its containing loops, we end up breaking LCSSA form for this loop by 662 // creating a new use of its instruction. 663 if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal)) 664 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) 665 if (EVL != L) 666 assert(EVL->contains(L) && "LCSSA breach detected!"); 667 #endif 668 669 // Collect all the candidate PHINodes to be rewritten. 670 RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost); 671 } 672 } 673 } 674 675 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); 676 677 bool Changed = false; 678 // Transformation. 679 for (const RewritePhi &Phi : RewritePhiSet) { 680 PHINode *PN = Phi.PN; 681 Value *ExitVal = Phi.Val; 682 683 // Only do the rewrite when the ExitValue can be expanded cheaply. 684 // If LoopCanBeDel is true, rewrite exit value aggressively. 
685 if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { 686 DeadInsts.push_back(ExitVal); 687 continue; 688 } 689 690 Changed = true; 691 ++NumReplaced; 692 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); 693 PN->setIncomingValue(Phi.Ith, ExitVal); 694 695 // If this instruction is dead now, delete it. Don't do it now to avoid 696 // invalidating iterators. 697 if (isInstructionTriviallyDead(Inst, TLI)) 698 DeadInsts.push_back(Inst); 699 700 // Replace PN with ExitVal if that is legal and does not break LCSSA. 701 if (PN->getNumIncomingValues() == 1 && 702 LI->replacementPreservesLCSSAForm(PN, ExitVal)) { 703 PN->replaceAllUsesWith(ExitVal); 704 PN->eraseFromParent(); 705 } 706 } 707 708 // The insertion point instruction may have been deleted; clear it out 709 // so that the rewriter doesn't trip over it later. 710 Rewriter.clearInsertPoint(); 711 return Changed; 712 } 713 714 //===---------------------------------------------------------------------===// 715 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 716 // they will exit at the first iteration. 717 //===---------------------------------------------------------------------===// 718 719 /// Check to see if this loop has loop invariant conditions which lead to loop 720 /// exits. If so, we know that if the exit path is taken, it is at the first 721 /// loop iteration. This lets us predict exit values of PHI nodes that live in 722 /// loop header. 723 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 724 // Verify the input to the pass is already in LCSSA form. 725 assert(L->isLCSSAForm(*DT)); 726 727 SmallVector<BasicBlock *, 8> ExitBlocks; 728 L->getUniqueExitBlocks(ExitBlocks); 729 730 bool MadeAnyChanges = false; 731 for (auto *ExitBB : ExitBlocks) { 732 // If there are no more PHI nodes in this exit block, then no more 733 // values defined inside the loop are used on this path. 734 for (PHINode &PN : ExitBB->phis()) { 735 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 736 IncomingValIdx != E; ++IncomingValIdx) { 737 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 738 739 // Can we prove that the exit must run on the first iteration if it 740 // runs at all? (i.e. early exits are fine for our purposes, but 741 // traces which lead to this exit being taken on the 2nd iteration 742 // aren't.) Note that this is about whether the exit branch is 743 // executed, not about whether it is taken. 744 if (!L->getLoopLatch() || 745 !DT->dominates(IncomingBB, L->getLoopLatch())) 746 continue; 747 748 // Get condition that leads to the exit path. 749 auto *TermInst = IncomingBB->getTerminator(); 750 751 Value *Cond = nullptr; 752 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 753 // Must be a conditional branch, otherwise the block 754 // should not be in the loop. 755 Cond = BI->getCondition(); 756 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 757 Cond = SI->getCondition(); 758 else 759 continue; 760 761 if (!L->isLoopInvariant(Cond)) 762 continue; 763 764 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 765 766 // Only deal with PHIs in the loop header. 767 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 768 continue; 769 770 // If ExitVal is a PHI on the loop header, then we know its 771 // value along this exit because the exit can only be taken 772 // on the first iteration. 
773 auto *LoopPreheader = L->getLoopPreheader(); 774 assert(LoopPreheader && "Invalid loop"); 775 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 776 if (PreheaderIdx != -1) { 777 assert(ExitVal->getParent() == L->getHeader() && 778 "ExitVal must be in loop header"); 779 MadeAnyChanges = true; 780 PN.setIncomingValue(IncomingValIdx, 781 ExitVal->getIncomingValue(PreheaderIdx)); 782 } 783 } 784 } 785 } 786 return MadeAnyChanges; 787 } 788 789 /// Check whether it is possible to delete the loop after rewriting exit 790 /// value. If it is possible, ignore ReplaceExitValue and do rewriting 791 /// aggressively. 792 bool IndVarSimplify::canLoopBeDeleted( 793 Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { 794 BasicBlock *Preheader = L->getLoopPreheader(); 795 // If there is no preheader, the loop will not be deleted. 796 if (!Preheader) 797 return false; 798 799 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 800 // We obviate multiple ExitingBlocks case for simplicity. 801 // TODO: If we see testcase with multiple ExitingBlocks can be deleted 802 // after exit value rewriting, we can enhance the logic here. 803 SmallVector<BasicBlock *, 4> ExitingBlocks; 804 L->getExitingBlocks(ExitingBlocks); 805 SmallVector<BasicBlock *, 8> ExitBlocks; 806 L->getUniqueExitBlocks(ExitBlocks); 807 if (ExitBlocks.size() > 1 || ExitingBlocks.size() > 1) 808 return false; 809 810 BasicBlock *ExitBlock = ExitBlocks[0]; 811 BasicBlock::iterator BI = ExitBlock->begin(); 812 while (PHINode *P = dyn_cast<PHINode>(BI)) { 813 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); 814 815 // If the Incoming value of P is found in RewritePhiSet, we know it 816 // could be rewritten to use a loop invariant value in transformation 817 // phase later. Skip it in the loop invariant check below. 818 bool found = false; 819 for (const RewritePhi &Phi : RewritePhiSet) { 820 unsigned i = Phi.Ith; 821 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { 822 found = true; 823 break; 824 } 825 } 826 827 Instruction *I; 828 if (!found && (I = dyn_cast<Instruction>(Incoming))) 829 if (!L->hasLoopInvariantOperands(I)) 830 return false; 831 832 ++BI; 833 } 834 835 for (auto *BB : L->blocks()) 836 if (llvm::any_of(*BB, [](Instruction &I) { 837 return I.mayHaveSideEffects(); 838 })) 839 return false; 840 841 return true; 842 } 843 844 //===----------------------------------------------------------------------===// 845 // IV Widening - Extend the width of an IV to cover its widest uses. 846 //===----------------------------------------------------------------------===// 847 848 namespace { 849 850 // Collect information about induction variables that are used by sign/zero 851 // extend operations. This information is recorded by CollectExtend and provides 852 // the input to WidenIV. 853 struct WideIVInfo { 854 PHINode *NarrowIV = nullptr; 855 856 // Widest integer type created [sz]ext 857 Type *WidestNativeType = nullptr; 858 859 // Was a sext user seen before a zext? 860 bool IsSigned = false; 861 }; 862 863 } // end anonymous namespace 864 865 /// Update information about the induction variable that is extended by this 866 /// sign or zero extend operation. This is used to determine the final width of 867 /// the IV before actually widening it. 
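/// For illustration (hypothetical types; both must be legal integers for the
/// target): if an i32 IV is first seen feeding a sext to i64 and later a zext
/// to i64, the zext user is ignored because its signedness does not match the
/// first extend, and the result is WidestNativeType = i64 with IsSigned = true.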
868 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, 869 const TargetTransformInfo *TTI) { 870 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 871 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 872 return; 873 874 Type *Ty = Cast->getType(); 875 uint64_t Width = SE->getTypeSizeInBits(Ty); 876 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 877 return; 878 879 // Check that `Cast` actually extends the induction variable (we rely on this 880 // later). This takes care of cases where `Cast` is extending a truncation of 881 // the narrow induction variable, and thus can end up being narrower than the 882 // "narrow" induction variable. 883 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 884 if (NarrowIVWidth >= Width) 885 return; 886 887 // Cast is either an sext or zext up to this point. 888 // We should not widen an indvar if arithmetics on the wider indvar are more 889 // expensive than those on the narrower indvar. We check only the cost of ADD 890 // because at least an ADD is required to increment the induction variable. We 891 // could compute more comprehensively the cost of all instructions on the 892 // induction variable when necessary. 893 if (TTI && 894 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 895 TTI->getArithmeticInstrCost(Instruction::Add, 896 Cast->getOperand(0)->getType())) { 897 return; 898 } 899 900 if (!WI.WidestNativeType) { 901 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 902 WI.IsSigned = IsSigned; 903 return; 904 } 905 906 // We extend the IV to satisfy the sign of its first user, arbitrarily. 907 if (WI.IsSigned != IsSigned) 908 return; 909 910 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 911 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 912 } 913 914 namespace { 915 916 /// Record a link in the Narrow IV def-use chain along with the WideIV that 917 /// computes the same value as the Narrow IV def. This avoids caching Use* 918 /// pointers. 919 struct NarrowIVDefUse { 920 Instruction *NarrowDef = nullptr; 921 Instruction *NarrowUse = nullptr; 922 Instruction *WideDef = nullptr; 923 924 // True if the narrow def is never negative. Tracking this information lets 925 // us use a sign extension instead of a zero extension or vice versa, when 926 // profitable and legal. 927 bool NeverNegative = false; 928 929 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 930 bool NeverNegative) 931 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 932 NeverNegative(NeverNegative) {} 933 }; 934 935 /// The goal of this transform is to remove sign and zero extends without 936 /// creating any new induction variables. To do this, it creates a new phi of 937 /// the wider type and redirects all users, either removing extends or inserting 938 /// truncs whenever we stop propagating the type. 939 class WidenIV { 940 // Parameters 941 PHINode *OrigPhi; 942 Type *WideType; 943 944 // Context 945 LoopInfo *LI; 946 Loop *L; 947 ScalarEvolution *SE; 948 DominatorTree *DT; 949 950 // Does the module have any calls to the llvm.experimental.guard intrinsic 951 // at all? If not we can avoid scanning instructions looking for guards. 
952 bool HasGuards; 953 954 // Result 955 PHINode *WidePhi = nullptr; 956 Instruction *WideInc = nullptr; 957 const SCEV *WideIncExpr = nullptr; 958 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 959 960 SmallPtrSet<Instruction *,16> Widened; 961 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 962 963 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 964 965 // A map tracking the kind of extension used to widen each narrow IV 966 // and narrow IV user. 967 // Key: pointer to a narrow IV or IV user. 968 // Value: the kind of extension used to widen this Instruction. 969 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 970 971 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 972 973 // A map with control-dependent ranges for post increment IV uses. The key is 974 // a pair of IV def and a use of this def denoting the context. The value is 975 // a ConstantRange representing possible values of the def at the given 976 // context. 977 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 978 979 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 980 Instruction *UseI) { 981 DefUserPair Key(Def, UseI); 982 auto It = PostIncRangeInfos.find(Key); 983 return It == PostIncRangeInfos.end() 984 ? Optional<ConstantRange>(None) 985 : Optional<ConstantRange>(It->second); 986 } 987 988 void calculatePostIncRanges(PHINode *OrigPhi); 989 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 990 991 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 992 DefUserPair Key(Def, UseI); 993 auto It = PostIncRangeInfos.find(Key); 994 if (It == PostIncRangeInfos.end()) 995 PostIncRangeInfos.insert({Key, R}); 996 else 997 It->second = R.intersectWith(It->second); 998 } 999 1000 public: 1001 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 1002 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 1003 bool HasGuards) 1004 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 1005 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 1006 HasGuards(HasGuards), DeadInsts(DI) { 1007 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 1008 ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to SCEVAddRec. It can safely return
/// 0 for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ?
WideDef 1101 : createExtendInst(NarrowUse->getOperand(1), WideType, 1102 IsSigned, NarrowUse); 1103 1104 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1105 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1106 NarrowBO->getName()); 1107 IRBuilder<> Builder(NarrowUse); 1108 Builder.Insert(WideBO); 1109 WideBO->copyIRFlags(NarrowBO); 1110 return WideBO; 1111 } 1112 1113 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 1114 const SCEVAddRecExpr *WideAR) { 1115 Instruction *NarrowUse = DU.NarrowUse; 1116 Instruction *NarrowDef = DU.NarrowDef; 1117 Instruction *WideDef = DU.WideDef; 1118 1119 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1120 1121 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 1122 1123 // We're trying to find X such that 1124 // 1125 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 1126 // 1127 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 1128 // and check using SCEV if any of them are correct. 1129 1130 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 1131 // correct solution to X. 1132 auto GuessNonIVOperand = [&](bool SignExt) { 1133 const SCEV *WideLHS; 1134 const SCEV *WideRHS; 1135 1136 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 1137 if (SignExt) 1138 return SE->getSignExtendExpr(S, Ty); 1139 return SE->getZeroExtendExpr(S, Ty); 1140 }; 1141 1142 if (IVOpIdx == 0) { 1143 WideLHS = SE->getSCEV(WideDef); 1144 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 1145 WideRHS = GetExtend(NarrowRHS, WideType); 1146 } else { 1147 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 1148 WideLHS = GetExtend(NarrowLHS, WideType); 1149 WideRHS = SE->getSCEV(WideDef); 1150 } 1151 1152 // WideUse is "WideDef `op.wide` X" as described in the comment. 1153 const SCEV *WideUse = nullptr; 1154 1155 switch (NarrowUse->getOpcode()) { 1156 default: 1157 llvm_unreachable("No other possibility!"); 1158 1159 case Instruction::Add: 1160 WideUse = SE->getAddExpr(WideLHS, WideRHS); 1161 break; 1162 1163 case Instruction::Mul: 1164 WideUse = SE->getMulExpr(WideLHS, WideRHS); 1165 break; 1166 1167 case Instruction::UDiv: 1168 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 1169 break; 1170 1171 case Instruction::Sub: 1172 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 1173 break; 1174 } 1175 1176 return WideUse == WideAR; 1177 }; 1178 1179 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 1180 if (!GuessNonIVOperand(SignExtend)) { 1181 SignExtend = !SignExtend; 1182 if (!GuessNonIVOperand(SignExtend)) 1183 return nullptr; 1184 } 1185 1186 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1187 ? WideDef 1188 : createExtendInst(NarrowUse->getOperand(0), WideType, 1189 SignExtend, NarrowUse); 1190 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1191 ? 
WideDef 1192 : createExtendInst(NarrowUse->getOperand(1), WideType, 1193 SignExtend, NarrowUse); 1194 1195 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1196 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1197 NarrowBO->getName()); 1198 1199 IRBuilder<> Builder(NarrowUse); 1200 Builder.Insert(WideBO); 1201 WideBO->copyIRFlags(NarrowBO); 1202 return WideBO; 1203 } 1204 1205 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 1206 auto It = ExtendKindMap.find(I); 1207 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 1208 return It->second; 1209 } 1210 1211 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 1212 unsigned OpCode) const { 1213 if (OpCode == Instruction::Add) 1214 return SE->getAddExpr(LHS, RHS); 1215 if (OpCode == Instruction::Sub) 1216 return SE->getMinusSCEV(LHS, RHS); 1217 if (OpCode == Instruction::Mul) 1218 return SE->getMulExpr(LHS, RHS); 1219 1220 llvm_unreachable("Unsupported opcode."); 1221 } 1222 1223 /// No-wrap operations can transfer sign extension of their result to their 1224 /// operands. Generate the SCEV value for the widened operation without 1225 /// actually modifying the IR yet. If the expression after extending the 1226 /// operands is an AddRec for this loop, return the AddRec and the kind of 1227 /// extension used. 1228 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { 1229 // Handle the common case of add<nsw/nuw> 1230 const unsigned OpCode = DU.NarrowUse->getOpcode(); 1231 // Only Add/Sub/Mul instructions supported yet. 1232 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1233 OpCode != Instruction::Mul) 1234 return {nullptr, Unknown}; 1235 1236 // One operand (NarrowDef) has already been extended to WideDef. Now determine 1237 // if extending the other will lead to a recurrence. 1238 const unsigned ExtendOperIdx = 1239 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 1240 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 1241 1242 const SCEV *ExtendOperExpr = nullptr; 1243 const OverflowingBinaryOperator *OBO = 1244 cast<OverflowingBinaryOperator>(DU.NarrowUse); 1245 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 1246 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1247 ExtendOperExpr = SE->getSignExtendExpr( 1248 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1249 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1250 ExtendOperExpr = SE->getZeroExtendExpr( 1251 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1252 else 1253 return {nullptr, Unknown}; 1254 1255 // When creating this SCEV expr, don't apply the current operations NSW or NUW 1256 // flags. This instruction may be guarded by control flow that the no-wrap 1257 // behavior depends on. Non-control-equivalent instructions can be mapped to 1258 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 1259 // semantics to those operations. 1260 const SCEV *lhs = SE->getSCEV(DU.WideDef); 1261 const SCEV *rhs = ExtendOperExpr; 1262 1263 // Let's swap operands to the initial order for the case of non-commutative 1264 // operations, like SUB. See PR21014. 
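  // Illustrative (hypothetical) case: for a narrow use 'sub %c, %iv' the IV is
  // operand 1, so ExtendOperIdx is 0 and (lhs, rhs) currently hold
  // (WideDef, ext(%c)); the swap below restores the original order so the SCEV
  // formed is ext(%c) - WideDef rather than WideDef - ext(%c).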
1265 if (ExtendOperIdx == 0) 1266 std::swap(lhs, rhs); 1267 const SCEVAddRecExpr *AddRec = 1268 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 1269 1270 if (!AddRec || AddRec->getLoop() != L) 1271 return {nullptr, Unknown}; 1272 1273 return {AddRec, ExtKind}; 1274 } 1275 1276 /// Is this instruction potentially interesting for further simplification after 1277 /// widening it's type? In other words, can the extend be safely hoisted out of 1278 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 1279 /// so, return the extended recurrence and the kind of extension used. Otherwise 1280 /// return {nullptr, Unknown}. 1281 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 1282 if (!SE->isSCEVable(DU.NarrowUse->getType())) 1283 return {nullptr, Unknown}; 1284 1285 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 1286 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 1287 SE->getTypeSizeInBits(WideType)) { 1288 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 1289 // index. So don't follow this use. 1290 return {nullptr, Unknown}; 1291 } 1292 1293 const SCEV *WideExpr; 1294 ExtendKind ExtKind; 1295 if (DU.NeverNegative) { 1296 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1297 if (isa<SCEVAddRecExpr>(WideExpr)) 1298 ExtKind = SignExtended; 1299 else { 1300 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1301 ExtKind = ZeroExtended; 1302 } 1303 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1304 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1305 ExtKind = SignExtended; 1306 } else { 1307 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1308 ExtKind = ZeroExtended; 1309 } 1310 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1311 if (!AddRec || AddRec->getLoop() != L) 1312 return {nullptr, Unknown}; 1313 return {AddRec, ExtKind}; 1314 } 1315 1316 /// This IV user cannot be widened. Replace this use of the original narrow IV 1317 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1318 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1319 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1320 if (!InsertPt) 1321 return; 1322 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1323 << *DU.NarrowUse << "\n"); 1324 IRBuilder<> Builder(InsertPt); 1325 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1326 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1327 } 1328 1329 /// If the narrow use is a compare instruction, then widen the compare 1330 // (and possibly the other operand). The extend operation is hoisted into the 1331 // loop preheader as far as possible. 1332 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1333 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1334 if (!Cmp) 1335 return false; 1336 1337 // We can legally widen the comparison in the following two cases: 1338 // 1339 // - The signedness of the IV extension and comparison match 1340 // 1341 // - The narrow IV is always positive (and thus its sign extension is equal 1342 // to its zero extension). For instance, let's say we're zero extending 1343 // %narrow for the following use 1344 // 1345 // icmp slt i32 %narrow, %val ... (A) 1346 // 1347 // and %narrow is always positive. 
Then 1348 // 1349 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1350 // == icmp slt i32 zext(%narrow), sext(%val) 1351 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1352 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1353 return false; 1354 1355 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); 1356 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1357 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1358 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1359 1360 // Widen the compare instruction. 1361 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1362 if (!InsertPt) 1363 return false; 1364 IRBuilder<> Builder(InsertPt); 1365 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1366 1367 // Widen the other operand of the compare, if necessary. 1368 if (CastWidth < IVWidth) { 1369 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1370 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1371 } 1372 return true; 1373 } 1374 1375 /// If the narrow use is an instruction whose two operands are the defining 1376 /// instruction of DU and a load instruction, then we have the following: 1377 /// if the load is hoisted outside the loop, then we do not reach this function 1378 /// as scalar evolution analysis works fine in widenIVUse with variables 1379 /// hoisted outside the loop and efficient code is subsequently generated by 1380 /// not emitting truncate instructions. But when the load is not hoisted 1381 /// (whether due to limitation in alias analysis or due to a true legality), 1382 /// then scalar evolution can not proceed with loop variant values and 1383 /// inefficient code is generated. This function handles the non-hoisted load 1384 /// special case by making the optimization generate the same type of code for 1385 /// hoisted and non-hoisted load (widen use and eliminate sign extend 1386 /// instruction). This special case is important especially when the induction 1387 /// variables are affecting addressing mode in code generation. 1388 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { 1389 Instruction *NarrowUse = DU.NarrowUse; 1390 Instruction *NarrowDef = DU.NarrowDef; 1391 Instruction *WideDef = DU.WideDef; 1392 1393 // Handle the common case of add<nsw/nuw> 1394 const unsigned OpCode = NarrowUse->getOpcode(); 1395 // Only Add/Sub/Mul instructions are supported. 1396 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1397 OpCode != Instruction::Mul) 1398 return false; 1399 1400 // The operand that is not defined by NarrowDef of DU. Let's call it the 1401 // other operand. 1402 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1403 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1404 "bad DU"); 1405 1406 const SCEV *ExtendOperExpr = nullptr; 1407 const OverflowingBinaryOperator *OBO = 1408 cast<OverflowingBinaryOperator>(NarrowUse); 1409 ExtendKind ExtKind = getExtendKind(NarrowDef); 1410 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1411 ExtendOperExpr = SE->getSignExtendExpr( 1412 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1413 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1414 ExtendOperExpr = SE->getZeroExtendExpr( 1415 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1416 else 1417 return false; 1418 1419 // We are interested in the other operand being a load instruction. 
1420 // But, we should look into relaxing this restriction later on. 1421 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1422 if (I && I->getOpcode() != Instruction::Load) 1423 return false; 1424 1425 // Verifying that Defining operand is an AddRec 1426 const SCEV *Op1 = SE->getSCEV(WideDef); 1427 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1428 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1429 return false; 1430 // Verifying that other operand is an Extend. 1431 if (ExtKind == SignExtended) { 1432 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1433 return false; 1434 } else { 1435 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1436 return false; 1437 } 1438 1439 if (ExtKind == SignExtended) { 1440 for (Use &U : NarrowUse->uses()) { 1441 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1442 if (!User || User->getType() != WideType) 1443 return false; 1444 } 1445 } else { // ExtKind == ZeroExtended 1446 for (Use &U : NarrowUse->uses()) { 1447 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1448 if (!User || User->getType() != WideType) 1449 return false; 1450 } 1451 } 1452 1453 return true; 1454 } 1455 1456 /// Special Case for widening with variant Loads (see 1457 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1458 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1459 Instruction *NarrowUse = DU.NarrowUse; 1460 Instruction *NarrowDef = DU.NarrowDef; 1461 Instruction *WideDef = DU.WideDef; 1462 1463 ExtendKind ExtKind = getExtendKind(NarrowDef); 1464 1465 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1466 1467 // Generating a widening use instruction. 1468 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1469 ? WideDef 1470 : createExtendInst(NarrowUse->getOperand(0), WideType, 1471 ExtKind, NarrowUse); 1472 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1473 ? WideDef 1474 : createExtendInst(NarrowUse->getOperand(1), WideType, 1475 ExtKind, NarrowUse); 1476 1477 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1478 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1479 NarrowBO->getName()); 1480 IRBuilder<> Builder(NarrowUse); 1481 Builder.Insert(WideBO); 1482 WideBO->copyIRFlags(NarrowBO); 1483 1484 if (ExtKind == SignExtended) 1485 ExtendKindMap[NarrowUse] = SignExtended; 1486 else 1487 ExtendKindMap[NarrowUse] = ZeroExtended; 1488 1489 // Update the Use. 1490 if (ExtKind == SignExtended) { 1491 for (Use &U : NarrowUse->uses()) { 1492 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1493 if (User && User->getType() == WideType) { 1494 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1495 << *WideBO << "\n"); 1496 ++NumElimExt; 1497 User->replaceAllUsesWith(WideBO); 1498 DeadInsts.emplace_back(User); 1499 } 1500 } 1501 } else { // ExtKind == ZeroExtended 1502 for (Use &U : NarrowUse->uses()) { 1503 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1504 if (User && User->getType() == WideType) { 1505 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1506 << *WideBO << "\n"); 1507 ++NumElimExt; 1508 User->replaceAllUsesWith(WideBO); 1509 DeadInsts.emplace_back(User); 1510 } 1511 } 1512 } 1513 } 1514 1515 /// Determine whether an individual user of the narrow IV can be widened. If so, 1516 /// return the wide clone of the user. 
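/// In outline, the cases handled below are: phis outside the current loop get
/// a truncated copy of the wide value; [sz]ext users already subsumed by the
/// wide IV are erased; users that still form an affine recurrence after
/// widening are cloned as wide instructions; compares and certain loop-variant
/// add/sub/mul-with-load uses are widened in place; anything else falls back
/// to truncating the wide def for that single use.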
1517 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1518   assert(ExtendKindMap.count(DU.NarrowDef) &&
1519          "Should already know the kind of extension used to widen NarrowDef");
1520 
1521   // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1522   if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1523     if (LI->getLoopFor(UsePhi->getParent()) != L) {
1524       // For LCSSA phis, sink the truncate outside the loop.
1525       // After SimplifyCFG most loop exit targets have a single predecessor.
1526       // Otherwise fall back to a truncate within the loop.
1527       if (UsePhi->getNumOperands() != 1)
1528         truncateIVUse(DU, DT, LI);
1529       else {
1530         // Widening the PHI requires us to insert a trunc. The logical place
1531         // for this trunc is in the same BB as the PHI. This is not possible if
1532         // the BB is terminated by a catchswitch.
1533         if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1534           return nullptr;
1535 
1536         PHINode *WidePhi =
1537           PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1538                           UsePhi);
1539         WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1540         IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1541         Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1542         UsePhi->replaceAllUsesWith(Trunc);
1543         DeadInsts.emplace_back(UsePhi);
1544         LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1545                           << *WidePhi << "\n");
1546       }
1547       return nullptr;
1548     }
1549   }
1550 
1551   // This narrow use can be widened by a sext if it's non-negative or its narrow
1552   // def was widened by a sext. Same for zext.
1553   auto canWidenBySExt = [&]() {
1554     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1555   };
1556   auto canWidenByZExt = [&]() {
1557     return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1558   };
1559 
1560   // Our raison d'etre! Eliminate sign and zero extension.
1561   if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1562       (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1563     Value *NewDef = DU.WideDef;
1564     if (DU.NarrowUse->getType() != WideType) {
1565       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1566       unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1567       if (CastWidth < IVWidth) {
1568         // The cast isn't as wide as the IV, so insert a Trunc.
1569         IRBuilder<> Builder(DU.NarrowUse);
1570         NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1571       }
1572       else {
1573         // A wider extend was hidden behind a narrower one. This may induce
1574         // another round of IV widening in which the intermediate IV becomes
1575         // dead. It should be very rare.
1576         LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1577                           << " not wide enough to subsume " << *DU.NarrowUse
1578                           << "\n");
1579         DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1580         NewDef = DU.NarrowUse;
1581       }
1582     }
1583     if (NewDef != DU.NarrowUse) {
1584       LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1585                         << " replaced by " << *DU.WideDef << "\n");
1586       ++NumElimExt;
1587       DU.NarrowUse->replaceAllUsesWith(NewDef);
1588       DeadInsts.emplace_back(DU.NarrowUse);
1589     }
1590     // Now that the extend is gone, we want to expose its uses for potential
1591     // further simplification. We don't need to directly inform SimplifyIVUsers
1592     // of the new users, because their parent IV will be processed later as a
1593     // new loop phi.
If we preserved IVUsers analysis, we would also want to 1594 // push the uses of WideDef here. 1595 1596 // No further widening is needed. The deceased [sz]ext had done it for us. 1597 return nullptr; 1598 } 1599 1600 // Does this user itself evaluate to a recurrence after widening? 1601 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1602 if (!WideAddRec.first) 1603 WideAddRec = getWideRecurrence(DU); 1604 1605 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1606 if (!WideAddRec.first) { 1607 // If use is a loop condition, try to promote the condition instead of 1608 // truncating the IV first. 1609 if (widenLoopCompare(DU)) 1610 return nullptr; 1611 1612 // We are here about to generate a truncate instruction that may hurt 1613 // performance because the scalar evolution expression computed earlier 1614 // in WideAddRec.first does not indicate a polynomial induction expression. 1615 // In that case, look at the operands of the use instruction to determine 1616 // if we can still widen the use instead of truncating its operand. 1617 if (widenWithVariantLoadUse(DU)) { 1618 widenWithVariantLoadUseCodegen(DU); 1619 return nullptr; 1620 } 1621 1622 // This user does not evaluate to a recurrence after widening, so don't 1623 // follow it. Instead insert a Trunc to kill off the original use, 1624 // eventually isolating the original narrow IV so it can be removed. 1625 truncateIVUse(DU, DT, LI); 1626 return nullptr; 1627 } 1628 // Assume block terminators cannot evaluate to a recurrence. We can't to 1629 // insert a Trunc after a terminator if there happens to be a critical edge. 1630 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1631 "SCEV is not expected to evaluate a block terminator"); 1632 1633 // Reuse the IV increment that SCEVExpander created as long as it dominates 1634 // NarrowUse. 1635 Instruction *WideUse = nullptr; 1636 if (WideAddRec.first == WideIncExpr && 1637 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1638 WideUse = WideInc; 1639 else { 1640 WideUse = cloneIVUser(DU, WideAddRec.first); 1641 if (!WideUse) 1642 return nullptr; 1643 } 1644 // Evaluation of WideAddRec ensured that the narrow expression could be 1645 // extended outside the loop without overflow. This suggests that the wide use 1646 // evaluates to the same expression as the extended narrow use, but doesn't 1647 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1648 // where it fails, we simply throw away the newly created wide use. 1649 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1650 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1651 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1652 << "\n"); 1653 DeadInsts.emplace_back(WideUse); 1654 return nullptr; 1655 } 1656 1657 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1658 // Returning WideUse pushes it on the worklist. 1659 return WideUse; 1660 } 1661 1662 /// Add eligible users of NarrowDef to NarrowIVUsers. 1663 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1664 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1665 bool NonNegativeDef = 1666 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1667 SE->getConstant(NarrowSCEV->getType(), 0)); 1668 for (User *U : NarrowDef->users()) { 1669 Instruction *NarrowUser = cast<Instruction>(U); 1670 1671 // Handle data flow merges and bizarre phi cycles. 
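    // (The set insert below doubles as the visited check, so a user reachable
    // along several phi edges is only queued once.)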
1672 if (!Widened.insert(NarrowUser).second) 1673 continue; 1674 1675 bool NonNegativeUse = false; 1676 if (!NonNegativeDef) { 1677 // We might have a control-dependent range information for this context. 1678 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1679 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1680 } 1681 1682 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1683 NonNegativeDef || NonNegativeUse); 1684 } 1685 } 1686 1687 /// Process a single induction variable. First use the SCEVExpander to create a 1688 /// wide induction variable that evaluates to the same recurrence as the 1689 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1690 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1691 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1692 /// 1693 /// It would be simpler to delete uses as they are processed, but we must avoid 1694 /// invalidating SCEV expressions. 1695 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1696 // Is this phi an induction variable? 1697 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1698 if (!AddRec) 1699 return nullptr; 1700 1701 // Widen the induction variable expression. 1702 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1703 ? SE->getSignExtendExpr(AddRec, WideType) 1704 : SE->getZeroExtendExpr(AddRec, WideType); 1705 1706 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1707 "Expect the new IV expression to preserve its type"); 1708 1709 // Can the IV be extended outside the loop without overflow? 1710 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1711 if (!AddRec || AddRec->getLoop() != L) 1712 return nullptr; 1713 1714 // An AddRec must have loop-invariant operands. Since this AddRec is 1715 // materialized by a loop header phi, the expression cannot have any post-loop 1716 // operands, so they must dominate the loop header. 1717 assert( 1718 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1719 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1720 "Loop header phi recurrence inputs do not dominate the loop"); 1721 1722 // Iterate over IV uses (including transitive ones) looking for IV increments 1723 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1724 // the increment calculate control-dependent range information basing on 1725 // dominating conditions inside of the loop (e.g. a range check inside of the 1726 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1727 // 1728 // Control-dependent range information is later used to prove that a narrow 1729 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1730 // this on demand because when pushNarrowIVUsers needs this information some 1731 // of the dominating conditions might be already widened. 1732 if (UsePostIncrementRanges) 1733 calculatePostIncRanges(OrigPhi); 1734 1735 // The rewriter provides a value for the desired IV expression. This may 1736 // either find an existing phi or materialize a new one. Either way, we 1737 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1738 // of the phi-SCC dominates the loop entry. 
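  // For a narrow IV such as 'phi i32 [ 0, %ph ], [ %inc, %latch ]' widened to
  // i64, the expansion typically materializes something like
  //   %indvars.iv      = phi i64 [ 0, %ph ], [ %indvars.iv.next, %latch ]
  //   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  // (names and wrap flags are illustrative; SCEVExpander decides the exact IR).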
1739 Instruction *InsertPt = &L->getHeader()->front(); 1740 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1741 1742 // Remembering the WideIV increment generated by SCEVExpander allows 1743 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1744 // employ a general reuse mechanism because the call above is the only call to 1745 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1746 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1747 WideInc = 1748 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1749 WideIncExpr = SE->getSCEV(WideInc); 1750 // Propagate the debug location associated with the original loop increment 1751 // to the new (widened) increment. 1752 auto *OrigInc = 1753 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1754 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1755 } 1756 1757 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1758 ++NumWidened; 1759 1760 // Traverse the def-use chain using a worklist starting at the original IV. 1761 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1762 1763 Widened.insert(OrigPhi); 1764 pushNarrowIVUsers(OrigPhi, WidePhi); 1765 1766 while (!NarrowIVUsers.empty()) { 1767 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1768 1769 // Process a def-use edge. This may replace the use, so don't hold a 1770 // use_iterator across it. 1771 Instruction *WideUse = widenIVUse(DU, Rewriter); 1772 1773 // Follow all def-use edges from the previous narrow use. 1774 if (WideUse) 1775 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1776 1777 // widenIVUse may have removed the def-use edge. 1778 if (DU.NarrowDef->use_empty()) 1779 DeadInsts.emplace_back(DU.NarrowDef); 1780 } 1781 1782 // Attach any debug information to the new PHI. Since OrigPhi and WidePHI 1783 // evaluate the same recurrence, we can just copy the debug info over. 1784 SmallVector<DbgValueInst *, 1> DbgValues; 1785 llvm::findDbgValues(DbgValues, OrigPhi); 1786 auto *MDPhi = MetadataAsValue::get(WidePhi->getContext(), 1787 ValueAsMetadata::get(WidePhi)); 1788 for (auto &DbgValue : DbgValues) 1789 DbgValue->setOperand(0, MDPhi); 1790 return WidePhi; 1791 } 1792 1793 /// Calculates control-dependent range for the given def at the given context 1794 /// by looking at dominating conditions inside of the loop 1795 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1796 Instruction *NarrowUser) { 1797 using namespace llvm::PatternMatch; 1798 1799 Value *NarrowDefLHS; 1800 const APInt *NarrowDefRHS; 1801 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1802 m_APInt(NarrowDefRHS))) || 1803 !NarrowDefRHS->isNonNegative()) 1804 return; 1805 1806 auto UpdateRangeFromCondition = [&] (Value *Condition, 1807 bool TrueDest) { 1808 CmpInst::Predicate Pred; 1809 Value *CmpRHS; 1810 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1811 m_Value(CmpRHS)))) 1812 return; 1813 1814 CmpInst::Predicate P = 1815 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1816 1817 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1818 auto CmpConstrainedLHSRange = 1819 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1820 auto NarrowDefRange = 1821 CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS); 1822 1823 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1824 }; 1825 1826 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1827 if (!HasGuards) 1828 return; 1829 1830 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1831 Ctx->getParent()->rend())) { 1832 Value *C = nullptr; 1833 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1834 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1835 } 1836 }; 1837 1838 UpdateRangeFromGuards(NarrowUser); 1839 1840 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1841 // If NarrowUserBB is statically unreachable asking dominator queries may 1842 // yield surprising results. (e.g. the block may not have a dom tree node) 1843 if (!DT->isReachableFromEntry(NarrowUserBB)) 1844 return; 1845 1846 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1847 L->contains(DTB->getBlock()); 1848 DTB = DTB->getIDom()) { 1849 auto *BB = DTB->getBlock(); 1850 auto *TI = BB->getTerminator(); 1851 UpdateRangeFromGuards(TI); 1852 1853 auto *BI = dyn_cast<BranchInst>(TI); 1854 if (!BI || !BI->isConditional()) 1855 continue; 1856 1857 auto *TrueSuccessor = BI->getSuccessor(0); 1858 auto *FalseSuccessor = BI->getSuccessor(1); 1859 1860 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1861 return BBE.isSingleEdge() && 1862 DT->dominates(BBE, NarrowUser->getParent()); 1863 }; 1864 1865 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1866 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1867 1868 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1869 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1870 } 1871 } 1872 1873 /// Calculates PostIncRangeInfos map for the given IV 1874 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1875 SmallPtrSet<Instruction *, 16> Visited; 1876 SmallVector<Instruction *, 6> Worklist; 1877 Worklist.push_back(OrigPhi); 1878 Visited.insert(OrigPhi); 1879 1880 while (!Worklist.empty()) { 1881 Instruction *NarrowDef = Worklist.pop_back_val(); 1882 1883 for (Use &U : NarrowDef->uses()) { 1884 auto *NarrowUser = cast<Instruction>(U.getUser()); 1885 1886 // Don't go looking outside the current loop. 1887 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1888 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1889 continue; 1890 1891 if (!Visited.insert(NarrowUser).second) 1892 continue; 1893 1894 Worklist.push_back(NarrowUser); 1895 1896 calculatePostIncRange(NarrowDef, NarrowUser); 1897 } 1898 } 1899 } 1900 1901 //===----------------------------------------------------------------------===// 1902 // Live IV Reduction - Minimize IVs live across the loop. 1903 //===----------------------------------------------------------------------===// 1904 1905 //===----------------------------------------------------------------------===// 1906 // Simplification of IV users based on SCEV evaluation. 
1907 //===----------------------------------------------------------------------===// 1908 1909 namespace { 1910 1911 class IndVarSimplifyVisitor : public IVVisitor { 1912 ScalarEvolution *SE; 1913 const TargetTransformInfo *TTI; 1914 PHINode *IVPhi; 1915 1916 public: 1917 WideIVInfo WI; 1918 1919 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1920 const TargetTransformInfo *TTI, 1921 const DominatorTree *DTree) 1922 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1923 DT = DTree; 1924 WI.NarrowIV = IVPhi; 1925 } 1926 1927 // Implement the interface used by simplifyUsersOfIV. 1928 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1929 }; 1930 1931 } // end anonymous namespace 1932 1933 /// Iteratively perform simplification on a worklist of IV users. Each 1934 /// successive simplification may push more users which may themselves be 1935 /// candidates for simplification. 1936 /// 1937 /// Sign/Zero extend elimination is interleaved with IV simplification. 1938 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1939 SCEVExpander &Rewriter, 1940 LoopInfo *LI) { 1941 SmallVector<WideIVInfo, 8> WideIVs; 1942 1943 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1944 Intrinsic::getName(Intrinsic::experimental_guard)); 1945 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1946 1947 SmallVector<PHINode*, 8> LoopPhis; 1948 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1949 LoopPhis.push_back(cast<PHINode>(I)); 1950 } 1951 // Each round of simplification iterates through the SimplifyIVUsers worklist 1952 // for all current phis, then determines whether any IVs can be 1953 // widened. Widening adds new phis to LoopPhis, inducing another round of 1954 // simplification on the wide IVs. 1955 bool Changed = false; 1956 while (!LoopPhis.empty()) { 1957 // Evaluate as many IV expressions as possible before widening any IVs. This 1958 // forces SCEV to set no-wrap flags before evaluating sign/zero 1959 // extension. The first time SCEV attempts to normalize sign/zero extension, 1960 // the result becomes final. So for the most predictable results, we delay 1961 // evaluation of sign/zero extend evaluation until needed, and avoid running 1962 // other SCEV based analysis prior to simplifyAndExtend. 1963 do { 1964 PHINode *CurrIV = LoopPhis.pop_back_val(); 1965 1966 // Information about sign/zero extensions of CurrIV. 1967 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 1968 1969 Changed |= 1970 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 1971 1972 if (Visitor.WI.WidestNativeType) { 1973 WideIVs.push_back(Visitor.WI); 1974 } 1975 } while(!LoopPhis.empty()); 1976 1977 for (; !WideIVs.empty(); WideIVs.pop_back()) { 1978 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 1979 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 1980 Changed = true; 1981 LoopPhis.push_back(WidePhi); 1982 } 1983 } 1984 } 1985 return Changed; 1986 } 1987 1988 //===----------------------------------------------------------------------===// 1989 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 1990 //===----------------------------------------------------------------------===// 1991 1992 /// Given an Value which is hoped to be part of an add recurance in the given 1993 /// loop, return the associated Phi node if so. Otherwise, return null. Note 1994 /// that this is less general than SCEVs AddRec checking. 
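///
/// For instance (illustrative IR), given the increment %iv.next below, this
/// returns %iv:
///   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add nuw nsw i64 %iv, 1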
1995 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 1996 Instruction *IncI = dyn_cast<Instruction>(IncV); 1997 if (!IncI) 1998 return nullptr; 1999 2000 switch (IncI->getOpcode()) { 2001 case Instruction::Add: 2002 case Instruction::Sub: 2003 break; 2004 case Instruction::GetElementPtr: 2005 // An IV counter must preserve its type. 2006 if (IncI->getNumOperands() == 2) 2007 break; 2008 LLVM_FALLTHROUGH; 2009 default: 2010 return nullptr; 2011 } 2012 2013 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 2014 if (Phi && Phi->getParent() == L->getHeader()) { 2015 if (L->isLoopInvariant(IncI->getOperand(1))) 2016 return Phi; 2017 return nullptr; 2018 } 2019 if (IncI->getOpcode() == Instruction::GetElementPtr) 2020 return nullptr; 2021 2022 // Allow add/sub to be commuted. 2023 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 2024 if (Phi && Phi->getParent() == L->getHeader()) { 2025 if (L->isLoopInvariant(IncI->getOperand(0))) 2026 return Phi; 2027 } 2028 return nullptr; 2029 } 2030 2031 /// Whether the current loop exit test is based on this value. Currently this 2032 /// is limited to a direct use in the loop condition. 2033 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 2034 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2035 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 2036 // TODO: Allow non-icmp loop test. 2037 if (!ICmp) 2038 return false; 2039 2040 // TODO: Allow indirect use. 2041 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 2042 } 2043 2044 /// linearFunctionTestReplace policy. Return true unless we can show that the 2045 /// current exit test is already sufficiently canonical. 2046 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 2047 assert(L->getLoopLatch() && "Must be in simplified form"); 2048 2049 // Avoid converting a constant or loop invariant test back to a runtime 2050 // test. This is critical for when SCEV's cached ExitCount is less precise 2051 // than the current IR (such as after we've proven a particular exit is 2052 // actually dead and thus the BE count never reaches our ExitCount.) 2053 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2054 if (L->isLoopInvariant(BI->getCondition())) 2055 return false; 2056 2057 // Do LFTR to simplify the exit condition to an ICMP. 2058 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 2059 if (!Cond) 2060 return true; 2061 2062 // Do LFTR to simplify the exit ICMP to EQ/NE 2063 ICmpInst::Predicate Pred = Cond->getPredicate(); 2064 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 2065 return true; 2066 2067 // Look for a loop invariant RHS 2068 Value *LHS = Cond->getOperand(0); 2069 Value *RHS = Cond->getOperand(1); 2070 if (!L->isLoopInvariant(RHS)) { 2071 if (!L->isLoopInvariant(LHS)) 2072 return true; 2073 std::swap(LHS, RHS); 2074 } 2075 // Look for a simple IV counter LHS 2076 PHINode *Phi = dyn_cast<PHINode>(LHS); 2077 if (!Phi) 2078 Phi = getLoopPhiForCounter(LHS, L); 2079 2080 if (!Phi) 2081 return true; 2082 2083 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 2084 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2085 if (Idx < 0) 2086 return true; 2087 2088 // Do LFTR if the exit condition's IV is *not* a simple counter. 2089 Value *IncV = Phi->getIncomingValue(Idx); 2090 return Phi != getLoopPhiForCounter(IncV, L); 2091 } 2092 2093 /// Return true if undefined behavior would provable be executed on the path to 2094 /// OnPathTo if Root produced a posion result. 
Note that this doesn't say
2095 /// anything about whether OnPathTo is actually executed or whether Root is
2096 /// actually poison. This can be used to assess whether a new use of Root can
2097 /// be added at a location which is control equivalent with OnPathTo (such as
2098 /// immediately before it) without introducing UB which didn't previously
2099 /// exist. Note that a false result conveys no information.
2100 static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2101                                           Instruction *OnPathTo,
2102                                           DominatorTree *DT) {
2103   // Basic approach is to assume Root is poison, propagate poison forward
2104   // through all users we can easily track, and then check whether any of those
2105   // users are provably UB and must execute before our exiting block might
2106   // exit.
2107 
2108   // The set of all recursive users we've visited (which are assumed to all be
2109   // poison because of said visit)
2110   SmallSet<const Value *, 16> KnownPoison;
2111   SmallVector<const Instruction*, 16> Worklist;
2112   Worklist.push_back(Root);
2113   while (!Worklist.empty()) {
2114     const Instruction *I = Worklist.pop_back_val();
2115 
2116     // If we know this must trigger UB on a path leading to our target.
2117     if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2118       return true;
2119 
2120     // If we can't analyze propagation through this instruction, just skip it
2121     // and transitive users. Safe as false is a conservative result.
2122     if (!propagatesFullPoison(I) && I != Root)
2123       continue;
2124 
2125     if (KnownPoison.insert(I).second)
2126       for (const User *User : I->users())
2127         Worklist.push_back(cast<Instruction>(User));
2128   }
2129 
2130   // Might be non-UB, or might have a path we couldn't prove must execute on
2131   // the way to the exiting bb.
2132   return false;
2133 }
2134 
2135 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2136 /// down to checking that all operands are constant and listing instructions
2137 /// that may hide undef.
2138 static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2139                                unsigned Depth) {
2140   if (isa<Constant>(V))
2141     return !isa<UndefValue>(V);
2142 
2143   if (Depth >= 6)
2144     return false;
2145 
2146   // Conservatively handle non-constant non-instructions. For example, Arguments
2147   // may be undef.
2148   Instruction *I = dyn_cast<Instruction>(V);
2149   if (!I)
2150     return false;
2151 
2152   // Load and return values may be undef.
2153   if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2154     return false;
2155 
2156   // Optimistically handle other instructions.
2157   for (Value *Op : I->operands()) {
2158     if (!Visited.insert(Op).second)
2159       continue;
2160     if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2161       return false;
2162   }
2163   return true;
2164 }
2165 
2166 /// Return true if the given value is concrete. We must prove that undef can
2167 /// never reach it.
2168 ///
2169 /// TODO: If we decide that this is a good approach to checking for undef, we
2170 /// may factor it into a common location.
2171 static bool hasConcreteDef(Value *V) {
2172   SmallPtrSet<Value*, 8> Visited;
2173   Visited.insert(V);
2174   return hasConcreteDefImpl(V, Visited, 0);
2175 }
2176 
2177 /// Return true if this IV has no uses other than the (soon to be rewritten)
2178 /// loop exit test.
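///
/// Equivalently, this returns true when the phi and its increment feed only
/// each other and the exit compare, e.g. (illustrative):
///   %iv      = phi i32 [ 0, %ph ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1
///   %cond    = icmp ne i32 %iv.next, %n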
2179 static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2180   int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2181   Value *IncV = Phi->getIncomingValue(LatchIdx);
2182 
2183   for (User *U : Phi->users())
2184     if (U != Cond && U != IncV) return false;
2185 
2186   for (User *U : IncV->users())
2187     if (U != Cond && U != Phi) return false;
2188   return true;
2189 }
2190 
2191 /// Return true if the given phi is a "counter" in L. A counter is an
2192 /// add recurrence (of integer or pointer type) with an arbitrary start, and a
2193 /// step of 1. Note that L must have exactly one latch.
2194 static bool isLoopCounter(PHINode* Phi, Loop *L,
2195                           ScalarEvolution *SE) {
2196   assert(Phi->getParent() == L->getHeader());
2197   assert(L->getLoopLatch());
2198 
2199   if (!SE->isSCEVable(Phi->getType()))
2200     return false;
2201 
2202   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2203   if (!AR || AR->getLoop() != L || !AR->isAffine())
2204     return false;
2205 
2206   const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2207   if (!Step || !Step->isOne())
2208     return false;
2209 
2210   int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2211   Value *IncV = Phi->getIncomingValue(LatchIdx);
2212   return (getLoopPhiForCounter(IncV, L) == Phi);
2213 }
2214 
2215 /// Search the loop header for a loop counter (an add rec with a step of one)
2216 /// suitable for use by LFTR. If multiple counters are available, select the
2217 /// "best" one based on profitability heuristics.
2218 ///
2219 /// BECount may be an i8* pointer type. The pointer difference is already a
2220 /// valid count without scaling the address stride, so it remains a pointer
2221 /// expression as far as SCEV is concerned.
2222 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2223                                 const SCEV *BECount,
2224                                 ScalarEvolution *SE, DominatorTree *DT) {
2225   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2226 
2227   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2228 
2229   // Loop over all of the PHI nodes, looking for a simple counter.
2230   PHINode *BestPhi = nullptr;
2231   const SCEV *BestInit = nullptr;
2232   BasicBlock *LatchBlock = L->getLoopLatch();
2233   assert(LatchBlock && "Must be in simplified form");
2234   const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2235 
2236   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2237     PHINode *Phi = cast<PHINode>(I);
2238     if (!isLoopCounter(Phi, L, SE))
2239       continue;
2240 
2241     // Avoid comparing an integer IV against a pointer Limit.
2242     if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2243       continue;
2244 
2245     const auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2246 
2247     // AR may be a pointer type, while BECount is an integer type.
2248     // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2249     // AR may not be a narrower type, or we may never exit.
2250     uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2251     if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2252       continue;
2253 
2254     // Avoid reusing a potentially undef value to compute other values that may
2255     // have originally had a concrete definition.
2256     if (!hasConcreteDef(Phi)) {
2257       // We explicitly allow unknown phis as long as they are already used by
2258       // the loop exit test. This is legal since performing LFTR could not
2259       // increase the number of undef users.
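      // (Both the phi itself and its incremented value are checked below,
      // since either one may be what the exit compare currently uses.)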
2260 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2261 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2262 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2263 continue; 2264 } 2265 2266 // Avoid introducing undefined behavior due to poison which didn't exist in 2267 // the original program. (Annoyingly, the rules for poison and undef 2268 // propagation are distinct, so this does NOT cover the undef case above.) 2269 // We have to ensure that we don't introduce UB by introducing a use on an 2270 // iteration where said IV produces poison. Our strategy here differs for 2271 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2272 // see code in linearFunctionTestReplace. For pointers, we restrict 2273 // transforms as there is no good way to reinfer inbounds once lost. 2274 if (!Phi->getType()->isIntegerTy() && 2275 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2276 continue; 2277 2278 const SCEV *Init = AR->getStart(); 2279 2280 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2281 // Don't force a live loop counter if another IV can be used. 2282 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2283 continue; 2284 2285 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2286 // also prefers integer to pointer IVs. 2287 if (BestInit->isZero() != Init->isZero()) { 2288 if (BestInit->isZero()) 2289 continue; 2290 } 2291 // If two IVs both count from zero or both count from nonzero then the 2292 // narrower is likely a dead phi that has been widened. Use the wider phi 2293 // to allow the other to be eliminated. 2294 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2295 continue; 2296 } 2297 BestPhi = Phi; 2298 BestInit = Init; 2299 } 2300 return BestPhi; 2301 } 2302 2303 /// Insert an IR expression which computes the value held by the IV IndVar 2304 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2305 /// is taken ExitCount times. 2306 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2307 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2308 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2309 assert(isLoopCounter(IndVar, L, SE)); 2310 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2311 const SCEV *IVInit = AR->getStart(); 2312 2313 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2314 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2315 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2316 // the existing GEPs whenever possible. 2317 if (IndVar->getType()->isPointerTy() && 2318 !ExitCount->getType()->isPointerTy()) { 2319 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2320 // signed value. ExitCount on the other hand represents the loop trip count, 2321 // which is an unsigned value. FindLoopCounter only allows induction 2322 // variables that have a positive unit stride of one. This means we don't 2323 // have to handle the case of negative offsets (yet) and just need to zero 2324 // extend ExitCount. 2325 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2326 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2327 if (UsePostInc) 2328 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2329 2330 // Expand the code for the iteration count. 
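    // For example (illustrative only; names are made up), for an i8* IV with
    // start %base and an i64 exit count %n, the limit expanded below is in
    // effect:
    //   %limit = getelementptr i8, i8* %base, i64 %n
    // with one extra element when the post-incremented value is compared.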
2331 assert(SE->isLoopInvariant(IVOffset, L) && 2332 "Computed iteration count is not loop invariant!"); 2333 2334 // We could handle pointer IVs other than i8*, but we need to compensate for 2335 // gep index scaling. 2336 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2337 cast<PointerType>(IndVar->getType()) 2338 ->getElementType())->isOne() && 2339 "unit stride pointer IV must be i8*"); 2340 2341 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2342 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2343 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2344 } else { 2345 // In any other case, convert both IVInit and ExitCount to integers before 2346 // comparing. This may result in SCEV expansion of pointers, but in practice 2347 // SCEV will fold the pointer arithmetic away as such: 2348 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2349 // 2350 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2351 // for simple memset-style loops. 2352 // 2353 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2354 // were generated on top of case #2, which is not expected. 2355 2356 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2357 // For unit stride, IVCount = Start + ExitCount with 2's complement 2358 // overflow. 2359 2360 // For integer IVs, truncate the IV before computing IVInit + BECount, 2361 // unless we know apriori that the limit must be a constant when evaluated 2362 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2363 // of the IV in the loop over a (potentially) expensive expansion of the 2364 // widened exit count add(zext(add)) expression. 2365 if (SE->getTypeSizeInBits(IVInit->getType()) 2366 > SE->getTypeSizeInBits(ExitCount->getType())) { 2367 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2368 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2369 else 2370 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2371 } 2372 2373 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2374 2375 if (UsePostInc) 2376 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2377 2378 // Expand the code for the iteration count. 2379 assert(SE->isLoopInvariant(IVLimit, L) && 2380 "Computed iteration count is not loop invariant!"); 2381 // Ensure that we generate the same type as IndVar, or a smaller integer 2382 // type. In the presence of null pointer values, we have an integer type 2383 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2384 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2385 IndVar->getType() : ExitCount->getType(); 2386 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2387 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2388 } 2389 } 2390 2391 /// This method rewrites the exit condition of the loop to be a canonical != 2392 /// comparison against the incremented loop induction variable. This pass is 2393 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2394 /// determine a loop-invariant trip count of the loop, which is actually a much 2395 /// broader range than just linear tests. 
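///
/// For example (illustrative), an exit test such as
///   %cmp = icmp slt i32 %iv, %len
/// is rewritten into a comparison of the (possibly post-incremented) counter
/// against a loop-invariant limit computed from the exit count:
///   %exitcond = icmp ne i32 %iv.next, %limit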
2396 bool IndVarSimplify:: 2397 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2398 const SCEV *ExitCount, 2399 PHINode *IndVar, SCEVExpander &Rewriter) { 2400 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2401 assert(isLoopCounter(IndVar, L, SE)); 2402 Instruction * const IncVar = 2403 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2404 2405 // Initialize CmpIndVar to the preincremented IV. 2406 Value *CmpIndVar = IndVar; 2407 bool UsePostInc = false; 2408 2409 // If the exiting block is the same as the backedge block, we prefer to 2410 // compare against the post-incremented value, otherwise we must compare 2411 // against the preincremented value. 2412 if (ExitingBB == L->getLoopLatch()) { 2413 // For pointer IVs, we chose to not strip inbounds which requires us not 2414 // to add a potentially UB introducing use. We need to either a) show 2415 // the loop test we're modifying is already in post-inc form, or b) show 2416 // that adding a use must not introduce UB. 2417 bool SafeToPostInc = 2418 IndVar->getType()->isIntegerTy() || 2419 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2420 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2421 if (SafeToPostInc) { 2422 UsePostInc = true; 2423 CmpIndVar = IncVar; 2424 } 2425 } 2426 2427 // It may be necessary to drop nowrap flags on the incrementing instruction 2428 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2429 // case the increment might have previously been poison on the last iteration 2430 // only) or if LFTR switches to a different IV that was previously dynamically 2431 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2432 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2433 // check), because the pre-inc addrec flags may be adopted from the original 2434 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2435 // TODO: This handling is inaccurate for one case: If we switch to a 2436 // dynamically dead IV that wraps on the first loop iteration only, which is 2437 // not covered by the post-inc addrec. (If the new IV was not dynamically 2438 // dead, it could not be poison on the first iteration in the first place.) 2439 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2440 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2441 if (BO->hasNoUnsignedWrap()) 2442 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2443 if (BO->hasNoSignedWrap()) 2444 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2445 } 2446 2447 Value *ExitCnt = genLoopLimit( 2448 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2449 assert(ExitCnt->getType()->isPointerTy() == 2450 IndVar->getType()->isPointerTy() && 2451 "genLoopLimit missed a cast"); 2452 2453 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2454 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2455 ICmpInst::Predicate P; 2456 if (L->contains(BI->getSuccessor(0))) 2457 P = ICmpInst::ICMP_NE; 2458 else 2459 P = ICmpInst::ICMP_EQ; 2460 2461 IRBuilder<> Builder(BI); 2462 2463 // The new loop exit condition should reuse the debug location of the 2464 // original loop exit condition. 
2465 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2466 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2467 2468 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2469 // avoid the expensive expansion of the limit expression in the wider type, 2470 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2471 // since we know (from the exit count bitwidth), that we can't self-wrap in 2472 // the narrower type. 2473 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2474 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2475 if (CmpIndVarSize > ExitCntSize) { 2476 assert(!CmpIndVar->getType()->isPointerTy() && 2477 !ExitCnt->getType()->isPointerTy()); 2478 2479 // Before resorting to actually inserting the truncate, use the same 2480 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2481 // the other side of the comparison instead. We still evaluate the limit 2482 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2483 // a truncate within in. 2484 bool Extended = false; 2485 const SCEV *IV = SE->getSCEV(CmpIndVar); 2486 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2487 ExitCnt->getType()); 2488 const SCEV *ZExtTrunc = 2489 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2490 2491 if (ZExtTrunc == IV) { 2492 Extended = true; 2493 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2494 "wide.trip.count"); 2495 } else { 2496 const SCEV *SExtTrunc = 2497 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2498 if (SExtTrunc == IV) { 2499 Extended = true; 2500 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2501 "wide.trip.count"); 2502 } 2503 } 2504 2505 if (Extended) { 2506 bool Discard; 2507 L->makeLoopInvariant(ExitCnt, Discard); 2508 } else 2509 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2510 "lftr.wideiv"); 2511 } 2512 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2513 << " LHS:" << *CmpIndVar << '\n' 2514 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2515 << "\n" 2516 << " RHS:\t" << *ExitCnt << "\n" 2517 << "ExitCount:\t" << *ExitCount << "\n" 2518 << " was: " << *BI->getCondition() << "\n"); 2519 2520 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2521 Value *OrigCond = BI->getCondition(); 2522 // It's tempting to use replaceAllUsesWith here to fully replace the old 2523 // comparison, but that's not immediately safe, since users of the old 2524 // comparison may not be dominated by the new comparison. Instead, just 2525 // update the branch to use the new comparison; in the common case this 2526 // will make old comparison dead. 2527 BI->setCondition(Cond); 2528 DeadInsts.push_back(OrigCond); 2529 2530 ++NumLFTR; 2531 return true; 2532 } 2533 2534 //===----------------------------------------------------------------------===// 2535 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2536 //===----------------------------------------------------------------------===// 2537 2538 /// If there's a single exit block, sink any loop-invariant values that 2539 /// were defined in the preheader but not used inside the loop into the 2540 /// exit block to reduce register pressure in the loop. 
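///
/// For instance (illustrative), a preheader computation
///   %inv = mul i32 %a, %b
/// whose only users sit past the loop is moved into the exit block, so it no
/// longer ties up a register for the duration of the loop.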
2541 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2542 BasicBlock *ExitBlock = L->getExitBlock(); 2543 if (!ExitBlock) return false; 2544 2545 BasicBlock *Preheader = L->getLoopPreheader(); 2546 if (!Preheader) return false; 2547 2548 bool MadeAnyChanges = false; 2549 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2550 BasicBlock::iterator I(Preheader->getTerminator()); 2551 while (I != Preheader->begin()) { 2552 --I; 2553 // New instructions were inserted at the end of the preheader. 2554 if (isa<PHINode>(I)) 2555 break; 2556 2557 // Don't move instructions which might have side effects, since the side 2558 // effects need to complete before instructions inside the loop. Also don't 2559 // move instructions which might read memory, since the loop may modify 2560 // memory. Note that it's okay if the instruction might have undefined 2561 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2562 // block. 2563 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2564 continue; 2565 2566 // Skip debug info intrinsics. 2567 if (isa<DbgInfoIntrinsic>(I)) 2568 continue; 2569 2570 // Skip eh pad instructions. 2571 if (I->isEHPad()) 2572 continue; 2573 2574 // Don't sink alloca: we never want to sink static alloca's out of the 2575 // entry block, and correctly sinking dynamic alloca's requires 2576 // checks for stacksave/stackrestore intrinsics. 2577 // FIXME: Refactor this check somehow? 2578 if (isa<AllocaInst>(I)) 2579 continue; 2580 2581 // Determine if there is a use in or before the loop (direct or 2582 // otherwise). 2583 bool UsedInLoop = false; 2584 for (Use &U : I->uses()) { 2585 Instruction *User = cast<Instruction>(U.getUser()); 2586 BasicBlock *UseBB = User->getParent(); 2587 if (PHINode *P = dyn_cast<PHINode>(User)) { 2588 unsigned i = 2589 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2590 UseBB = P->getIncomingBlock(i); 2591 } 2592 if (UseBB == Preheader || L->contains(UseBB)) { 2593 UsedInLoop = true; 2594 break; 2595 } 2596 } 2597 2598 // If there is, the def must remain in the preheader. 2599 if (UsedInLoop) 2600 continue; 2601 2602 // Otherwise, sink it to the exit block. 2603 Instruction *ToMove = &*I; 2604 bool Done = false; 2605 2606 if (I != Preheader->begin()) { 2607 // Skip debug info intrinsics. 2608 do { 2609 --I; 2610 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2611 2612 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2613 Done = true; 2614 } else { 2615 Done = true; 2616 } 2617 2618 MadeAnyChanges = true; 2619 ToMove->moveBefore(*ExitBlock, InsertPt); 2620 if (Done) break; 2621 InsertPt = ToMove->getIterator(); 2622 } 2623 2624 return MadeAnyChanges; 2625 } 2626 2627 bool IndVarSimplify::optimizeLoopExits(Loop *L) { 2628 SmallVector<BasicBlock*, 16> ExitingBlocks; 2629 L->getExitingBlocks(ExitingBlocks); 2630 2631 // Form an expression for the maximum exit count possible for this loop. We 2632 // merge the max and exact information to approximate a version of 2633 // getMaxBackedgeTakenInfo which isn't restricted to just constants. 2634 // TODO: factor this out as a version of getMaxBackedgeTakenCount which 2635 // isn't guaranteed to return a constant. 
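  // Conceptually, the value formed below is umin(MaxConstEC, ExitCount(BB_1),
  // ..., ExitCount(BB_k)) over every exiting block with a computable count.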
2636   SmallVector<const SCEV*, 4> ExitCounts;
2637   const SCEV *MaxConstEC = SE->getMaxBackedgeTakenCount(L);
2638   if (!isa<SCEVCouldNotCompute>(MaxConstEC))
2639     ExitCounts.push_back(MaxConstEC);
2640   for (BasicBlock *ExitingBB : ExitingBlocks) {
2641     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2642     if (!isa<SCEVCouldNotCompute>(ExitCount)) {
2643       assert(DT->dominates(ExitingBB, L->getLoopLatch()) &&
2644              "We should only have known counts for exiting blocks that "
2645              "dominate latch!");
2646       ExitCounts.push_back(ExitCount);
2647     }
2648   }
2649   if (ExitCounts.empty())
2650     return false;
2651   const SCEV *MaxExitCount = SE->getUMinFromMismatchedTypes(ExitCounts);
2652 
2653   bool Changed = false;
2654   for (BasicBlock *ExitingBB : ExitingBlocks) {
2655     // If our exiting block exits multiple loops, we can only rewrite the
2656     // innermost one. Otherwise, we're changing how many times the innermost
2657     // loop runs before it exits.
2658     if (LI->getLoopFor(ExitingBB) != L)
2659       continue;
2660 
2661     // Can't rewrite non-branch yet.
2662     BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
2663     if (!BI)
2664       continue;
2665 
2666     // If already constant, nothing to do.
2667     if (isa<Constant>(BI->getCondition()))
2668       continue;
2669 
2670     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
2671     if (isa<SCEVCouldNotCompute>(ExitCount))
2672       continue;
2673 
2674     // If we know we'd exit on the first iteration, rewrite the exit to
2675     // reflect this. This does not imply the loop must exit through this
2676     // exit; there may be an earlier one taken on the first iteration.
2677     // TODO: Given we know the backedge can't be taken, we should go ahead
2678     // and break it. Or at least, kill all the header phis and simplify.
2679     if (ExitCount->isZero()) {
2680       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2681       auto *OldCond = BI->getCondition();
2682       auto *NewCond = ExitIfTrue ? ConstantInt::getTrue(OldCond->getType()) :
2683                                    ConstantInt::getFalse(OldCond->getType());
2684       BI->setCondition(NewCond);
2685       if (OldCond->use_empty())
2686         DeadInsts.push_back(OldCond);
2687       Changed = true;
2688       continue;
2689     }
2690 
2691     // If we end up with a pointer exit count, bail.
2692     if (!ExitCount->getType()->isIntegerTy() ||
2693         !MaxExitCount->getType()->isIntegerTy())
2694       return false;
2695 
2696     Type *WiderType =
2697         SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
2698     ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
2699     MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
2700     assert(MaxExitCount->getType() == ExitCount->getType());
2701 
2702     // Can we prove that some other exit must be taken strictly before this
2703     // one? TODO: handle cases where ule is known, and equality is covered
2704     // by a dominating exit
2705     if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
2706                                      MaxExitCount, ExitCount)) {
2707       bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
2708       auto *OldCond = BI->getCondition();
2709       auto *NewCond = ExitIfTrue ? ConstantInt::getFalse(OldCond->getType()) :
2710                                    ConstantInt::getTrue(OldCond->getType());
2711       BI->setCondition(NewCond);
2712       if (OldCond->use_empty())
2713         DeadInsts.push_back(OldCond);
2714       Changed = true;
2715       continue;
2716     }
2717 
2718     // TODO: If we can prove that the exiting iteration is equal to the exit
2719     // count for this exit and that no previous exit opportunities exist within
2720     // the loop, then we can discharge all other exits. (May fall out of
2721     // previous TODO.)
2722 2723 // TODO: If we can't prove any relation between our exit count and the 2724 // loops exit count, but taking this exit doesn't require actually running 2725 // the loop (i.e. no side effects, no computed values used in exit), then 2726 // we can replace the exit test with a loop invariant test which exits on 2727 // the first iteration. 2728 } 2729 return Changed; 2730 } 2731 2732 //===----------------------------------------------------------------------===// 2733 // IndVarSimplify driver. Manage several subpasses of IV simplification. 2734 //===----------------------------------------------------------------------===// 2735 2736 bool IndVarSimplify::run(Loop *L) { 2737 // We need (and expect!) the incoming loop to be in LCSSA. 2738 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2739 "LCSSA required to run indvars!"); 2740 bool Changed = false; 2741 2742 // If LoopSimplify form is not available, stay out of trouble. Some notes: 2743 // - LSR currently only supports LoopSimplify-form loops. Indvars' 2744 // canonicalization can be a pessimization without LSR to "clean up" 2745 // afterwards. 2746 // - We depend on having a preheader; in particular, 2747 // Loop::getCanonicalInductionVariable only supports loops with preheaders, 2748 // and we're in trouble if we can't find the induction variable even when 2749 // we've manually inserted one. 2750 // - LFTR relies on having a single backedge. 2751 if (!L->isLoopSimplifyForm()) 2752 return false; 2753 2754 // If there are any floating-point recurrences, attempt to 2755 // transform them to use integer recurrences. 2756 Changed |= rewriteNonIntegerIVs(L); 2757 2758 const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); 2759 2760 // Create a rewriter object which we'll use to transform the code with. 2761 SCEVExpander Rewriter(*SE, DL, "indvars"); 2762 #ifndef NDEBUG 2763 Rewriter.setDebugType(DEBUG_TYPE); 2764 #endif 2765 2766 // Eliminate redundant IV users. 2767 // 2768 // Simplification works best when run before other consumers of SCEV. We 2769 // attempt to avoid evaluating SCEVs for sign/zero extend operations until 2770 // other expressions involving loop IVs have been evaluated. This helps SCEV 2771 // set no-wrap flags before normalizing sign/zero extension. 2772 Rewriter.disableCanonicalMode(); 2773 Changed |= simplifyAndExtend(L, Rewriter, LI); 2774 2775 // Check to see if this loop has a computable loop-invariant execution count. 2776 // If so, this means that we can compute the final value of any expressions 2777 // that are recurrent in the loop, and substitute the exit values from the 2778 // loop into any instructions outside of the loop that use the final values of 2779 // the current expressions. 2780 // 2781 if (ReplaceExitValue != NeverRepl && 2782 !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 2783 Changed |= rewriteLoopExitValues(L, Rewriter); 2784 2785 // Eliminate redundant IV cycles. 2786 NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); 2787 2788 Changed |= optimizeLoopExits(L); 2789 2790 // If we have a trip count expression, rewrite the loop's exit condition 2791 // using it. 2792 if (!DisableLFTR) { 2793 SmallVector<BasicBlock*, 16> ExitingBlocks; 2794 L->getExitingBlocks(ExitingBlocks); 2795 for (BasicBlock *ExitingBB : ExitingBlocks) { 2796 // Can't rewrite non-branch yet. 2797 if (!isa<BranchInst>(ExitingBB->getTerminator())) 2798 continue; 2799 2800 // If our exitting block exits multiple loops, we can only rewrite the 2801 // innermost one. 
Otherwise, we're changing how many times the innermost 2802 // loop runs before it exits. 2803 if (LI->getLoopFor(ExitingBB) != L) 2804 continue; 2805 2806 if (!needsLFTR(L, ExitingBB)) 2807 continue; 2808 2809 const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 2810 if (isa<SCEVCouldNotCompute>(ExitCount)) 2811 continue; 2812 2813 // This was handled above, but as we form SCEVs, we can sometimes refine 2814 // existing ones; this allows exit counts to be folded to zero which 2815 // weren't when optimizeLoopExits saw them. Arguably, we should iterate 2816 // until stable to handle cases like this better. 2817 if (ExitCount->isZero()) 2818 continue; 2819 2820 PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT); 2821 if (!IndVar) 2822 continue; 2823 2824 // Avoid high cost expansions. Note: This heuristic is questionable in 2825 // that our definition of "high cost" is not exactly principled. 2826 if (Rewriter.isHighCostExpansion(ExitCount, L)) 2827 continue; 2828 2829 // Check preconditions for proper SCEVExpander operation. SCEV does not 2830 // express SCEVExpander's dependencies, such as LoopSimplify. Instead 2831 // any pass that uses the SCEVExpander must do it. This does not work 2832 // well for loop passes because SCEVExpander makes assumptions about 2833 // all loops, while LoopPassManager only forces the current loop to be 2834 // simplified. 2835 // 2836 // FIXME: SCEV expansion has no way to bail out, so the caller must 2837 // explicitly check any assumptions made by SCEV. Brittle. 2838 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount); 2839 if (!AR || AR->getLoop()->getLoopPreheader()) 2840 Changed |= linearFunctionTestReplace(L, ExitingBB, 2841 ExitCount, IndVar, 2842 Rewriter); 2843 } 2844 } 2845 // Clear the rewriter cache, because values that are in the rewriter's cache 2846 // can be deleted in the loop below, causing the AssertingVH in the cache to 2847 // trigger. 2848 Rewriter.clear(); 2849 2850 // Now that we're done iterating through lists, clean up any instructions 2851 // which are now dead. 2852 while (!DeadInsts.empty()) 2853 if (Instruction *Inst = 2854 dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) 2855 Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); 2856 2857 // The Rewriter may not be used from this point on. 2858 2859 // Loop-invariant instructions in the preheader that aren't used in the 2860 // loop may be sunk below the loop to reduce register pressure. 2861 Changed |= sinkUnusedInvariants(L); 2862 2863 // rewriteFirstIterationLoopExitValues does not rely on the computation of 2864 // trip count and therefore can further simplify exit values in addition to 2865 // rewriteLoopExitValues. 2866 Changed |= rewriteFirstIterationLoopExitValues(L); 2867 2868 // Clean up dead instructions. 2869 Changed |= DeleteDeadPHIs(L->getHeader(), TLI); 2870 2871 // Check a post-condition. 2872 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 2873 "Indvars did not preserve LCSSA!"); 2874 2875 // Verify that LFTR, and any other change have not interfered with SCEV's 2876 // ability to compute trip count. 
2877 #ifndef NDEBUG 2878 if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { 2879 SE->forgetLoop(L); 2880 const SCEV *NewBECount = SE->getBackedgeTakenCount(L); 2881 if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < 2882 SE->getTypeSizeInBits(NewBECount->getType())) 2883 NewBECount = SE->getTruncateOrNoop(NewBECount, 2884 BackedgeTakenCount->getType()); 2885 else 2886 BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, 2887 NewBECount->getType()); 2888 assert(BackedgeTakenCount == NewBECount && "indvars must preserve SCEV"); 2889 } 2890 #endif 2891 2892 return Changed; 2893 } 2894 2895 PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, 2896 LoopStandardAnalysisResults &AR, 2897 LPMUpdater &) { 2898 Function *F = L.getHeader()->getParent(); 2899 const DataLayout &DL = F->getParent()->getDataLayout(); 2900 2901 IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI); 2902 if (!IVS.run(&L)) 2903 return PreservedAnalyses::all(); 2904 2905 auto PA = getLoopPassPreservedAnalyses(); 2906 PA.preserveSet<CFGAnalyses>(); 2907 return PA; 2908 } 2909 2910 namespace { 2911 2912 struct IndVarSimplifyLegacyPass : public LoopPass { 2913 static char ID; // Pass identification, replacement for typeid 2914 2915 IndVarSimplifyLegacyPass() : LoopPass(ID) { 2916 initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); 2917 } 2918 2919 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 2920 if (skipLoop(L)) 2921 return false; 2922 2923 auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 2924 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 2925 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2926 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); 2927 auto *TLI = TLIP ? &TLIP->getTLI() : nullptr; 2928 auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 2929 auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr; 2930 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 2931 2932 IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI); 2933 return IVS.run(L); 2934 } 2935 2936 void getAnalysisUsage(AnalysisUsage &AU) const override { 2937 AU.setPreservesCFG(); 2938 getLoopAnalysisUsage(AU); 2939 } 2940 }; 2941 2942 } // end anonymous namespace 2943 2944 char IndVarSimplifyLegacyPass::ID = 0; 2945 2946 INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", 2947 "Induction Variable Simplification", false, false) 2948 INITIALIZE_PASS_DEPENDENCY(LoopPass) 2949 INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars", 2950 "Induction Variable Simplification", false, false) 2951 2952 Pass *llvm::createIndVarSimplifyPass() { 2953 return new IndVarSimplifyLegacyPass(); 2954 } 2955