1 //===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This transformation analyzes and transforms the induction variables (and 10 // computations derived from them) into simpler forms suitable for subsequent 11 // analysis and transformation. 12 // 13 // If the trip count of a loop is computable, this pass also makes the following 14 // changes: 15 // 1. The exit condition for the loop is canonicalized to compare the 16 // induction value against the exit value. This turns loops like: 17 // 'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)' 18 // 2. Any use outside of the loop of an expression derived from the indvar 19 // is changed to compute the derived value outside of the loop, eliminating 20 // the dependence on the exit value of the induction variable. If the only 21 // purpose of the loop is to compute the exit value of some derived 22 // expression, this transformation will make the loop dead. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "llvm/Transforms/Scalar/IndVarSimplify.h" 27 #include "llvm/ADT/APFloat.h" 28 #include "llvm/ADT/APInt.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/SmallPtrSet.h" 35 #include "llvm/ADT/SmallSet.h" 36 #include "llvm/ADT/SmallVector.h" 37 #include "llvm/ADT/Statistic.h" 38 #include "llvm/ADT/iterator_range.h" 39 #include "llvm/Analysis/LoopInfo.h" 40 #include "llvm/Analysis/LoopPass.h" 41 #include "llvm/Analysis/ScalarEvolution.h" 42 #include "llvm/Analysis/ScalarEvolutionExpander.h" 43 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 44 #include "llvm/Analysis/TargetLibraryInfo.h" 45 #include "llvm/Analysis/TargetTransformInfo.h" 46 #include "llvm/Analysis/ValueTracking.h" 47 #include "llvm/IR/BasicBlock.h" 48 #include "llvm/IR/Constant.h" 49 #include "llvm/IR/ConstantRange.h" 50 #include "llvm/IR/Constants.h" 51 #include "llvm/IR/DataLayout.h" 52 #include "llvm/IR/DerivedTypes.h" 53 #include "llvm/IR/Dominators.h" 54 #include "llvm/IR/Function.h" 55 #include "llvm/IR/IRBuilder.h" 56 #include "llvm/IR/InstrTypes.h" 57 #include "llvm/IR/Instruction.h" 58 #include "llvm/IR/Instructions.h" 59 #include "llvm/IR/IntrinsicInst.h" 60 #include "llvm/IR/Intrinsics.h" 61 #include "llvm/IR/Module.h" 62 #include "llvm/IR/Operator.h" 63 #include "llvm/IR/PassManager.h" 64 #include "llvm/IR/PatternMatch.h" 65 #include "llvm/IR/Type.h" 66 #include "llvm/IR/Use.h" 67 #include "llvm/IR/User.h" 68 #include "llvm/IR/Value.h" 69 #include "llvm/IR/ValueHandle.h" 70 #include "llvm/InitializePasses.h" 71 #include "llvm/Pass.h" 72 #include "llvm/Support/Casting.h" 73 #include "llvm/Support/CommandLine.h" 74 #include "llvm/Support/Compiler.h" 75 #include "llvm/Support/Debug.h" 76 #include "llvm/Support/ErrorHandling.h" 77 #include "llvm/Support/MathExtras.h" 78 #include "llvm/Support/raw_ostream.h" 79 #include "llvm/Transforms/Scalar.h" 80 #include "llvm/Transforms/Scalar/LoopPassManager.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include "llvm/Transforms/Utils/Local.h" 83 #include 
"llvm/Transforms/Utils/LoopUtils.h" 84 #include "llvm/Transforms/Utils/SimplifyIndVar.h" 85 #include <cassert> 86 #include <cstdint> 87 #include <utility> 88 89 using namespace llvm; 90 91 #define DEBUG_TYPE "indvars" 92 93 STATISTIC(NumWidened , "Number of indvars widened"); 94 STATISTIC(NumReplaced , "Number of exit values replaced"); 95 STATISTIC(NumLFTR , "Number of loop exit tests replaced"); 96 STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); 97 STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); 98 99 // Trip count verification can be enabled by default under NDEBUG if we 100 // implement a strong expression equivalence checker in SCEV. Until then, we 101 // use the verify-indvars flag, which may assert in some cases. 102 static cl::opt<bool> VerifyIndvars( 103 "verify-indvars", cl::Hidden, 104 cl::desc("Verify the ScalarEvolution result after running indvars")); 105 106 enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; 107 108 static cl::opt<ReplaceExitVal> ReplaceExitValue( 109 "replexitval", cl::Hidden, cl::init(OnlyCheapRepl), 110 cl::desc("Choose the strategy to replace exit value in IndVarSimplify"), 111 cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"), 112 clEnumValN(OnlyCheapRepl, "cheap", 113 "only replace exit value when the cost is cheap"), 114 clEnumValN(NoHardUse, "noharduse", 115 "only replace exit values when loop def likely dead"), 116 clEnumValN(AlwaysRepl, "always", 117 "always replace exit value whenever possible"))); 118 119 static cl::opt<bool> UsePostIncrementRanges( 120 "indvars-post-increment-ranges", cl::Hidden, 121 cl::desc("Use post increment control-dependent ranges in IndVarSimplify"), 122 cl::init(true)); 123 124 static cl::opt<bool> 125 DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), 126 cl::desc("Disable Linear Function Test Replace optimization")); 127 128 static cl::opt<bool> 129 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true), 130 cl::desc("Predicate conditions in read only loops")); 131 132 namespace { 133 134 struct RewritePhi; 135 136 class IndVarSimplify { 137 LoopInfo *LI; 138 ScalarEvolution *SE; 139 DominatorTree *DT; 140 const DataLayout &DL; 141 TargetLibraryInfo *TLI; 142 const TargetTransformInfo *TTI; 143 144 SmallVector<WeakTrackingVH, 16> DeadInsts; 145 146 bool isValidRewrite(Value *FromVal, Value *ToVal); 147 148 bool handleFloatingPointIV(Loop *L, PHINode *PH); 149 bool rewriteNonIntegerIVs(Loop *L); 150 151 bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); 152 /// Try to eliminate loop exits based on analyzeable exit counts 153 bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter); 154 /// Try to form loop invariant tests for loop exits by changing how many 155 /// iterations of the loop run when that is unobservable. 
156 bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 157 158 bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet); 159 bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); 160 bool rewriteFirstIterationLoopExitValues(Loop *L); 161 bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const; 162 163 bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 164 const SCEV *ExitCount, 165 PHINode *IndVar, SCEVExpander &Rewriter); 166 167 bool sinkUnusedInvariants(Loop *L); 168 169 public: 170 IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, 171 const DataLayout &DL, TargetLibraryInfo *TLI, 172 TargetTransformInfo *TTI) 173 : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {} 174 175 bool run(Loop *L); 176 }; 177 178 } // end anonymous namespace 179 180 /// Return true if the SCEV expansion generated by the rewriter can replace the 181 /// original value. SCEV guarantees that it produces the same value, but the way 182 /// it is produced may be illegal IR. Ideally, this function will only be 183 /// called for verification. 184 bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { 185 // If an SCEV expression subsumed multiple pointers, its expansion could 186 // reassociate the GEP changing the base pointer. This is illegal because the 187 // final address produced by a GEP chain must be inbounds relative to its 188 // underlying object. Otherwise basic alias analysis, among other things, 189 // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid 190 // producing an expression involving multiple pointers. Until then, we must 191 // bail out here. 192 // 193 // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject 194 // because it understands lcssa phis while SCEV does not. 195 Value *FromPtr = FromVal; 196 Value *ToPtr = ToVal; 197 if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) { 198 FromPtr = GEP->getPointerOperand(); 199 } 200 if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) { 201 ToPtr = GEP->getPointerOperand(); 202 } 203 if (FromPtr != FromVal || ToPtr != ToVal) { 204 // Quickly check the common case 205 if (FromPtr == ToPtr) 206 return true; 207 208 // SCEV may have rewritten an expression that produces the GEP's pointer 209 // operand. That's ok as long as the pointer operand has the same base 210 // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the 211 // base of a recurrence. This handles the case in which SCEV expansion 212 // converts a pointer type recurrence into a nonrecurrent pointer base 213 // indexed by an integer recurrence. 214 215 // If the GEP base pointer is a vector of pointers, abort. 216 if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) 217 return false; 218 219 const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); 220 const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); 221 if (FromBase == ToBase) 222 return true; 223 224 LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase 225 << " != " << *ToBase << "\n"); 226 227 return false; 228 } 229 return true; 230 } 231 232 /// Determine the insertion point for this user. By default, insert immediately 233 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the 234 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest 235 /// common dominator for the incoming blocks. 
A nullptr can be returned if no 236 /// viable location is found: it may happen if User is a PHI and Def only comes 237 /// to this PHI from unreachable blocks. 238 static Instruction *getInsertPointForUses(Instruction *User, Value *Def, 239 DominatorTree *DT, LoopInfo *LI) { 240 PHINode *PHI = dyn_cast<PHINode>(User); 241 if (!PHI) 242 return User; 243 244 Instruction *InsertPt = nullptr; 245 for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { 246 if (PHI->getIncomingValue(i) != Def) 247 continue; 248 249 BasicBlock *InsertBB = PHI->getIncomingBlock(i); 250 251 if (!DT->isReachableFromEntry(InsertBB)) 252 continue; 253 254 if (!InsertPt) { 255 InsertPt = InsertBB->getTerminator(); 256 continue; 257 } 258 InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB); 259 InsertPt = InsertBB->getTerminator(); 260 } 261 262 // If we have skipped all inputs, it means that Def only comes to Phi from 263 // unreachable blocks. 264 if (!InsertPt) 265 return nullptr; 266 267 auto *DefI = dyn_cast<Instruction>(Def); 268 if (!DefI) 269 return InsertPt; 270 271 assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); 272 273 auto *L = LI->getLoopFor(DefI->getParent()); 274 assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); 275 276 for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) 277 if (LI->getLoopFor(DTN->getBlock()) == L) 278 return DTN->getBlock()->getTerminator(); 279 280 llvm_unreachable("DefI dominates InsertPt!"); 281 } 282 283 //===----------------------------------------------------------------------===// 284 // rewriteNonIntegerIVs and helpers. Prefer integer IVs. 285 //===----------------------------------------------------------------------===// 286 287 /// Convert APF to an integer, if possible. 288 static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { 289 bool isExact = false; 290 // See if we can convert this to an int64_t 291 uint64_t UIntVal; 292 if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true, 293 APFloat::rmTowardZero, &isExact) != APFloat::opOK || 294 !isExact) 295 return false; 296 IntVal = UIntVal; 297 return true; 298 } 299 300 /// If the loop has floating induction variable then insert corresponding 301 /// integer induction variable if possible. 302 /// For example, 303 /// for(double i = 0; i < 10000; ++i) 304 /// bar(i) 305 /// is converted into 306 /// for(int i = 0; i < 10000; ++i) 307 /// bar((double)i); 308 bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { 309 unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); 310 unsigned BackEdge = IncomingEdge^1; 311 312 // Check incoming value. 313 auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); 314 315 int64_t InitValue; 316 if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) 317 return false; 318 319 // Check IV increment. Reject this PN if increment operation is not 320 // an add or increment value can not be represented by an integer. 321 auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); 322 if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false; 323 324 // If this is not an add of the PHI with a constantfp, or if the constant fp 325 // is not an integer, bail out. 
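  // For illustration only (hypothetical IR, not taken from a test case), an
  // increment such as
  //   %incr = fadd double %iv, 1.0
  // is acceptable here, while
  //   %incr = fadd double %iv, 0.5
  // is rejected because 0.5 cannot be represented as an integer stride.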
326 ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); 327 int64_t IncValue; 328 if (IncValueVal == nullptr || Incr->getOperand(0) != PN || 329 !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) 330 return false; 331 332 // Check Incr uses. One user is PN and the other user is an exit condition 333 // used by the conditional terminator. 334 Value::user_iterator IncrUse = Incr->user_begin(); 335 Instruction *U1 = cast<Instruction>(*IncrUse++); 336 if (IncrUse == Incr->user_end()) return false; 337 Instruction *U2 = cast<Instruction>(*IncrUse++); 338 if (IncrUse != Incr->user_end()) return false; 339 340 // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't 341 // only used by a branch, we can't transform it. 342 FCmpInst *Compare = dyn_cast<FCmpInst>(U1); 343 if (!Compare) 344 Compare = dyn_cast<FCmpInst>(U2); 345 if (!Compare || !Compare->hasOneUse() || 346 !isa<BranchInst>(Compare->user_back())) 347 return false; 348 349 BranchInst *TheBr = cast<BranchInst>(Compare->user_back()); 350 351 // We need to verify that the branch actually controls the iteration count 352 // of the loop. If not, the new IV can overflow and no one will notice. 353 // The branch block must be in the loop and one of the successors must be out 354 // of the loop. 355 assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); 356 if (!L->contains(TheBr->getParent()) || 357 (L->contains(TheBr->getSuccessor(0)) && 358 L->contains(TheBr->getSuccessor(1)))) 359 return false; 360 361 // If it isn't a comparison with an integer-as-fp (the exit value), we can't 362 // transform it. 363 ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); 364 int64_t ExitValue; 365 if (ExitValueVal == nullptr || 366 !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) 367 return false; 368 369 // Find new predicate for integer comparison. 370 CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; 371 switch (Compare->getPredicate()) { 372 default: return false; // Unknown comparison. 373 case CmpInst::FCMP_OEQ: 374 case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; 375 case CmpInst::FCMP_ONE: 376 case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; 377 case CmpInst::FCMP_OGT: 378 case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; 379 case CmpInst::FCMP_OGE: 380 case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; 381 case CmpInst::FCMP_OLT: 382 case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; 383 case CmpInst::FCMP_OLE: 384 case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; 385 } 386 387 // We convert the floating point induction variable to a signed i32 value if 388 // we can. This is only safe if the comparison will not overflow in a way 389 // that won't be trapped by the integer equivalent operations. Check for this 390 // now. 391 // TODO: We could use i64 if it is native and the range requires it. 392 393 // The start/stride/exit values must all fit in signed i32. 394 if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) 395 return false; 396 397 // If not actually striding (add x, 0.0), avoid touching the code. 398 if (IncValue == 0) 399 return false; 400 401 // Positive and negative strides have different safety conditions. 402 if (IncValue > 0) { 403 // If we have a positive stride, we require the init to be less than the 404 // exit value. 
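    // A worked example of the checks below (hypothetical values, for
    // illustration only): with InitValue = 0, ExitValue = 10 and IncValue = 3,
    // Range is 10 and Leftover is 10 % 3 = 1. Under an equality predicate the
    // integer IV would step 0, 3, 6, 9, 12, ... and never equal 10, so the
    // rewrite is rejected; with IncValue = 2 (Leftover == 0) it is allowed.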
405 if (InitValue >= ExitValue) 406 return false; 407 408 uint32_t Range = uint32_t(ExitValue-InitValue); 409 // Check for infinite loop, either: 410 // while (i <= Exit) or until (i > Exit) 411 if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) { 412 if (++Range == 0) return false; // Range overflows. 413 } 414 415 unsigned Leftover = Range % uint32_t(IncValue); 416 417 // If this is an equality comparison, we require that the strided value 418 // exactly land on the exit value, otherwise the IV condition will wrap 419 // around and do things the fp IV wouldn't. 420 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 421 Leftover != 0) 422 return false; 423 424 // If the stride would wrap around the i32 before exiting, we can't 425 // transform the IV. 426 if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) 427 return false; 428 } else { 429 // If we have a negative stride, we require the init to be greater than the 430 // exit value. 431 if (InitValue <= ExitValue) 432 return false; 433 434 uint32_t Range = uint32_t(InitValue-ExitValue); 435 // Check for infinite loop, either: 436 // while (i >= Exit) or until (i < Exit) 437 if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) { 438 if (++Range == 0) return false; // Range overflows. 439 } 440 441 unsigned Leftover = Range % uint32_t(-IncValue); 442 443 // If this is an equality comparison, we require that the strided value 444 // exactly land on the exit value, otherwise the IV condition will wrap 445 // around and do things the fp IV wouldn't. 446 if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && 447 Leftover != 0) 448 return false; 449 450 // If the stride would wrap around the i32 before exiting, we can't 451 // transform the IV. 452 if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) 453 return false; 454 } 455 456 IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); 457 458 // Insert new integer induction variable. 459 PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); 460 NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), 461 PN->getIncomingBlock(IncomingEdge)); 462 463 Value *NewAdd = 464 BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), 465 Incr->getName()+".int", Incr); 466 NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); 467 468 ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, 469 ConstantInt::get(Int32Ty, ExitValue), 470 Compare->getName()); 471 472 // In the following deletions, PN may become dead and may be deleted. 473 // Use a WeakTrackingVH to observe whether this happens. 474 WeakTrackingVH WeakPH = PN; 475 476 // Delete the old floating point exit comparison. The branch starts using the 477 // new comparison. 478 NewCompare->takeName(Compare); 479 Compare->replaceAllUsesWith(NewCompare); 480 RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI); 481 482 // Delete the old floating point increment. 483 Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); 484 RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI); 485 486 // If the FP induction variable still has uses, this is because something else 487 // in the loop uses its value. In order to canonicalize the induction 488 // variable, we chose to eliminate the IV and rewrite it in terms of an 489 // int->fp cast. 490 // 491 // We give preference to sitofp over uitofp because it is faster on most 492 // platforms. 
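  // As a sketch of the resulting IR (value names are illustrative), a
  // remaining floating point use such as
  //   call void @bar(double %iv)
  // becomes
  //   %indvar.conv = sitofp i32 %iv.int to double
  //   call void @bar(double %indvar.conv)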
493 if (WeakPH) { 494 Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", 495 &*PN->getParent()->getFirstInsertionPt()); 496 PN->replaceAllUsesWith(Conv); 497 RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); 498 } 499 return true; 500 } 501 502 bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { 503 // First step. Check to see if there are any floating-point recurrences. 504 // If there are, change them into integer recurrences, permitting analysis by 505 // the SCEV routines. 506 BasicBlock *Header = L->getHeader(); 507 508 SmallVector<WeakTrackingVH, 8> PHIs; 509 for (PHINode &PN : Header->phis()) 510 PHIs.push_back(&PN); 511 512 bool Changed = false; 513 for (unsigned i = 0, e = PHIs.size(); i != e; ++i) 514 if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) 515 Changed |= handleFloatingPointIV(L, PN); 516 517 // If the loop previously had floating-point IV, ScalarEvolution 518 // may not have been able to compute a trip count. Now that we've done some 519 // re-writing, the trip count may be computable. 520 if (Changed) 521 SE->forgetLoop(L); 522 return Changed; 523 } 524 525 namespace { 526 527 // Collect information about PHI nodes which can be transformed in 528 // rewriteLoopExitValues. 529 struct RewritePhi { 530 PHINode *PN; // For which PHI node is this replacement? 531 unsigned Ith; // For which incoming value? 532 const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting. 533 Instruction *ExpansionPoint; // Where we'd like to expand that SCEV? 534 bool HighCost; // Is this expansion a high-cost? 535 536 Value *Expansion = nullptr; 537 bool ValidRewrite = false; 538 539 RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt, 540 bool H) 541 : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt), 542 HighCost(H) {} 543 }; 544 545 } // end anonymous namespace 546 547 //===----------------------------------------------------------------------===// 548 // rewriteLoopExitValues - Optimize IV users outside the loop. 549 // As a side effect, reduces the amount of IV processing within the loop. 550 //===----------------------------------------------------------------------===// 551 552 bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const { 553 SmallPtrSet<const Instruction *, 8> Visited; 554 SmallVector<const Instruction *, 8> WorkList; 555 Visited.insert(I); 556 WorkList.push_back(I); 557 while (!WorkList.empty()) { 558 const Instruction *Curr = WorkList.pop_back_val(); 559 // This use is outside the loop, nothing to do. 560 if (!L->contains(Curr)) 561 continue; 562 // Do we assume it is a "hard" use which will not be eliminated easily? 563 if (Curr->mayHaveSideEffects()) 564 return true; 565 // Otherwise, add all its users to worklist. 566 for (auto U : Curr->users()) { 567 auto *UI = cast<Instruction>(U); 568 if (Visited.insert(UI).second) 569 WorkList.push_back(UI); 570 } 571 } 572 return false; 573 } 574 575 /// Check to see if this loop has a computable loop-invariant execution count. 576 /// If so, this means that we can compute the final value of any expressions 577 /// that are recurrent in the loop, and substitute the exit values from the loop 578 /// into any instructions outside of the loop that use the final values of the 579 /// current expressions. 
580 /// 581 /// This is mostly redundant with the regular IndVarSimplify activities that 582 /// happen later, except that it's more powerful in some cases, because it's 583 /// able to brute-force evaluate arbitrary instructions as long as they have 584 /// constant operands at the beginning of the loop. 585 bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { 586 // Check a pre-condition. 587 assert(L->isRecursivelyLCSSAForm(*DT, *LI) && 588 "Indvars did not preserve LCSSA!"); 589 590 SmallVector<BasicBlock*, 8> ExitBlocks; 591 L->getUniqueExitBlocks(ExitBlocks); 592 593 SmallVector<RewritePhi, 8> RewritePhiSet; 594 // Find all values that are computed inside the loop, but used outside of it. 595 // Because of LCSSA, these values will only occur in LCSSA PHI Nodes. Scan 596 // the exit blocks of the loop to find them. 597 for (BasicBlock *ExitBB : ExitBlocks) { 598 // If there are no PHI nodes in this exit block, then no values defined 599 // inside the loop are used on this path, skip it. 600 PHINode *PN = dyn_cast<PHINode>(ExitBB->begin()); 601 if (!PN) continue; 602 603 unsigned NumPreds = PN->getNumIncomingValues(); 604 605 // Iterate over all of the PHI nodes. 606 BasicBlock::iterator BBI = ExitBB->begin(); 607 while ((PN = dyn_cast<PHINode>(BBI++))) { 608 if (PN->use_empty()) 609 continue; // dead use, don't replace it 610 611 if (!SE->isSCEVable(PN->getType())) 612 continue; 613 614 // It's necessary to tell ScalarEvolution about this explicitly so that 615 // it can walk the def-use list and forget all SCEVs, as it may not be 616 // watching the PHI itself. Once the new exit value is in place, there 617 // may not be a def-use connection between the loop and every instruction 618 // which got a SCEVAddRecExpr for that loop. 619 SE->forgetValue(PN); 620 621 // Iterate over all of the values in all the PHI nodes. 622 for (unsigned i = 0; i != NumPreds; ++i) { 623 // If the value being merged in is not integer or is not defined 624 // in the loop, skip it. 625 Value *InVal = PN->getIncomingValue(i); 626 if (!isa<Instruction>(InVal)) 627 continue; 628 629 // If this pred is for a subloop, not L itself, skip it. 630 if (LI->getLoopFor(PN->getIncomingBlock(i)) != L) 631 continue; // The Block is in a subloop, skip it. 632 633 // Check that InVal is defined in the loop. 634 Instruction *Inst = cast<Instruction>(InVal); 635 if (!L->contains(Inst)) 636 continue; 637 638 // Okay, this instruction has a user outside of the current loop 639 // and varies predictably *inside* the loop. Evaluate the value it 640 // contains when the loop exits, if possible. We prefer to start with 641 // expressions which are true for all exits (so as to maximize 642 // expression reuse by the SCEVExpander), but resort to per-exit 643 // evaluation if that fails. 644 const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); 645 if (isa<SCEVCouldNotCompute>(ExitValue) || 646 !SE->isLoopInvariant(ExitValue, L) || 647 !isSafeToExpand(ExitValue, *SE)) { 648 // TODO: This should probably be sunk into SCEV in some way; maybe a 649 // getSCEVForExit(SCEV*, L, ExitingBB)? It can be generalized for 650 // most SCEV expressions and other recurrence types (e.g. shift 651 // recurrences). Is there existing code we can reuse? 
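          // As an illustrative sketch (not a specific test case): for an IV
          // described by the add recurrence {0,+,1} and an exit whose exit
          // count is n - 1, evaluating the recurrence at that iteration gives
          // the loop-invariant value n - 1, which can then be expanded
          // outside the loop.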
652 const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i)); 653 if (isa<SCEVCouldNotCompute>(ExitCount)) 654 continue; 655 if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst))) 656 if (AddRec->getLoop() == L) 657 ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE); 658 if (isa<SCEVCouldNotCompute>(ExitValue) || 659 !SE->isLoopInvariant(ExitValue, L) || 660 !isSafeToExpand(ExitValue, *SE)) 661 continue; 662 } 663 664 // Computing the value outside of the loop brings no benefit if it is 665 // definitely used inside the loop in a way which can not be optimized 666 // away. Avoid doing so unless we know we have a value which computes 667 // the ExitValue already. TODO: This should be merged into SCEV 668 // expander to leverage its knowledge of existing expressions. 669 if (ReplaceExitValue != AlwaysRepl && 670 !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) && 671 hasHardUserWithinLoop(L, Inst)) 672 continue; 673 674 // Check if expansions of this SCEV would count as being high cost. 675 bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); 676 677 // Note that we must not perform expansions until after 678 // we query *all* the costs, because if we perform temporary expansion 679 // inbetween, one that we might not intend to keep, said expansion 680 // *may* affect cost calculation of the the next SCEV's we'll query, 681 // and next SCEV may errneously get smaller cost. 682 683 // Collect all the candidate PHINodes to be rewritten. 684 RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost); 685 } 686 } 687 } 688 689 // Now that we've done preliminary filtering and billed all the SCEV's, 690 // we can perform the last sanity check - the expansion must be valid. 691 for (RewritePhi &Phi : RewritePhiSet) { 692 Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(), 693 Phi.ExpansionPoint); 694 695 LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " 696 << *(Phi.Expansion) << '\n' 697 << " LoopVal = " << *(Phi.ExpansionPoint) << "\n"); 698 699 // FIXME: isValidRewrite() is a hack. it should be an assert, eventually. 700 Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion); 701 if (!Phi.ValidRewrite) { 702 DeadInsts.push_back(Phi.Expansion); 703 continue; 704 } 705 706 #ifndef NDEBUG 707 // If we reuse an instruction from a loop which is neither L nor one of 708 // its containing loops, we end up breaking LCSSA form for this loop by 709 // creating a new use of its instruction. 710 if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion)) 711 if (auto *EVL = LI->getLoopFor(ExitInsn->getParent())) 712 if (EVL != L) 713 assert(EVL->contains(L) && "LCSSA breach detected!"); 714 #endif 715 } 716 717 // TODO: after isValidRewrite() is an assertion, evaluate whether 718 // it is beneficial to change how we calculate high-cost: 719 // if we have SCEV 'A' which we know we will expand, should we calculate 720 // the cost of other SCEV's after expanding SCEV 'A', 721 // thus potentially giving cost bonus to those other SCEV's? 722 723 bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); 724 725 bool Changed = false; 726 // Transformation. 727 for (const RewritePhi &Phi : RewritePhiSet) { 728 if (!Phi.ValidRewrite) 729 continue; 730 731 PHINode *PN = Phi.PN; 732 Value *ExitVal = Phi.Expansion; 733 734 // Only do the rewrite when the ExitValue can be expanded cheaply. 735 // If LoopCanBeDel is true, rewrite exit value aggressively. 
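    // To illustrate the overall effect (a sketch, not a regression test), in
    //   for (i = 0; i != n; ++i) {}
    //   use(i);
    // the LCSSA phi feeding use(i) is rewritten to use 'n' directly, after
    // which the loop itself may become dead and deletable.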
736 if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) { 737 DeadInsts.push_back(ExitVal); 738 continue; 739 } 740 741 Changed = true; 742 ++NumReplaced; 743 Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith)); 744 PN->setIncomingValue(Phi.Ith, ExitVal); 745 746 // If this instruction is dead now, delete it. Don't do it now to avoid 747 // invalidating iterators. 748 if (isInstructionTriviallyDead(Inst, TLI)) 749 DeadInsts.push_back(Inst); 750 751 // Replace PN with ExitVal if that is legal and does not break LCSSA. 752 if (PN->getNumIncomingValues() == 1 && 753 LI->replacementPreservesLCSSAForm(PN, ExitVal)) { 754 PN->replaceAllUsesWith(ExitVal); 755 PN->eraseFromParent(); 756 } 757 } 758 759 // The insertion point instruction may have been deleted; clear it out 760 // so that the rewriter doesn't trip over it later. 761 Rewriter.clearInsertPoint(); 762 return Changed; 763 } 764 765 //===---------------------------------------------------------------------===// 766 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know 767 // they will exit at the first iteration. 768 //===---------------------------------------------------------------------===// 769 770 /// Check to see if this loop has loop invariant conditions which lead to loop 771 /// exits. If so, we know that if the exit path is taken, it is at the first 772 /// loop iteration. This lets us predict exit values of PHI nodes that live in 773 /// loop header. 774 bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { 775 // Verify the input to the pass is already in LCSSA form. 776 assert(L->isLCSSAForm(*DT)); 777 778 SmallVector<BasicBlock *, 8> ExitBlocks; 779 L->getUniqueExitBlocks(ExitBlocks); 780 781 bool MadeAnyChanges = false; 782 for (auto *ExitBB : ExitBlocks) { 783 // If there are no more PHI nodes in this exit block, then no more 784 // values defined inside the loop are used on this path. 785 for (PHINode &PN : ExitBB->phis()) { 786 for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues(); 787 IncomingValIdx != E; ++IncomingValIdx) { 788 auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx); 789 790 // Can we prove that the exit must run on the first iteration if it 791 // runs at all? (i.e. early exits are fine for our purposes, but 792 // traces which lead to this exit being taken on the 2nd iteration 793 // aren't.) Note that this is about whether the exit branch is 794 // executed, not about whether it is taken. 795 if (!L->getLoopLatch() || 796 !DT->dominates(IncomingBB, L->getLoopLatch())) 797 continue; 798 799 // Get condition that leads to the exit path. 800 auto *TermInst = IncomingBB->getTerminator(); 801 802 Value *Cond = nullptr; 803 if (auto *BI = dyn_cast<BranchInst>(TermInst)) { 804 // Must be a conditional branch, otherwise the block 805 // should not be in the loop. 806 Cond = BI->getCondition(); 807 } else if (auto *SI = dyn_cast<SwitchInst>(TermInst)) 808 Cond = SI->getCondition(); 809 else 810 continue; 811 812 if (!L->isLoopInvariant(Cond)) 813 continue; 814 815 auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx)); 816 817 // Only deal with PHIs in the loop header. 818 if (!ExitVal || ExitVal->getParent() != L->getHeader()) 819 continue; 820 821 // If ExitVal is a PHI on the loop header, then we know its 822 // value along this exit because the exit can only be taken 823 // on the first iteration. 
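        // For example (a sketch): given a header phi
        //   %p = phi i32 [ %init, %preheader ], [ %next, %latch ]
        // and an exit controlled by a loop-invariant condition in a block
        // that dominates the latch, the exit can only be taken on the first
        // iteration, so the LCSSA phi in the exit block can use %init.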
824 auto *LoopPreheader = L->getLoopPreheader(); 825 assert(LoopPreheader && "Invalid loop"); 826 int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader); 827 if (PreheaderIdx != -1) { 828 assert(ExitVal->getParent() == L->getHeader() && 829 "ExitVal must be in loop header"); 830 MadeAnyChanges = true; 831 PN.setIncomingValue(IncomingValIdx, 832 ExitVal->getIncomingValue(PreheaderIdx)); 833 } 834 } 835 } 836 } 837 return MadeAnyChanges; 838 } 839 840 /// Check whether it is possible to delete the loop after rewriting exit 841 /// value. If it is possible, ignore ReplaceExitValue and do rewriting 842 /// aggressively. 843 bool IndVarSimplify::canLoopBeDeleted( 844 Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) { 845 BasicBlock *Preheader = L->getLoopPreheader(); 846 // If there is no preheader, the loop will not be deleted. 847 if (!Preheader) 848 return false; 849 850 // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1. 851 // We obviate multiple ExitingBlocks case for simplicity. 852 // TODO: If we see testcase with multiple ExitingBlocks can be deleted 853 // after exit value rewriting, we can enhance the logic here. 854 SmallVector<BasicBlock *, 4> ExitingBlocks; 855 L->getExitingBlocks(ExitingBlocks); 856 SmallVector<BasicBlock *, 8> ExitBlocks; 857 L->getUniqueExitBlocks(ExitBlocks); 858 if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1) 859 return false; 860 861 BasicBlock *ExitBlock = ExitBlocks[0]; 862 BasicBlock::iterator BI = ExitBlock->begin(); 863 while (PHINode *P = dyn_cast<PHINode>(BI)) { 864 Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]); 865 866 // If the Incoming value of P is found in RewritePhiSet, we know it 867 // could be rewritten to use a loop invariant value in transformation 868 // phase later. Skip it in the loop invariant check below. 869 bool found = false; 870 for (const RewritePhi &Phi : RewritePhiSet) { 871 if (!Phi.ValidRewrite) 872 continue; 873 unsigned i = Phi.Ith; 874 if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) { 875 found = true; 876 break; 877 } 878 } 879 880 Instruction *I; 881 if (!found && (I = dyn_cast<Instruction>(Incoming))) 882 if (!L->hasLoopInvariantOperands(I)) 883 return false; 884 885 ++BI; 886 } 887 888 for (auto *BB : L->blocks()) 889 if (llvm::any_of(*BB, [](Instruction &I) { 890 return I.mayHaveSideEffects(); 891 })) 892 return false; 893 894 return true; 895 } 896 897 //===----------------------------------------------------------------------===// 898 // IV Widening - Extend the width of an IV to cover its widest uses. 899 //===----------------------------------------------------------------------===// 900 901 namespace { 902 903 // Collect information about induction variables that are used by sign/zero 904 // extend operations. This information is recorded by CollectExtend and provides 905 // the input to WidenIV. 906 struct WideIVInfo { 907 PHINode *NarrowIV = nullptr; 908 909 // Widest integer type created [sz]ext 910 Type *WidestNativeType = nullptr; 911 912 // Was a sext user seen before a zext? 913 bool IsSigned = false; 914 }; 915 916 } // end anonymous namespace 917 918 /// Update information about the induction variable that is extended by this 919 /// sign or zero extend operation. This is used to determine the final width of 920 /// the IV before actually widening it. 
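/// For example (an illustrative sketch): seeing
///   %ext = sext i32 %iv to i64
/// records i64 as the widest native type and marks the widening as signed,
/// provided i64 is a legal integer type for the target.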
921 static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, 922 const TargetTransformInfo *TTI) { 923 bool IsSigned = Cast->getOpcode() == Instruction::SExt; 924 if (!IsSigned && Cast->getOpcode() != Instruction::ZExt) 925 return; 926 927 Type *Ty = Cast->getType(); 928 uint64_t Width = SE->getTypeSizeInBits(Ty); 929 if (!Cast->getModule()->getDataLayout().isLegalInteger(Width)) 930 return; 931 932 // Check that `Cast` actually extends the induction variable (we rely on this 933 // later). This takes care of cases where `Cast` is extending a truncation of 934 // the narrow induction variable, and thus can end up being narrower than the 935 // "narrow" induction variable. 936 uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType()); 937 if (NarrowIVWidth >= Width) 938 return; 939 940 // Cast is either an sext or zext up to this point. 941 // We should not widen an indvar if arithmetics on the wider indvar are more 942 // expensive than those on the narrower indvar. We check only the cost of ADD 943 // because at least an ADD is required to increment the induction variable. We 944 // could compute more comprehensively the cost of all instructions on the 945 // induction variable when necessary. 946 if (TTI && 947 TTI->getArithmeticInstrCost(Instruction::Add, Ty) > 948 TTI->getArithmeticInstrCost(Instruction::Add, 949 Cast->getOperand(0)->getType())) { 950 return; 951 } 952 953 if (!WI.WidestNativeType) { 954 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 955 WI.IsSigned = IsSigned; 956 return; 957 } 958 959 // We extend the IV to satisfy the sign of its first user, arbitrarily. 960 if (WI.IsSigned != IsSigned) 961 return; 962 963 if (Width > SE->getTypeSizeInBits(WI.WidestNativeType)) 964 WI.WidestNativeType = SE->getEffectiveSCEVType(Ty); 965 } 966 967 namespace { 968 969 /// Record a link in the Narrow IV def-use chain along with the WideIV that 970 /// computes the same value as the Narrow IV def. This avoids caching Use* 971 /// pointers. 972 struct NarrowIVDefUse { 973 Instruction *NarrowDef = nullptr; 974 Instruction *NarrowUse = nullptr; 975 Instruction *WideDef = nullptr; 976 977 // True if the narrow def is never negative. Tracking this information lets 978 // us use a sign extension instead of a zero extension or vice versa, when 979 // profitable and legal. 980 bool NeverNegative = false; 981 982 NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, 983 bool NeverNegative) 984 : NarrowDef(ND), NarrowUse(NU), WideDef(WD), 985 NeverNegative(NeverNegative) {} 986 }; 987 988 /// The goal of this transform is to remove sign and zero extends without 989 /// creating any new induction variables. To do this, it creates a new phi of 990 /// the wider type and redirects all users, either removing extends or inserting 991 /// truncs whenever we stop propagating the type. 992 class WidenIV { 993 // Parameters 994 PHINode *OrigPhi; 995 Type *WideType; 996 997 // Context 998 LoopInfo *LI; 999 Loop *L; 1000 ScalarEvolution *SE; 1001 DominatorTree *DT; 1002 1003 // Does the module have any calls to the llvm.experimental.guard intrinsic 1004 // at all? If not we can avoid scanning instructions looking for guards. 
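  // Such guards take the form (illustrative only):
  //   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]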
1005 bool HasGuards; 1006 1007 // Result 1008 PHINode *WidePhi = nullptr; 1009 Instruction *WideInc = nullptr; 1010 const SCEV *WideIncExpr = nullptr; 1011 SmallVectorImpl<WeakTrackingVH> &DeadInsts; 1012 1013 SmallPtrSet<Instruction *,16> Widened; 1014 SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; 1015 1016 enum ExtendKind { ZeroExtended, SignExtended, Unknown }; 1017 1018 // A map tracking the kind of extension used to widen each narrow IV 1019 // and narrow IV user. 1020 // Key: pointer to a narrow IV or IV user. 1021 // Value: the kind of extension used to widen this Instruction. 1022 DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap; 1023 1024 using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>; 1025 1026 // A map with control-dependent ranges for post increment IV uses. The key is 1027 // a pair of IV def and a use of this def denoting the context. The value is 1028 // a ConstantRange representing possible values of the def at the given 1029 // context. 1030 DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos; 1031 1032 Optional<ConstantRange> getPostIncRangeInfo(Value *Def, 1033 Instruction *UseI) { 1034 DefUserPair Key(Def, UseI); 1035 auto It = PostIncRangeInfos.find(Key); 1036 return It == PostIncRangeInfos.end() 1037 ? Optional<ConstantRange>(None) 1038 : Optional<ConstantRange>(It->second); 1039 } 1040 1041 void calculatePostIncRanges(PHINode *OrigPhi); 1042 void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); 1043 1044 void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { 1045 DefUserPair Key(Def, UseI); 1046 auto It = PostIncRangeInfos.find(Key); 1047 if (It == PostIncRangeInfos.end()) 1048 PostIncRangeInfos.insert({Key, R}); 1049 else 1050 It->second = R.intersectWith(It->second); 1051 } 1052 1053 public: 1054 WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, 1055 DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, 1056 bool HasGuards) 1057 : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), 1058 L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), 1059 HasGuards(HasGuards), DeadInsts(DI) { 1060 assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); 1061 ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; 1062 } 1063 1064 PHINode *createWideIV(SCEVExpander &Rewriter); 1065 1066 protected: 1067 Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, 1068 Instruction *Use); 1069 1070 Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); 1071 Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, 1072 const SCEVAddRecExpr *WideAR); 1073 Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU); 1074 1075 ExtendKind getExtendKind(Instruction *I); 1076 1077 using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>; 1078 1079 WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); 1080 1081 WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU); 1082 1083 const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 1084 unsigned OpCode) const; 1085 1086 Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); 1087 1088 bool widenLoopCompare(NarrowIVDefUse DU); 1089 bool widenWithVariantLoadUse(NarrowIVDefUse DU); 1090 void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU); 1091 1092 void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); 1093 }; 1094 1095 } // end anonymous namespace 1096 1097 Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, 1098 bool IsSigned, Instruction *Use) { 1099 // Set the debug location and conservative insertion point. 1100 IRBuilder<> Builder(Use); 1101 // Hoist the insertion point into loop preheaders as far as possible. 1102 for (const Loop *L = LI->getLoopFor(Use->getParent()); 1103 L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper); 1104 L = L->getParentLoop()) 1105 Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); 1106 1107 return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : 1108 Builder.CreateZExt(NarrowOper, WideType); 1109 } 1110 1111 /// Instantiate a wide operation to replace a narrow operation. This only needs 1112 /// to handle operations that can evaluation to SCEVAddRec. It can safely return 1113 /// 0 for any operation we decide not to clone. 1114 Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, 1115 const SCEVAddRecExpr *WideAR) { 1116 unsigned Opcode = DU.NarrowUse->getOpcode(); 1117 switch (Opcode) { 1118 default: 1119 return nullptr; 1120 case Instruction::Add: 1121 case Instruction::Mul: 1122 case Instruction::UDiv: 1123 case Instruction::Sub: 1124 return cloneArithmeticIVUser(DU, WideAR); 1125 1126 case Instruction::And: 1127 case Instruction::Or: 1128 case Instruction::Xor: 1129 case Instruction::Shl: 1130 case Instruction::LShr: 1131 case Instruction::AShr: 1132 return cloneBitwiseIVUser(DU); 1133 } 1134 } 1135 1136 Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { 1137 Instruction *NarrowUse = DU.NarrowUse; 1138 Instruction *NarrowDef = DU.NarrowDef; 1139 Instruction *WideDef = DU.WideDef; 1140 1141 LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); 1142 1143 // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything 1144 // about the narrow operand yet so must insert a [sz]ext. It is probably loop 1145 // invariant and will be folded or hoisted. If it actually comes from a 1146 // widened IV, it should be removed during a future call to widenIVUse. 1147 bool IsSigned = getExtendKind(NarrowDef) == SignExtended; 1148 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1149 ? WideDef 1150 : createExtendInst(NarrowUse->getOperand(0), WideType, 1151 IsSigned, NarrowUse); 1152 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1153 ? 
WideDef 1154 : createExtendInst(NarrowUse->getOperand(1), WideType, 1155 IsSigned, NarrowUse); 1156 1157 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1158 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1159 NarrowBO->getName()); 1160 IRBuilder<> Builder(NarrowUse); 1161 Builder.Insert(WideBO); 1162 WideBO->copyIRFlags(NarrowBO); 1163 return WideBO; 1164 } 1165 1166 Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, 1167 const SCEVAddRecExpr *WideAR) { 1168 Instruction *NarrowUse = DU.NarrowUse; 1169 Instruction *NarrowDef = DU.NarrowDef; 1170 Instruction *WideDef = DU.WideDef; 1171 1172 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1173 1174 unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; 1175 1176 // We're trying to find X such that 1177 // 1178 // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X 1179 // 1180 // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), 1181 // and check using SCEV if any of them are correct. 1182 1183 // Returns true if extending NonIVNarrowDef according to `SignExt` is a 1184 // correct solution to X. 1185 auto GuessNonIVOperand = [&](bool SignExt) { 1186 const SCEV *WideLHS; 1187 const SCEV *WideRHS; 1188 1189 auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { 1190 if (SignExt) 1191 return SE->getSignExtendExpr(S, Ty); 1192 return SE->getZeroExtendExpr(S, Ty); 1193 }; 1194 1195 if (IVOpIdx == 0) { 1196 WideLHS = SE->getSCEV(WideDef); 1197 const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); 1198 WideRHS = GetExtend(NarrowRHS, WideType); 1199 } else { 1200 const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); 1201 WideLHS = GetExtend(NarrowLHS, WideType); 1202 WideRHS = SE->getSCEV(WideDef); 1203 } 1204 1205 // WideUse is "WideDef `op.wide` X" as described in the comment. 1206 const SCEV *WideUse = nullptr; 1207 1208 switch (NarrowUse->getOpcode()) { 1209 default: 1210 llvm_unreachable("No other possibility!"); 1211 1212 case Instruction::Add: 1213 WideUse = SE->getAddExpr(WideLHS, WideRHS); 1214 break; 1215 1216 case Instruction::Mul: 1217 WideUse = SE->getMulExpr(WideLHS, WideRHS); 1218 break; 1219 1220 case Instruction::UDiv: 1221 WideUse = SE->getUDivExpr(WideLHS, WideRHS); 1222 break; 1223 1224 case Instruction::Sub: 1225 WideUse = SE->getMinusSCEV(WideLHS, WideRHS); 1226 break; 1227 } 1228 1229 return WideUse == WideAR; 1230 }; 1231 1232 bool SignExtend = getExtendKind(NarrowDef) == SignExtended; 1233 if (!GuessNonIVOperand(SignExtend)) { 1234 SignExtend = !SignExtend; 1235 if (!GuessNonIVOperand(SignExtend)) 1236 return nullptr; 1237 } 1238 1239 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1240 ? WideDef 1241 : createExtendInst(NarrowUse->getOperand(0), WideType, 1242 SignExtend, NarrowUse); 1243 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1244 ? 
WideDef 1245 : createExtendInst(NarrowUse->getOperand(1), WideType, 1246 SignExtend, NarrowUse); 1247 1248 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1249 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1250 NarrowBO->getName()); 1251 1252 IRBuilder<> Builder(NarrowUse); 1253 Builder.Insert(WideBO); 1254 WideBO->copyIRFlags(NarrowBO); 1255 return WideBO; 1256 } 1257 1258 WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) { 1259 auto It = ExtendKindMap.find(I); 1260 assert(It != ExtendKindMap.end() && "Instruction not yet extended!"); 1261 return It->second; 1262 } 1263 1264 const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, 1265 unsigned OpCode) const { 1266 if (OpCode == Instruction::Add) 1267 return SE->getAddExpr(LHS, RHS); 1268 if (OpCode == Instruction::Sub) 1269 return SE->getMinusSCEV(LHS, RHS); 1270 if (OpCode == Instruction::Mul) 1271 return SE->getMulExpr(LHS, RHS); 1272 1273 llvm_unreachable("Unsupported opcode."); 1274 } 1275 1276 /// No-wrap operations can transfer sign extension of their result to their 1277 /// operands. Generate the SCEV value for the widened operation without 1278 /// actually modifying the IR yet. If the expression after extending the 1279 /// operands is an AddRec for this loop, return the AddRec and the kind of 1280 /// extension used. 1281 WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { 1282 // Handle the common case of add<nsw/nuw> 1283 const unsigned OpCode = DU.NarrowUse->getOpcode(); 1284 // Only Add/Sub/Mul instructions supported yet. 1285 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1286 OpCode != Instruction::Mul) 1287 return {nullptr, Unknown}; 1288 1289 // One operand (NarrowDef) has already been extended to WideDef. Now determine 1290 // if extending the other will lead to a recurrence. 1291 const unsigned ExtendOperIdx = 1292 DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; 1293 assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); 1294 1295 const SCEV *ExtendOperExpr = nullptr; 1296 const OverflowingBinaryOperator *OBO = 1297 cast<OverflowingBinaryOperator>(DU.NarrowUse); 1298 ExtendKind ExtKind = getExtendKind(DU.NarrowDef); 1299 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1300 ExtendOperExpr = SE->getSignExtendExpr( 1301 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1302 else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1303 ExtendOperExpr = SE->getZeroExtendExpr( 1304 SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); 1305 else 1306 return {nullptr, Unknown}; 1307 1308 // When creating this SCEV expr, don't apply the current operations NSW or NUW 1309 // flags. This instruction may be guarded by control flow that the no-wrap 1310 // behavior depends on. Non-control-equivalent instructions can be mapped to 1311 // the same SCEV expression, and it would be incorrect to transfer NSW/NUW 1312 // semantics to those operations. 1313 const SCEV *lhs = SE->getSCEV(DU.WideDef); 1314 const SCEV *rhs = ExtendOperExpr; 1315 1316 // Let's swap operands to the initial order for the case of non-commutative 1317 // operations, like SUB. See PR21014. 
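  // For instance (a sketch): for a narrow use 'sub %x, %iv', ExtendOperIdx is
  // 0, so lhs and rhs are swapped back so that we form ext(%x) - wide(%iv)
  // rather than wide(%iv) - ext(%x).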
1318 if (ExtendOperIdx == 0) 1319 std::swap(lhs, rhs); 1320 const SCEVAddRecExpr *AddRec = 1321 dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode)); 1322 1323 if (!AddRec || AddRec->getLoop() != L) 1324 return {nullptr, Unknown}; 1325 1326 return {AddRec, ExtKind}; 1327 } 1328 1329 /// Is this instruction potentially interesting for further simplification after 1330 /// widening it's type? In other words, can the extend be safely hoisted out of 1331 /// the loop with SCEV reducing the value to a recurrence on the same loop. If 1332 /// so, return the extended recurrence and the kind of extension used. Otherwise 1333 /// return {nullptr, Unknown}. 1334 WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) { 1335 if (!SE->isSCEVable(DU.NarrowUse->getType())) 1336 return {nullptr, Unknown}; 1337 1338 const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse); 1339 if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= 1340 SE->getTypeSizeInBits(WideType)) { 1341 // NarrowUse implicitly widens its operand. e.g. a gep with a narrow 1342 // index. So don't follow this use. 1343 return {nullptr, Unknown}; 1344 } 1345 1346 const SCEV *WideExpr; 1347 ExtendKind ExtKind; 1348 if (DU.NeverNegative) { 1349 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1350 if (isa<SCEVAddRecExpr>(WideExpr)) 1351 ExtKind = SignExtended; 1352 else { 1353 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1354 ExtKind = ZeroExtended; 1355 } 1356 } else if (getExtendKind(DU.NarrowDef) == SignExtended) { 1357 WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType); 1358 ExtKind = SignExtended; 1359 } else { 1360 WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType); 1361 ExtKind = ZeroExtended; 1362 } 1363 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); 1364 if (!AddRec || AddRec->getLoop() != L) 1365 return {nullptr, Unknown}; 1366 return {AddRec, ExtKind}; 1367 } 1368 1369 /// This IV user cannot be widened. Replace this use of the original narrow IV 1370 /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. 1371 static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { 1372 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1373 if (!InsertPt) 1374 return; 1375 LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " 1376 << *DU.NarrowUse << "\n"); 1377 IRBuilder<> Builder(InsertPt); 1378 Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); 1379 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); 1380 } 1381 1382 /// If the narrow use is a compare instruction, then widen the compare 1383 // (and possibly the other operand). The extend operation is hoisted into the 1384 // loop preheader as far as possible. 1385 bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { 1386 ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse); 1387 if (!Cmp) 1388 return false; 1389 1390 // We can legally widen the comparison in the following two cases: 1391 // 1392 // - The signedness of the IV extension and comparison match 1393 // 1394 // - The narrow IV is always positive (and thus its sign extension is equal 1395 // to its zero extension). For instance, let's say we're zero extending 1396 // %narrow for the following use 1397 // 1398 // icmp slt i32 %narrow, %val ... (A) 1399 // 1400 // and %narrow is always positive. 
Then 1401 // 1402 // (A) == icmp slt i32 sext(%narrow), sext(%val) 1403 // == icmp slt i32 zext(%narrow), sext(%val) 1404 bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended; 1405 if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) 1406 return false; 1407 1408 Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); 1409 unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); 1410 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1411 assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); 1412 1413 // Widen the compare instruction. 1414 auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI); 1415 if (!InsertPt) 1416 return false; 1417 IRBuilder<> Builder(InsertPt); 1418 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1419 1420 // Widen the other operand of the compare, if necessary. 1421 if (CastWidth < IVWidth) { 1422 Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); 1423 DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); 1424 } 1425 return true; 1426 } 1427 1428 /// If the narrow use is an instruction whose two operands are the defining 1429 /// instruction of DU and a load instruction, then we have the following: 1430 /// if the load is hoisted outside the loop, then we do not reach this function 1431 /// as scalar evolution analysis works fine in widenIVUse with variables 1432 /// hoisted outside the loop and efficient code is subsequently generated by 1433 /// not emitting truncate instructions. But when the load is not hoisted 1434 /// (whether due to limitation in alias analysis or due to a true legality), 1435 /// then scalar evolution can not proceed with loop variant values and 1436 /// inefficient code is generated. This function handles the non-hoisted load 1437 /// special case by making the optimization generate the same type of code for 1438 /// hoisted and non-hoisted load (widen use and eliminate sign extend 1439 /// instruction). This special case is important especially when the induction 1440 /// variables are affecting addressing mode in code generation. 1441 bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) { 1442 Instruction *NarrowUse = DU.NarrowUse; 1443 Instruction *NarrowDef = DU.NarrowDef; 1444 Instruction *WideDef = DU.WideDef; 1445 1446 // Handle the common case of add<nsw/nuw> 1447 const unsigned OpCode = NarrowUse->getOpcode(); 1448 // Only Add/Sub/Mul instructions are supported. 1449 if (OpCode != Instruction::Add && OpCode != Instruction::Sub && 1450 OpCode != Instruction::Mul) 1451 return false; 1452 1453 // The operand that is not defined by NarrowDef of DU. Let's call it the 1454 // other operand. 1455 unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0; 1456 assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef && 1457 "bad DU"); 1458 1459 const SCEV *ExtendOperExpr = nullptr; 1460 const OverflowingBinaryOperator *OBO = 1461 cast<OverflowingBinaryOperator>(NarrowUse); 1462 ExtendKind ExtKind = getExtendKind(NarrowDef); 1463 if (ExtKind == SignExtended && OBO->hasNoSignedWrap()) 1464 ExtendOperExpr = SE->getSignExtendExpr( 1465 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1466 else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap()) 1467 ExtendOperExpr = SE->getZeroExtendExpr( 1468 SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType); 1469 else 1470 return false; 1471 1472 // We are interested in the other operand being a load instruction. 
1473 // But, we should look into relaxing this restriction later on. 1474 auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx)); 1475 if (I && I->getOpcode() != Instruction::Load) 1476 return false; 1477 1478 // Verifying that Defining operand is an AddRec 1479 const SCEV *Op1 = SE->getSCEV(WideDef); 1480 const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1); 1481 if (!AddRecOp1 || AddRecOp1->getLoop() != L) 1482 return false; 1483 // Verifying that other operand is an Extend. 1484 if (ExtKind == SignExtended) { 1485 if (!isa<SCEVSignExtendExpr>(ExtendOperExpr)) 1486 return false; 1487 } else { 1488 if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr)) 1489 return false; 1490 } 1491 1492 if (ExtKind == SignExtended) { 1493 for (Use &U : NarrowUse->uses()) { 1494 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1495 if (!User || User->getType() != WideType) 1496 return false; 1497 } 1498 } else { // ExtKind == ZeroExtended 1499 for (Use &U : NarrowUse->uses()) { 1500 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1501 if (!User || User->getType() != WideType) 1502 return false; 1503 } 1504 } 1505 1506 return true; 1507 } 1508 1509 /// Special Case for widening with variant Loads (see 1510 /// WidenIV::widenWithVariantLoadUse). This is the code generation part. 1511 void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) { 1512 Instruction *NarrowUse = DU.NarrowUse; 1513 Instruction *NarrowDef = DU.NarrowDef; 1514 Instruction *WideDef = DU.WideDef; 1515 1516 ExtendKind ExtKind = getExtendKind(NarrowDef); 1517 1518 LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); 1519 1520 // Generating a widening use instruction. 1521 Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) 1522 ? WideDef 1523 : createExtendInst(NarrowUse->getOperand(0), WideType, 1524 ExtKind, NarrowUse); 1525 Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) 1526 ? WideDef 1527 : createExtendInst(NarrowUse->getOperand(1), WideType, 1528 ExtKind, NarrowUse); 1529 1530 auto *NarrowBO = cast<BinaryOperator>(NarrowUse); 1531 auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, 1532 NarrowBO->getName()); 1533 IRBuilder<> Builder(NarrowUse); 1534 Builder.Insert(WideBO); 1535 WideBO->copyIRFlags(NarrowBO); 1536 1537 if (ExtKind == SignExtended) 1538 ExtendKindMap[NarrowUse] = SignExtended; 1539 else 1540 ExtendKindMap[NarrowUse] = ZeroExtended; 1541 1542 // Update the Use. 1543 if (ExtKind == SignExtended) { 1544 for (Use &U : NarrowUse->uses()) { 1545 SExtInst *User = dyn_cast<SExtInst>(U.getUser()); 1546 if (User && User->getType() == WideType) { 1547 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1548 << *WideBO << "\n"); 1549 ++NumElimExt; 1550 User->replaceAllUsesWith(WideBO); 1551 DeadInsts.emplace_back(User); 1552 } 1553 } 1554 } else { // ExtKind == ZeroExtended 1555 for (Use &U : NarrowUse->uses()) { 1556 ZExtInst *User = dyn_cast<ZExtInst>(U.getUser()); 1557 if (User && User->getType() == WideType) { 1558 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by " 1559 << *WideBO << "\n"); 1560 ++NumElimExt; 1561 User->replaceAllUsesWith(WideBO); 1562 DeadInsts.emplace_back(User); 1563 } 1564 } 1565 } 1566 } 1567 1568 /// Determine whether an individual user of the narrow IV can be widened. If so, 1569 /// return the wide clone of the user. 
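/// For example (an illustrative sketch, not taken from any particular test):
/// when %iv is being widened from i32 to i64, a narrow use such as
///   %add = add nsw i32 %iv, 1
///   %ext = sext i32 %add to i64
/// can be re-expressed in terms of the wide IV as
///   %add.wide = add nsw i64 %iv.wide, 1
/// allowing the sext to be removed instead of inserting a trunc of %iv.wide.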
1570 Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { 1571 assert(ExtendKindMap.count(DU.NarrowDef) && 1572 "Should already know the kind of extension used to widen NarrowDef"); 1573 1574 // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 1575 if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) { 1576 if (LI->getLoopFor(UsePhi->getParent()) != L) { 1577 // For LCSSA phis, sink the truncate outside the loop. 1578 // After SimplifyCFG most loop exit targets have a single predecessor. 1579 // Otherwise fall back to a truncate within the loop. 1580 if (UsePhi->getNumOperands() != 1) 1581 truncateIVUse(DU, DT, LI); 1582 else { 1583 // Widening the PHI requires us to insert a trunc. The logical place 1584 // for this trunc is in the same BB as the PHI. This is not possible if 1585 // the BB is terminated by a catchswitch. 1586 if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator())) 1587 return nullptr; 1588 1589 PHINode *WidePhi = 1590 PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", 1591 UsePhi); 1592 WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); 1593 IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); 1594 Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); 1595 UsePhi->replaceAllUsesWith(Trunc); 1596 DeadInsts.emplace_back(UsePhi); 1597 LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " 1598 << *WidePhi << "\n"); 1599 } 1600 return nullptr; 1601 } 1602 } 1603 1604 // This narrow use can be widened by a sext if it's non-negative or its narrow 1605 // def was widended by a sext. Same for zext. 1606 auto canWidenBySExt = [&]() { 1607 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended; 1608 }; 1609 auto canWidenByZExt = [&]() { 1610 return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended; 1611 }; 1612 1613 // Our raison d'etre! Eliminate sign and zero extension. 1614 if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) || 1615 (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) { 1616 Value *NewDef = DU.WideDef; 1617 if (DU.NarrowUse->getType() != WideType) { 1618 unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType()); 1619 unsigned IVWidth = SE->getTypeSizeInBits(WideType); 1620 if (CastWidth < IVWidth) { 1621 // The cast isn't as wide as the IV, so insert a Trunc. 1622 IRBuilder<> Builder(DU.NarrowUse); 1623 NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType()); 1624 } 1625 else { 1626 // A wider extend was hidden behind a narrower one. This may induce 1627 // another round of IV widening in which the intermediate IV becomes 1628 // dead. It should be very rare. 1629 LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi 1630 << " not wide enough to subsume " << *DU.NarrowUse 1631 << "\n"); 1632 DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); 1633 NewDef = DU.NarrowUse; 1634 } 1635 } 1636 if (NewDef != DU.NarrowUse) { 1637 LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse 1638 << " replaced by " << *DU.WideDef << "\n"); 1639 ++NumElimExt; 1640 DU.NarrowUse->replaceAllUsesWith(NewDef); 1641 DeadInsts.emplace_back(DU.NarrowUse); 1642 } 1643 // Now that the extend is gone, we want to expose it's uses for potential 1644 // further simplification. We don't need to directly inform SimplifyIVUsers 1645 // of the new users, because their parent IV will be processed later as a 1646 // new loop phi. 
If we preserved IVUsers analysis, we would also want to 1647 // push the uses of WideDef here. 1648 1649 // No further widening is needed. The deceased [sz]ext had done it for us. 1650 return nullptr; 1651 } 1652 1653 // Does this user itself evaluate to a recurrence after widening? 1654 WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU); 1655 if (!WideAddRec.first) 1656 WideAddRec = getWideRecurrence(DU); 1657 1658 assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown)); 1659 if (!WideAddRec.first) { 1660 // If use is a loop condition, try to promote the condition instead of 1661 // truncating the IV first. 1662 if (widenLoopCompare(DU)) 1663 return nullptr; 1664 1665 // We are here about to generate a truncate instruction that may hurt 1666 // performance because the scalar evolution expression computed earlier 1667 // in WideAddRec.first does not indicate a polynomial induction expression. 1668 // In that case, look at the operands of the use instruction to determine 1669 // if we can still widen the use instead of truncating its operand. 1670 if (widenWithVariantLoadUse(DU)) { 1671 widenWithVariantLoadUseCodegen(DU); 1672 return nullptr; 1673 } 1674 1675 // This user does not evaluate to a recurrence after widening, so don't 1676 // follow it. Instead insert a Trunc to kill off the original use, 1677 // eventually isolating the original narrow IV so it can be removed. 1678 truncateIVUse(DU, DT, LI); 1679 return nullptr; 1680 } 1681 // Assume block terminators cannot evaluate to a recurrence. We can't to 1682 // insert a Trunc after a terminator if there happens to be a critical edge. 1683 assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() && 1684 "SCEV is not expected to evaluate a block terminator"); 1685 1686 // Reuse the IV increment that SCEVExpander created as long as it dominates 1687 // NarrowUse. 1688 Instruction *WideUse = nullptr; 1689 if (WideAddRec.first == WideIncExpr && 1690 Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) 1691 WideUse = WideInc; 1692 else { 1693 WideUse = cloneIVUser(DU, WideAddRec.first); 1694 if (!WideUse) 1695 return nullptr; 1696 } 1697 // Evaluation of WideAddRec ensured that the narrow expression could be 1698 // extended outside the loop without overflow. This suggests that the wide use 1699 // evaluates to the same expression as the extended narrow use, but doesn't 1700 // absolutely guarantee it. Hence the following failsafe check. In rare cases 1701 // where it fails, we simply throw away the newly created wide use. 1702 if (WideAddRec.first != SE->getSCEV(WideUse)) { 1703 LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " 1704 << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first 1705 << "\n"); 1706 DeadInsts.emplace_back(WideUse); 1707 return nullptr; 1708 } 1709 1710 // if we reached this point then we are going to replace 1711 // DU.NarrowUse with WideUse. Reattach DbgValue then. 1712 replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT); 1713 1714 ExtendKindMap[DU.NarrowUse] = WideAddRec.second; 1715 // Returning WideUse pushes it on the worklist. 1716 return WideUse; 1717 } 1718 1719 /// Add eligible users of NarrowDef to NarrowIVUsers. 
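/// Each queued entry also records whether the narrow def is known to be
/// non-negative at that particular use. Besides the global SCEV query, this
/// can come from control-dependent range info, e.g. (illustrative) with
///   %i.next = add nsw i32 %i, 1
/// a use of %i.next dominated by a check like 'icmp sge i32 %i, 0' is known
/// non-negative there, which later lets widenIVUse treat sign and zero
/// extension of the def interchangeably for that use.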
1720 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { 1721 const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); 1722 bool NonNegativeDef = 1723 SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, 1724 SE->getConstant(NarrowSCEV->getType(), 0)); 1725 for (User *U : NarrowDef->users()) { 1726 Instruction *NarrowUser = cast<Instruction>(U); 1727 1728 // Handle data flow merges and bizarre phi cycles. 1729 if (!Widened.insert(NarrowUser).second) 1730 continue; 1731 1732 bool NonNegativeUse = false; 1733 if (!NonNegativeDef) { 1734 // We might have a control-dependent range information for this context. 1735 if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser)) 1736 NonNegativeUse = RangeInfo->getSignedMin().isNonNegative(); 1737 } 1738 1739 NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, 1740 NonNegativeDef || NonNegativeUse); 1741 } 1742 } 1743 1744 /// Process a single induction variable. First use the SCEVExpander to create a 1745 /// wide induction variable that evaluates to the same recurrence as the 1746 /// original narrow IV. Then use a worklist to forward traverse the narrow IV's 1747 /// def-use chain. After widenIVUse has processed all interesting IV users, the 1748 /// narrow IV will be isolated for removal by DeleteDeadPHIs. 1749 /// 1750 /// It would be simpler to delete uses as they are processed, but we must avoid 1751 /// invalidating SCEV expressions. 1752 PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { 1753 // Is this phi an induction variable? 1754 const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi)); 1755 if (!AddRec) 1756 return nullptr; 1757 1758 // Widen the induction variable expression. 1759 const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended 1760 ? SE->getSignExtendExpr(AddRec, WideType) 1761 : SE->getZeroExtendExpr(AddRec, WideType); 1762 1763 assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType && 1764 "Expect the new IV expression to preserve its type"); 1765 1766 // Can the IV be extended outside the loop without overflow? 1767 AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr); 1768 if (!AddRec || AddRec->getLoop() != L) 1769 return nullptr; 1770 1771 // An AddRec must have loop-invariant operands. Since this AddRec is 1772 // materialized by a loop header phi, the expression cannot have any post-loop 1773 // operands, so they must dominate the loop header. 1774 assert( 1775 SE->properlyDominates(AddRec->getStart(), L->getHeader()) && 1776 SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) && 1777 "Loop header phi recurrence inputs do not dominate the loop"); 1778 1779 // Iterate over IV uses (including transitive ones) looking for IV increments 1780 // of the form 'add nsw %iv, <const>'. For each increment and each use of 1781 // the increment calculate control-dependent range information basing on 1782 // dominating conditions inside of the loop (e.g. a range check inside of the 1783 // loop). Calculated ranges are stored in PostIncRangeInfos map. 1784 // 1785 // Control-dependent range information is later used to prove that a narrow 1786 // definition is not negative (see pushNarrowIVUsers). It's difficult to do 1787 // this on demand because when pushNarrowIVUsers needs this information some 1788 // of the dominating conditions might be already widened. 1789 if (UsePostIncrementRanges) 1790 calculatePostIncRanges(OrigPhi); 1791 1792 // The rewriter provides a value for the desired IV expression. 
This may 1793 // either find an existing phi or materialize a new one. Either way, we 1794 // expect a well-formed cyclic phi-with-increments. i.e. any operand not part 1795 // of the phi-SCC dominates the loop entry. 1796 Instruction *InsertPt = &L->getHeader()->front(); 1797 WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); 1798 1799 // Remembering the WideIV increment generated by SCEVExpander allows 1800 // widenIVUse to reuse it when widening the narrow IV's increment. We don't 1801 // employ a general reuse mechanism because the call above is the only call to 1802 // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. 1803 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1804 WideInc = 1805 cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock)); 1806 WideIncExpr = SE->getSCEV(WideInc); 1807 // Propagate the debug location associated with the original loop increment 1808 // to the new (widened) increment. 1809 auto *OrigInc = 1810 cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); 1811 WideInc->setDebugLoc(OrigInc->getDebugLoc()); 1812 } 1813 1814 LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n"); 1815 ++NumWidened; 1816 1817 // Traverse the def-use chain using a worklist starting at the original IV. 1818 assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" ); 1819 1820 Widened.insert(OrigPhi); 1821 pushNarrowIVUsers(OrigPhi, WidePhi); 1822 1823 while (!NarrowIVUsers.empty()) { 1824 NarrowIVDefUse DU = NarrowIVUsers.pop_back_val(); 1825 1826 // Process a def-use edge. This may replace the use, so don't hold a 1827 // use_iterator across it. 1828 Instruction *WideUse = widenIVUse(DU, Rewriter); 1829 1830 // Follow all def-use edges from the previous narrow use. 1831 if (WideUse) 1832 pushNarrowIVUsers(DU.NarrowUse, WideUse); 1833 1834 // widenIVUse may have removed the def-use edge. 1835 if (DU.NarrowDef->use_empty()) 1836 DeadInsts.emplace_back(DU.NarrowDef); 1837 } 1838 1839 // Attach any debug information to the new PHI. 1840 replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT); 1841 1842 return WidePhi; 1843 } 1844 1845 /// Calculates control-dependent range for the given def at the given context 1846 /// by looking at dominating conditions inside of the loop 1847 void WidenIV::calculatePostIncRange(Instruction *NarrowDef, 1848 Instruction *NarrowUser) { 1849 using namespace llvm::PatternMatch; 1850 1851 Value *NarrowDefLHS; 1852 const APInt *NarrowDefRHS; 1853 if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS), 1854 m_APInt(NarrowDefRHS))) || 1855 !NarrowDefRHS->isNonNegative()) 1856 return; 1857 1858 auto UpdateRangeFromCondition = [&] (Value *Condition, 1859 bool TrueDest) { 1860 CmpInst::Predicate Pred; 1861 Value *CmpRHS; 1862 if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS), 1863 m_Value(CmpRHS)))) 1864 return; 1865 1866 CmpInst::Predicate P = 1867 TrueDest ? 
Pred : CmpInst::getInversePredicate(Pred); 1868 1869 auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS)); 1870 auto CmpConstrainedLHSRange = 1871 ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange); 1872 auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap( 1873 *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap); 1874 1875 updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange); 1876 }; 1877 1878 auto UpdateRangeFromGuards = [&](Instruction *Ctx) { 1879 if (!HasGuards) 1880 return; 1881 1882 for (Instruction &I : make_range(Ctx->getIterator().getReverse(), 1883 Ctx->getParent()->rend())) { 1884 Value *C = nullptr; 1885 if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C)))) 1886 UpdateRangeFromCondition(C, /*TrueDest=*/true); 1887 } 1888 }; 1889 1890 UpdateRangeFromGuards(NarrowUser); 1891 1892 BasicBlock *NarrowUserBB = NarrowUser->getParent(); 1893 // If NarrowUserBB is statically unreachable asking dominator queries may 1894 // yield surprising results. (e.g. the block may not have a dom tree node) 1895 if (!DT->isReachableFromEntry(NarrowUserBB)) 1896 return; 1897 1898 for (auto *DTB = (*DT)[NarrowUserBB]->getIDom(); 1899 L->contains(DTB->getBlock()); 1900 DTB = DTB->getIDom()) { 1901 auto *BB = DTB->getBlock(); 1902 auto *TI = BB->getTerminator(); 1903 UpdateRangeFromGuards(TI); 1904 1905 auto *BI = dyn_cast<BranchInst>(TI); 1906 if (!BI || !BI->isConditional()) 1907 continue; 1908 1909 auto *TrueSuccessor = BI->getSuccessor(0); 1910 auto *FalseSuccessor = BI->getSuccessor(1); 1911 1912 auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) { 1913 return BBE.isSingleEdge() && 1914 DT->dominates(BBE, NarrowUser->getParent()); 1915 }; 1916 1917 if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor))) 1918 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true); 1919 1920 if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor))) 1921 UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false); 1922 } 1923 } 1924 1925 /// Calculates PostIncRangeInfos map for the given IV 1926 void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { 1927 SmallPtrSet<Instruction *, 16> Visited; 1928 SmallVector<Instruction *, 6> Worklist; 1929 Worklist.push_back(OrigPhi); 1930 Visited.insert(OrigPhi); 1931 1932 while (!Worklist.empty()) { 1933 Instruction *NarrowDef = Worklist.pop_back_val(); 1934 1935 for (Use &U : NarrowDef->uses()) { 1936 auto *NarrowUser = cast<Instruction>(U.getUser()); 1937 1938 // Don't go looking outside the current loop. 1939 auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()]; 1940 if (!NarrowUserLoop || !L->contains(NarrowUserLoop)) 1941 continue; 1942 1943 if (!Visited.insert(NarrowUser).second) 1944 continue; 1945 1946 Worklist.push_back(NarrowUser); 1947 1948 calculatePostIncRange(NarrowDef, NarrowUser); 1949 } 1950 } 1951 } 1952 1953 //===----------------------------------------------------------------------===// 1954 // Live IV Reduction - Minimize IVs live across the loop. 1955 //===----------------------------------------------------------------------===// 1956 1957 //===----------------------------------------------------------------------===// 1958 // Simplification of IV users based on SCEV evaluation. 
1959 //===----------------------------------------------------------------------===// 1960 1961 namespace { 1962 1963 class IndVarSimplifyVisitor : public IVVisitor { 1964 ScalarEvolution *SE; 1965 const TargetTransformInfo *TTI; 1966 PHINode *IVPhi; 1967 1968 public: 1969 WideIVInfo WI; 1970 1971 IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, 1972 const TargetTransformInfo *TTI, 1973 const DominatorTree *DTree) 1974 : SE(SCEV), TTI(TTI), IVPhi(IV) { 1975 DT = DTree; 1976 WI.NarrowIV = IVPhi; 1977 } 1978 1979 // Implement the interface used by simplifyUsersOfIV. 1980 void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } 1981 }; 1982 1983 } // end anonymous namespace 1984 1985 /// Iteratively perform simplification on a worklist of IV users. Each 1986 /// successive simplification may push more users which may themselves be 1987 /// candidates for simplification. 1988 /// 1989 /// Sign/Zero extend elimination is interleaved with IV simplification. 1990 bool IndVarSimplify::simplifyAndExtend(Loop *L, 1991 SCEVExpander &Rewriter, 1992 LoopInfo *LI) { 1993 SmallVector<WideIVInfo, 8> WideIVs; 1994 1995 auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction( 1996 Intrinsic::getName(Intrinsic::experimental_guard)); 1997 bool HasGuards = GuardDecl && !GuardDecl->use_empty(); 1998 1999 SmallVector<PHINode*, 8> LoopPhis; 2000 for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 2001 LoopPhis.push_back(cast<PHINode>(I)); 2002 } 2003 // Each round of simplification iterates through the SimplifyIVUsers worklist 2004 // for all current phis, then determines whether any IVs can be 2005 // widened. Widening adds new phis to LoopPhis, inducing another round of 2006 // simplification on the wide IVs. 2007 bool Changed = false; 2008 while (!LoopPhis.empty()) { 2009 // Evaluate as many IV expressions as possible before widening any IVs. This 2010 // forces SCEV to set no-wrap flags before evaluating sign/zero 2011 // extension. The first time SCEV attempts to normalize sign/zero extension, 2012 // the result becomes final. So for the most predictable results, we delay 2013 // evaluation of sign/zero extend evaluation until needed, and avoid running 2014 // other SCEV based analysis prior to simplifyAndExtend. 2015 do { 2016 PHINode *CurrIV = LoopPhis.pop_back_val(); 2017 2018 // Information about sign/zero extensions of CurrIV. 2019 IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); 2020 2021 Changed |= 2022 simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); 2023 2024 if (Visitor.WI.WidestNativeType) { 2025 WideIVs.push_back(Visitor.WI); 2026 } 2027 } while(!LoopPhis.empty()); 2028 2029 for (; !WideIVs.empty(); WideIVs.pop_back()) { 2030 WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards); 2031 if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { 2032 Changed = true; 2033 LoopPhis.push_back(WidePhi); 2034 } 2035 } 2036 } 2037 return Changed; 2038 } 2039 2040 //===----------------------------------------------------------------------===// 2041 // linearFunctionTestReplace and its kin. Rewrite the loop exit condition. 2042 //===----------------------------------------------------------------------===// 2043 2044 /// Given an Value which is hoped to be part of an add recurance in the given 2045 /// loop, return the associated Phi node if so. Otherwise, return null. Note 2046 /// that this is less general than SCEVs AddRec checking. 
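/// For example (an illustrative sketch), given the usual header phi/increment
/// pair
///   %iv      = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, %step        ; %step is loop invariant
/// calling this on %iv.next returns %iv, while anything that is not a simple
/// add/sub (or two-operand gep) of a header phi and a loop-invariant value
/// returns null.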
2047 static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) { 2048 Instruction *IncI = dyn_cast<Instruction>(IncV); 2049 if (!IncI) 2050 return nullptr; 2051 2052 switch (IncI->getOpcode()) { 2053 case Instruction::Add: 2054 case Instruction::Sub: 2055 break; 2056 case Instruction::GetElementPtr: 2057 // An IV counter must preserve its type. 2058 if (IncI->getNumOperands() == 2) 2059 break; 2060 LLVM_FALLTHROUGH; 2061 default: 2062 return nullptr; 2063 } 2064 2065 PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0)); 2066 if (Phi && Phi->getParent() == L->getHeader()) { 2067 if (L->isLoopInvariant(IncI->getOperand(1))) 2068 return Phi; 2069 return nullptr; 2070 } 2071 if (IncI->getOpcode() == Instruction::GetElementPtr) 2072 return nullptr; 2073 2074 // Allow add/sub to be commuted. 2075 Phi = dyn_cast<PHINode>(IncI->getOperand(1)); 2076 if (Phi && Phi->getParent() == L->getHeader()) { 2077 if (L->isLoopInvariant(IncI->getOperand(0))) 2078 return Phi; 2079 } 2080 return nullptr; 2081 } 2082 2083 /// Whether the current loop exit test is based on this value. Currently this 2084 /// is limited to a direct use in the loop condition. 2085 static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { 2086 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2087 ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition()); 2088 // TODO: Allow non-icmp loop test. 2089 if (!ICmp) 2090 return false; 2091 2092 // TODO: Allow indirect use. 2093 return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; 2094 } 2095 2096 /// linearFunctionTestReplace policy. Return true unless we can show that the 2097 /// current exit test is already sufficiently canonical. 2098 static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { 2099 assert(L->getLoopLatch() && "Must be in simplified form"); 2100 2101 // Avoid converting a constant or loop invariant test back to a runtime 2102 // test. This is critical for when SCEV's cached ExitCount is less precise 2103 // than the current IR (such as after we've proven a particular exit is 2104 // actually dead and thus the BE count never reaches our ExitCount.) 2105 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2106 if (L->isLoopInvariant(BI->getCondition())) 2107 return false; 2108 2109 // Do LFTR to simplify the exit condition to an ICMP. 2110 ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); 2111 if (!Cond) 2112 return true; 2113 2114 // Do LFTR to simplify the exit ICMP to EQ/NE 2115 ICmpInst::Predicate Pred = Cond->getPredicate(); 2116 if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ) 2117 return true; 2118 2119 // Look for a loop invariant RHS 2120 Value *LHS = Cond->getOperand(0); 2121 Value *RHS = Cond->getOperand(1); 2122 if (!L->isLoopInvariant(RHS)) { 2123 if (!L->isLoopInvariant(LHS)) 2124 return true; 2125 std::swap(LHS, RHS); 2126 } 2127 // Look for a simple IV counter LHS 2128 PHINode *Phi = dyn_cast<PHINode>(LHS); 2129 if (!Phi) 2130 Phi = getLoopPhiForCounter(LHS, L); 2131 2132 if (!Phi) 2133 return true; 2134 2135 // Do LFTR if PHI node is defined in the loop, but is *not* a counter. 2136 int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); 2137 if (Idx < 0) 2138 return true; 2139 2140 // Do LFTR if the exit condition's IV is *not* a simple counter. 2141 Value *IncV = Phi->getIncomingValue(Idx); 2142 return Phi != getLoopPhiForCounter(IncV, L); 2143 } 2144 2145 /// Return true if undefined behavior would provable be executed on the path to 2146 /// OnPathTo if Root produced a posion result. 
/// Note that this doesn't say anything about whether OnPathTo is actually
/// executed or whether Root is actually poison. This can be used to assess
/// whether a new use of Root can be added at a location which is control
/// equivalent with OnPathTo (such as immediately before it) without
/// introducing UB which didn't previously exist. Note that a false result
/// conveys no information.
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // The basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users. This is safe, as false is a conservative result.
    if (!propagatesFullPoison(I) && I != Root)
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // the way to the exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example,
  // Arguments may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test and its own increment.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L. A counter is an add
/// recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi);
}

/// Search the loop header for a loop counter (an add rec with a step of one)
/// suitable for use by LFTR. If multiple counters are available, select the
/// "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test. This is legal since performing LFTR could not
      // increase the number of undef users.
2312 Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock); 2313 if (!isLoopExitTestBasedOn(Phi, ExitingBB) && 2314 !isLoopExitTestBasedOn(IncPhi, ExitingBB)) 2315 continue; 2316 } 2317 2318 // Avoid introducing undefined behavior due to poison which didn't exist in 2319 // the original program. (Annoyingly, the rules for poison and undef 2320 // propagation are distinct, so this does NOT cover the undef case above.) 2321 // We have to ensure that we don't introduce UB by introducing a use on an 2322 // iteration where said IV produces poison. Our strategy here differs for 2323 // pointers and integer IVs. For integers, we strip and reinfer as needed, 2324 // see code in linearFunctionTestReplace. For pointers, we restrict 2325 // transforms as there is no good way to reinfer inbounds once lost. 2326 if (!Phi->getType()->isIntegerTy() && 2327 !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT)) 2328 continue; 2329 2330 const SCEV *Init = AR->getStart(); 2331 2332 if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { 2333 // Don't force a live loop counter if another IV can be used. 2334 if (AlmostDeadIV(Phi, LatchBlock, Cond)) 2335 continue; 2336 2337 // Prefer to count-from-zero. This is a more "canonical" counter form. It 2338 // also prefers integer to pointer IVs. 2339 if (BestInit->isZero() != Init->isZero()) { 2340 if (BestInit->isZero()) 2341 continue; 2342 } 2343 // If two IVs both count from zero or both count from nonzero then the 2344 // narrower is likely a dead phi that has been widened. Use the wider phi 2345 // to allow the other to be eliminated. 2346 else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType())) 2347 continue; 2348 } 2349 BestPhi = Phi; 2350 BestInit = Init; 2351 } 2352 return BestPhi; 2353 } 2354 2355 /// Insert an IR expression which computes the value held by the IV IndVar 2356 /// (which must be an loop counter w/unit stride) after the backedge of loop L 2357 /// is taken ExitCount times. 2358 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, 2359 const SCEV *ExitCount, bool UsePostInc, Loop *L, 2360 SCEVExpander &Rewriter, ScalarEvolution *SE) { 2361 assert(isLoopCounter(IndVar, L, SE)); 2362 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); 2363 const SCEV *IVInit = AR->getStart(); 2364 2365 // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter 2366 // finds a valid pointer IV. Sign extend ExitCount in order to materialize a 2367 // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing 2368 // the existing GEPs whenever possible. 2369 if (IndVar->getType()->isPointerTy() && 2370 !ExitCount->getType()->isPointerTy()) { 2371 // IVOffset will be the new GEP offset that is interpreted by GEP as a 2372 // signed value. ExitCount on the other hand represents the loop trip count, 2373 // which is an unsigned value. FindLoopCounter only allows induction 2374 // variables that have a positive unit stride of one. This means we don't 2375 // have to handle the case of negative offsets (yet) and just need to zero 2376 // extend ExitCount. 2377 Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); 2378 const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); 2379 if (UsePostInc) 2380 IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); 2381 2382 // Expand the code for the iteration count. 
2383 assert(SE->isLoopInvariant(IVOffset, L) && 2384 "Computed iteration count is not loop invariant!"); 2385 2386 // We could handle pointer IVs other than i8*, but we need to compensate for 2387 // gep index scaling. 2388 assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()), 2389 cast<PointerType>(IndVar->getType()) 2390 ->getElementType())->isOne() && 2391 "unit stride pointer IV must be i8*"); 2392 2393 const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); 2394 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2395 return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); 2396 } else { 2397 // In any other case, convert both IVInit and ExitCount to integers before 2398 // comparing. This may result in SCEV expansion of pointers, but in practice 2399 // SCEV will fold the pointer arithmetic away as such: 2400 // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 2401 // 2402 // Valid Cases: (1) both integers is most common; (2) both may be pointers 2403 // for simple memset-style loops. 2404 // 2405 // IVInit integer and ExitCount pointer would only occur if a canonical IV 2406 // were generated on top of case #2, which is not expected. 2407 2408 assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); 2409 // For unit stride, IVCount = Start + ExitCount with 2's complement 2410 // overflow. 2411 2412 // For integer IVs, truncate the IV before computing IVInit + BECount, 2413 // unless we know apriori that the limit must be a constant when evaluated 2414 // in the bitwidth of the IV. We prefer (potentially) keeping a truncate 2415 // of the IV in the loop over a (potentially) expensive expansion of the 2416 // widened exit count add(zext(add)) expression. 2417 if (SE->getTypeSizeInBits(IVInit->getType()) 2418 > SE->getTypeSizeInBits(ExitCount->getType())) { 2419 if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) 2420 ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); 2421 else 2422 IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); 2423 } 2424 2425 const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); 2426 2427 if (UsePostInc) 2428 IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); 2429 2430 // Expand the code for the iteration count. 2431 assert(SE->isLoopInvariant(IVLimit, L) && 2432 "Computed iteration count is not loop invariant!"); 2433 // Ensure that we generate the same type as IndVar, or a smaller integer 2434 // type. In the presence of null pointer values, we have an integer type 2435 // SCEV expression (IVInit) for a pointer type IV value (IndVar). 2436 Type *LimitTy = ExitCount->getType()->isPointerTy() ? 2437 IndVar->getType() : ExitCount->getType(); 2438 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2439 return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); 2440 } 2441 } 2442 2443 /// This method rewrites the exit condition of the loop to be a canonical != 2444 /// comparison against the incremented loop induction variable. This pass is 2445 /// able to rewrite the exit tests of any loop where the SCEV analysis can 2446 /// determine a loop-invariant trip count of the loop, which is actually a much 2447 /// broader range than just linear tests. 
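/// For example (an illustrative sketch): an exit test such as
///   %cmp = icmp slt i32 %iv, %n
///   br i1 %cmp, label %loop, label %exit
/// is rewritten into the canonical form
///   %exitcond = icmp ne i32 %iv.next, %wide.trip.count
///   br i1 %exitcond, label %loop, label %exit
/// where the limit is computed by SCEVExpander from the exit count and, when
/// the exiting block is also the latch, the post-incremented IV is compared.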
2448 bool IndVarSimplify:: 2449 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, 2450 const SCEV *ExitCount, 2451 PHINode *IndVar, SCEVExpander &Rewriter) { 2452 assert(L->getLoopLatch() && "Loop no longer in simplified form?"); 2453 assert(isLoopCounter(IndVar, L, SE)); 2454 Instruction * const IncVar = 2455 cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch())); 2456 2457 // Initialize CmpIndVar to the preincremented IV. 2458 Value *CmpIndVar = IndVar; 2459 bool UsePostInc = false; 2460 2461 // If the exiting block is the same as the backedge block, we prefer to 2462 // compare against the post-incremented value, otherwise we must compare 2463 // against the preincremented value. 2464 if (ExitingBB == L->getLoopLatch()) { 2465 // For pointer IVs, we chose to not strip inbounds which requires us not 2466 // to add a potentially UB introducing use. We need to either a) show 2467 // the loop test we're modifying is already in post-inc form, or b) show 2468 // that adding a use must not introduce UB. 2469 bool SafeToPostInc = 2470 IndVar->getType()->isIntegerTy() || 2471 isLoopExitTestBasedOn(IncVar, ExitingBB) || 2472 mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT); 2473 if (SafeToPostInc) { 2474 UsePostInc = true; 2475 CmpIndVar = IncVar; 2476 } 2477 } 2478 2479 // It may be necessary to drop nowrap flags on the incrementing instruction 2480 // if either LFTR moves from a pre-inc check to a post-inc check (in which 2481 // case the increment might have previously been poison on the last iteration 2482 // only) or if LFTR switches to a different IV that was previously dynamically 2483 // dead (and as such may be arbitrarily poison). We remove any nowrap flags 2484 // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc 2485 // check), because the pre-inc addrec flags may be adopted from the original 2486 // instruction, while SCEV has to explicitly prove the post-inc nowrap flags. 2487 // TODO: This handling is inaccurate for one case: If we switch to a 2488 // dynamically dead IV that wraps on the first loop iteration only, which is 2489 // not covered by the post-inc addrec. (If the new IV was not dynamically 2490 // dead, it could not be poison on the first iteration in the first place.) 2491 if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) { 2492 const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar)); 2493 if (BO->hasNoUnsignedWrap()) 2494 BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap()); 2495 if (BO->hasNoSignedWrap()) 2496 BO->setHasNoSignedWrap(AR->hasNoSignedWrap()); 2497 } 2498 2499 Value *ExitCnt = genLoopLimit( 2500 IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE); 2501 assert(ExitCnt->getType()->isPointerTy() == 2502 IndVar->getType()->isPointerTy() && 2503 "genLoopLimit missed a cast"); 2504 2505 // Insert a new icmp_ne or icmp_eq instruction before the branch. 2506 BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); 2507 ICmpInst::Predicate P; 2508 if (L->contains(BI->getSuccessor(0))) 2509 P = ICmpInst::ICMP_NE; 2510 else 2511 P = ICmpInst::ICMP_EQ; 2512 2513 IRBuilder<> Builder(BI); 2514 2515 // The new loop exit condition should reuse the debug location of the 2516 // original loop exit condition. 
2517 if (auto *Cond = dyn_cast<Instruction>(BI->getCondition())) 2518 Builder.SetCurrentDebugLocation(Cond->getDebugLoc()); 2519 2520 // For integer IVs, if we evaluated the limit in the narrower bitwidth to 2521 // avoid the expensive expansion of the limit expression in the wider type, 2522 // emit a truncate to narrow the IV to the ExitCount type. This is safe 2523 // since we know (from the exit count bitwidth), that we can't self-wrap in 2524 // the narrower type. 2525 unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType()); 2526 unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType()); 2527 if (CmpIndVarSize > ExitCntSize) { 2528 assert(!CmpIndVar->getType()->isPointerTy() && 2529 !ExitCnt->getType()->isPointerTy()); 2530 2531 // Before resorting to actually inserting the truncate, use the same 2532 // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend 2533 // the other side of the comparison instead. We still evaluate the limit 2534 // in the narrower bitwidth, we just prefer a zext/sext outside the loop to 2535 // a truncate within in. 2536 bool Extended = false; 2537 const SCEV *IV = SE->getSCEV(CmpIndVar); 2538 const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), 2539 ExitCnt->getType()); 2540 const SCEV *ZExtTrunc = 2541 SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); 2542 2543 if (ZExtTrunc == IV) { 2544 Extended = true; 2545 ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(), 2546 "wide.trip.count"); 2547 } else { 2548 const SCEV *SExtTrunc = 2549 SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType()); 2550 if (SExtTrunc == IV) { 2551 Extended = true; 2552 ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(), 2553 "wide.trip.count"); 2554 } 2555 } 2556 2557 if (Extended) { 2558 bool Discard; 2559 L->makeLoopInvariant(ExitCnt, Discard); 2560 } else 2561 CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), 2562 "lftr.wideiv"); 2563 } 2564 LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" 2565 << " LHS:" << *CmpIndVar << '\n' 2566 << " op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==") 2567 << "\n" 2568 << " RHS:\t" << *ExitCnt << "\n" 2569 << "ExitCount:\t" << *ExitCount << "\n" 2570 << " was: " << *BI->getCondition() << "\n"); 2571 2572 Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); 2573 Value *OrigCond = BI->getCondition(); 2574 // It's tempting to use replaceAllUsesWith here to fully replace the old 2575 // comparison, but that's not immediately safe, since users of the old 2576 // comparison may not be dominated by the new comparison. Instead, just 2577 // update the branch to use the new comparison; in the common case this 2578 // will make old comparison dead. 2579 BI->setCondition(Cond); 2580 DeadInsts.push_back(OrigCond); 2581 2582 ++NumLFTR; 2583 return true; 2584 } 2585 2586 //===----------------------------------------------------------------------===// 2587 // sinkUnusedInvariants. A late subpass to cleanup loop preheaders. 2588 //===----------------------------------------------------------------------===// 2589 2590 /// If there's a single exit block, sink any loop-invariant values that 2591 /// were defined in the preheader but not used inside the loop into the 2592 /// exit block to reduce register pressure in the loop. 
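/// For example (an illustrative sketch): a side-effect-free computation such
/// as
///   %inv = mul i32 %a, %b        ; defined in the preheader
/// whose only users are outside the loop is moved into the single exit block,
/// so %inv is no longer live across every iteration of the loop body.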
2593 bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { 2594 BasicBlock *ExitBlock = L->getExitBlock(); 2595 if (!ExitBlock) return false; 2596 2597 BasicBlock *Preheader = L->getLoopPreheader(); 2598 if (!Preheader) return false; 2599 2600 bool MadeAnyChanges = false; 2601 BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt(); 2602 BasicBlock::iterator I(Preheader->getTerminator()); 2603 while (I != Preheader->begin()) { 2604 --I; 2605 // New instructions were inserted at the end of the preheader. 2606 if (isa<PHINode>(I)) 2607 break; 2608 2609 // Don't move instructions which might have side effects, since the side 2610 // effects need to complete before instructions inside the loop. Also don't 2611 // move instructions which might read memory, since the loop may modify 2612 // memory. Note that it's okay if the instruction might have undefined 2613 // behavior: LoopSimplify guarantees that the preheader dominates the exit 2614 // block. 2615 if (I->mayHaveSideEffects() || I->mayReadFromMemory()) 2616 continue; 2617 2618 // Skip debug info intrinsics. 2619 if (isa<DbgInfoIntrinsic>(I)) 2620 continue; 2621 2622 // Skip eh pad instructions. 2623 if (I->isEHPad()) 2624 continue; 2625 2626 // Don't sink alloca: we never want to sink static alloca's out of the 2627 // entry block, and correctly sinking dynamic alloca's requires 2628 // checks for stacksave/stackrestore intrinsics. 2629 // FIXME: Refactor this check somehow? 2630 if (isa<AllocaInst>(I)) 2631 continue; 2632 2633 // Determine if there is a use in or before the loop (direct or 2634 // otherwise). 2635 bool UsedInLoop = false; 2636 for (Use &U : I->uses()) { 2637 Instruction *User = cast<Instruction>(U.getUser()); 2638 BasicBlock *UseBB = User->getParent(); 2639 if (PHINode *P = dyn_cast<PHINode>(User)) { 2640 unsigned i = 2641 PHINode::getIncomingValueNumForOperand(U.getOperandNo()); 2642 UseBB = P->getIncomingBlock(i); 2643 } 2644 if (UseBB == Preheader || L->contains(UseBB)) { 2645 UsedInLoop = true; 2646 break; 2647 } 2648 } 2649 2650 // If there is, the def must remain in the preheader. 2651 if (UsedInLoop) 2652 continue; 2653 2654 // Otherwise, sink it to the exit block. 2655 Instruction *ToMove = &*I; 2656 bool Done = false; 2657 2658 if (I != Preheader->begin()) { 2659 // Skip debug info intrinsics. 2660 do { 2661 --I; 2662 } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); 2663 2664 if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) 2665 Done = true; 2666 } else { 2667 Done = true; 2668 } 2669 2670 MadeAnyChanges = true; 2671 ToMove->moveBefore(*ExitBlock, InsertPt); 2672 if (Done) break; 2673 InsertPt = ToMove->getIterator(); 2674 } 2675 2676 return MadeAnyChanges; 2677 } 2678 2679 /// Return a symbolic upper bound for the backedge taken count of the loop. 2680 /// This is more general than getConstantMaxBackedgeTakenCount as it returns 2681 /// an arbitrary expression as opposed to only constants. 2682 /// TODO: Move into the ScalarEvolution class. 2683 static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, 2684 DominatorTree &DT, Loop *L) { 2685 SmallVector<BasicBlock*, 16> ExitingBlocks; 2686 L->getExitingBlocks(ExitingBlocks); 2687 2688 // Form an expression for the maximum exit count possible for this loop. We 2689 // merge the max and exact information to approximate a version of 2690 // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. 
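  // E.g. (illustrative): with one exit whose exact count is %n and another
  // whose only known bound is a constant max of 100, the result is
  // umin(%n, 100); exits with neither kind of count are simply skipped.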
  SmallVector<const SCEV*, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      ExitCount = SE.getExitCount(L, ExitingBB,
                                  ScalarEvolution::ConstantMaximum);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return SE.getCouldNotCompute();
  return SE.getUMinFromMismatchedTypes(ExitCounts);
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewritable and analyzable.
  auto NewEnd = llvm::remove_if(ExitingBlocks,
                                [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one. Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance. We know from the fact that
  // all remaining exits are analyzable that there must be a total dominance
  // order between them as each must dominate the latch. The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
    // std::sort sorts in ascending order, so we want the inverse of
    // the normal dominance relation.
    if (DT->properlyDominates(A, B)) return true;
    if (DT->properlyDominates(B, A)) return false;
    llvm_unreachable("expected total dominance order!");
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this. This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it. Or at least, kill all the header phis and simplify.
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail. Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
        SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered. If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here. If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b. Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  bool Changed = false;

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration. Note that this
  // doesn't either a) tell us the loop exits on the first iteration (unless
  // *all* exits are predicatable) or b) tell us *which* exit might be taken.
  // This transformation looks a lot like a restricted form of dead loop
  // elimination, but limited to read-only loops and without necessarily
  // needing to kill the loop entirely.
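  // As an illustrative sketch of the rewrite performed below: a data-dependent
  // exit test such as
  //   %found = icmp eq i32 %val, %key
  // is replaced by a loop-invariant condition of the form
  //   icmp ne iN <this exit's count>, <exact backedge-taken count>
  // expanded once in the preheader, so the exit branch no longer depends on
  // values computed inside the loop.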
  if (!LoopPredication)
    return Changed;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return Changed;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return Changed;

  // If we end up with a pointer exit count, bail.  It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return Changed;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
      BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail.  It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: use isGuaranteedToTransferExecutionToSuccessor here
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzeable exit counts
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR, and any other changes, have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}
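
// Usage sketch (illustrative only; the exact spellings below are assumptions
// and may differ between LLVM releases, so check them against your tree):
//   * Legacy pass manager:  opt -indvars -S input.ll
//   * New pass manager:     opt -passes='loop(indvars)' -S input.ll
// Or, when assembling a custom new-PM pipeline in C++:
//   LoopPassManager LPM;
//   LPM.addPass(IndVarSimplifyPass());
//   FunctionPassManager FPM;
//   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));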