10b57cec5SDimitry Andric //===-- LoopPredication.cpp - Guard based loop predication pass -----------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // The LoopPredication pass tries to convert loop variant range checks to loop 100b57cec5SDimitry Andric // invariant by widening checks across loop iterations. For example, it will 110b57cec5SDimitry Andric // convert 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // for (i = 0; i < n; i++) { 140b57cec5SDimitry Andric // guard(i < len); 150b57cec5SDimitry Andric // ... 160b57cec5SDimitry Andric // } 170b57cec5SDimitry Andric // 180b57cec5SDimitry Andric // to 190b57cec5SDimitry Andric // 200b57cec5SDimitry Andric // for (i = 0; i < n; i++) { 210b57cec5SDimitry Andric // guard(n - 1 < len); 220b57cec5SDimitry Andric // ... 230b57cec5SDimitry Andric // } 240b57cec5SDimitry Andric // 250b57cec5SDimitry Andric // After this transformation the condition of the guard is loop invariant, so 260b57cec5SDimitry Andric // loop-unswitch can later unswitch the loop by this condition which basically 270b57cec5SDimitry Andric // predicates the loop by the widened condition: 280b57cec5SDimitry Andric // 290b57cec5SDimitry Andric // if (n - 1 < len) 300b57cec5SDimitry Andric // for (i = 0; i < n; i++) { 310b57cec5SDimitry Andric // ... 320b57cec5SDimitry Andric // } 330b57cec5SDimitry Andric // else 340b57cec5SDimitry Andric // deoptimize 350b57cec5SDimitry Andric // 360b57cec5SDimitry Andric // It's tempting to rely on SCEV here, but it has proven to be problematic. 370b57cec5SDimitry Andric // Generally the facts SCEV provides about the increment step of add 380b57cec5SDimitry Andric // recurrences are true if the backedge of the loop is taken, which implicitly 390b57cec5SDimitry Andric // assumes that the guard doesn't fail. Using these facts to optimize the 400b57cec5SDimitry Andric // guard results in a circular logic where the guard is optimized under the 410b57cec5SDimitry Andric // assumption that it never fails. 420b57cec5SDimitry Andric // 430b57cec5SDimitry Andric // For example, in the loop below the induction variable will be marked as nuw 440b57cec5SDimitry Andric // basing on the guard. Basing on nuw the guard predicate will be considered 450b57cec5SDimitry Andric // monotonic. Given a monotonic condition it's tempting to replace the induction 460b57cec5SDimitry Andric // variable in the condition with its value on the last iteration. But this 470b57cec5SDimitry Andric // transformation is not correct, e.g. e = 4, b = 5 breaks the loop. 480b57cec5SDimitry Andric // 490b57cec5SDimitry Andric // for (int i = b; i != e; i++) 500b57cec5SDimitry Andric // guard(i u< len) 510b57cec5SDimitry Andric // 520b57cec5SDimitry Andric // One of the ways to reason about this problem is to use an inductive proof 530b57cec5SDimitry Andric // approach. Given the loop: 540b57cec5SDimitry Andric // 550b57cec5SDimitry Andric // if (B(0)) { 560b57cec5SDimitry Andric // do { 570b57cec5SDimitry Andric // I = PHI(0, I.INC) 580b57cec5SDimitry Andric // I.INC = I + Step 590b57cec5SDimitry Andric // guard(G(I)); 600b57cec5SDimitry Andric // } while (B(I)); 610b57cec5SDimitry Andric // } 620b57cec5SDimitry Andric // 630b57cec5SDimitry Andric // where B(x) and G(x) are predicates that map integers to booleans, we want a 640b57cec5SDimitry Andric // loop invariant expression M such the following program has the same semantics 650b57cec5SDimitry Andric // as the above: 660b57cec5SDimitry Andric // 670b57cec5SDimitry Andric // if (B(0)) { 680b57cec5SDimitry Andric // do { 690b57cec5SDimitry Andric // I = PHI(0, I.INC) 700b57cec5SDimitry Andric // I.INC = I + Step 710b57cec5SDimitry Andric // guard(G(0) && M); 720b57cec5SDimitry Andric // } while (B(I)); 730b57cec5SDimitry Andric // } 740b57cec5SDimitry Andric // 750b57cec5SDimitry Andric // One solution for M is M = forall X . (G(X) && B(X)) => G(X + Step) 760b57cec5SDimitry Andric // 770b57cec5SDimitry Andric // Informal proof that the transformation above is correct: 780b57cec5SDimitry Andric // 790b57cec5SDimitry Andric // By the definition of guards we can rewrite the guard condition to: 800b57cec5SDimitry Andric // G(I) && G(0) && M 810b57cec5SDimitry Andric // 820b57cec5SDimitry Andric // Let's prove that for each iteration of the loop: 830b57cec5SDimitry Andric // G(0) && M => G(I) 840b57cec5SDimitry Andric // And the condition above can be simplified to G(Start) && M. 850b57cec5SDimitry Andric // 860b57cec5SDimitry Andric // Induction base. 870b57cec5SDimitry Andric // G(0) && M => G(0) 880b57cec5SDimitry Andric // 890b57cec5SDimitry Andric // Induction step. Assuming G(0) && M => G(I) on the subsequent 900b57cec5SDimitry Andric // iteration: 910b57cec5SDimitry Andric // 920b57cec5SDimitry Andric // B(I) is true because it's the backedge condition. 930b57cec5SDimitry Andric // G(I) is true because the backedge is guarded by this condition. 940b57cec5SDimitry Andric // 950b57cec5SDimitry Andric // So M = forall X . (G(X) && B(X)) => G(X + Step) implies G(I + Step). 960b57cec5SDimitry Andric // 970b57cec5SDimitry Andric // Note that we can use anything stronger than M, i.e. any condition which 980b57cec5SDimitry Andric // implies M. 990b57cec5SDimitry Andric // 1000b57cec5SDimitry Andric // When S = 1 (i.e. forward iterating loop), the transformation is supported 1010b57cec5SDimitry Andric // when: 1020b57cec5SDimitry Andric // * The loop has a single latch with the condition of the form: 1030b57cec5SDimitry Andric // B(X) = latchStart + X <pred> latchLimit, 1040b57cec5SDimitry Andric // where <pred> is u<, u<=, s<, or s<=. 1050b57cec5SDimitry Andric // * The guard condition is of the form 1060b57cec5SDimitry Andric // G(X) = guardStart + X u< guardLimit 1070b57cec5SDimitry Andric // 1080b57cec5SDimitry Andric // For the ult latch comparison case M is: 1090b57cec5SDimitry Andric // forall X . guardStart + X u< guardLimit && latchStart + X <u latchLimit => 1100b57cec5SDimitry Andric // guardStart + X + 1 u< guardLimit 1110b57cec5SDimitry Andric // 1120b57cec5SDimitry Andric // The only way the antecedent can be true and the consequent can be false is 1130b57cec5SDimitry Andric // if 1140b57cec5SDimitry Andric // X == guardLimit - 1 - guardStart 1150b57cec5SDimitry Andric // (and guardLimit is non-zero, but we won't use this latter fact). 1160b57cec5SDimitry Andric // If X == guardLimit - 1 - guardStart then the second half of the antecedent is 1170b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart u< latchLimit 1180b57cec5SDimitry Andric // and its negation is 1190b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart u>= latchLimit 1200b57cec5SDimitry Andric // 1210b57cec5SDimitry Andric // In other words, if 1220b57cec5SDimitry Andric // latchLimit u<= latchStart + guardLimit - 1 - guardStart 1230b57cec5SDimitry Andric // then: 1240b57cec5SDimitry Andric // (the ranges below are written in ConstantRange notation, where [A, B) is the 1250b57cec5SDimitry Andric // set for (I = A; I != B; I++ /*maywrap*/) yield(I);) 1260b57cec5SDimitry Andric // 1270b57cec5SDimitry Andric // forall X . guardStart + X u< guardLimit && 1280b57cec5SDimitry Andric // latchStart + X u< latchLimit => 1290b57cec5SDimitry Andric // guardStart + X + 1 u< guardLimit 1300b57cec5SDimitry Andric // == forall X . guardStart + X u< guardLimit && 1310b57cec5SDimitry Andric // latchStart + X u< latchStart + guardLimit - 1 - guardStart => 1320b57cec5SDimitry Andric // guardStart + X + 1 u< guardLimit 1330b57cec5SDimitry Andric // == forall X . (guardStart + X) in [0, guardLimit) && 1340b57cec5SDimitry Andric // (latchStart + X) in [0, latchStart + guardLimit - 1 - guardStart) => 1350b57cec5SDimitry Andric // (guardStart + X + 1) in [0, guardLimit) 1360b57cec5SDimitry Andric // == forall X . X in [-guardStart, guardLimit - guardStart) && 1370b57cec5SDimitry Andric // X in [-latchStart, guardLimit - 1 - guardStart) => 1380b57cec5SDimitry Andric // X in [-guardStart - 1, guardLimit - guardStart - 1) 1390b57cec5SDimitry Andric // == true 1400b57cec5SDimitry Andric // 1410b57cec5SDimitry Andric // So the widened condition is: 1420b57cec5SDimitry Andric // guardStart u< guardLimit && 1430b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart u>= latchLimit 1440b57cec5SDimitry Andric // Similarly for ule condition the widened condition is: 1450b57cec5SDimitry Andric // guardStart u< guardLimit && 1460b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart u> latchLimit 1470b57cec5SDimitry Andric // For slt condition the widened condition is: 1480b57cec5SDimitry Andric // guardStart u< guardLimit && 1490b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart s>= latchLimit 1500b57cec5SDimitry Andric // For sle condition the widened condition is: 1510b57cec5SDimitry Andric // guardStart u< guardLimit && 1520b57cec5SDimitry Andric // latchStart + guardLimit - 1 - guardStart s> latchLimit 1530b57cec5SDimitry Andric // 1540b57cec5SDimitry Andric // When S = -1 (i.e. reverse iterating loop), the transformation is supported 1550b57cec5SDimitry Andric // when: 1560b57cec5SDimitry Andric // * The loop has a single latch with the condition of the form: 1570b57cec5SDimitry Andric // B(X) = X <pred> latchLimit, where <pred> is u>, u>=, s>, or s>=. 1580b57cec5SDimitry Andric // * The guard condition is of the form 1590b57cec5SDimitry Andric // G(X) = X - 1 u< guardLimit 1600b57cec5SDimitry Andric // 1610b57cec5SDimitry Andric // For the ugt latch comparison case M is: 1620b57cec5SDimitry Andric // forall X. X-1 u< guardLimit and X u> latchLimit => X-2 u< guardLimit 1630b57cec5SDimitry Andric // 1640b57cec5SDimitry Andric // The only way the antecedent can be true and the consequent can be false is if 1650b57cec5SDimitry Andric // X == 1. 1660b57cec5SDimitry Andric // If X == 1 then the second half of the antecedent is 1670b57cec5SDimitry Andric // 1 u> latchLimit, and its negation is latchLimit u>= 1. 1680b57cec5SDimitry Andric // 1690b57cec5SDimitry Andric // So the widened condition is: 1700b57cec5SDimitry Andric // guardStart u< guardLimit && latchLimit u>= 1. 1710b57cec5SDimitry Andric // Similarly for sgt condition the widened condition is: 1720b57cec5SDimitry Andric // guardStart u< guardLimit && latchLimit s>= 1. 1730b57cec5SDimitry Andric // For uge condition the widened condition is: 1740b57cec5SDimitry Andric // guardStart u< guardLimit && latchLimit u> 1. 1750b57cec5SDimitry Andric // For sge condition the widened condition is: 1760b57cec5SDimitry Andric // guardStart u< guardLimit && latchLimit s> 1. 1770b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 1780b57cec5SDimitry Andric 1790b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopPredication.h" 1800b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 1810b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 1820b57cec5SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h" 1830b57cec5SDimitry Andric #include "llvm/Analysis/GuardUtils.h" 1840b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 1850b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h" 186349cc55cSDimitry Andric #include "llvm/Analysis/MemorySSA.h" 187349cc55cSDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h" 1880b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 1890b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 1900b57cec5SDimitry Andric #include "llvm/IR/Function.h" 1910b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 1920b57cec5SDimitry Andric #include "llvm/IR/Module.h" 1930b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 194bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 195480093f4SDimitry Andric #include "llvm/InitializePasses.h" 1960b57cec5SDimitry Andric #include "llvm/Pass.h" 197480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 1980b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 1990b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 200480093f4SDimitry Andric #include "llvm/Transforms/Utils/GuardUtils.h" 2010b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 2020b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 2035ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 204bdd1243dSDimitry Andric #include <optional> 2050b57cec5SDimitry Andric 2060b57cec5SDimitry Andric #define DEBUG_TYPE "loop-predication" 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric STATISTIC(TotalConsidered, "Number of guards considered"); 2090b57cec5SDimitry Andric STATISTIC(TotalWidened, "Number of checks widened"); 2100b57cec5SDimitry Andric 2110b57cec5SDimitry Andric using namespace llvm; 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric static cl::opt<bool> EnableIVTruncation("loop-predication-enable-iv-truncation", 2140b57cec5SDimitry Andric cl::Hidden, cl::init(true)); 2150b57cec5SDimitry Andric 2160b57cec5SDimitry Andric static cl::opt<bool> EnableCountDownLoop("loop-predication-enable-count-down-loop", 2170b57cec5SDimitry Andric cl::Hidden, cl::init(true)); 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric static cl::opt<bool> 2200b57cec5SDimitry Andric SkipProfitabilityChecks("loop-predication-skip-profitability-checks", 2210b57cec5SDimitry Andric cl::Hidden, cl::init(false)); 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric // This is the scale factor for the latch probability. We use this during 2240b57cec5SDimitry Andric // profitability analysis to find other exiting blocks that have a much higher 2250b57cec5SDimitry Andric // probability of exiting the loop instead of loop exiting via latch. 2260b57cec5SDimitry Andric // This value should be greater than 1 for a sane profitability check. 2270b57cec5SDimitry Andric static cl::opt<float> LatchExitProbabilityScale( 2280b57cec5SDimitry Andric "loop-predication-latch-probability-scale", cl::Hidden, cl::init(2.0), 2290b57cec5SDimitry Andric cl::desc("scale factor for the latch probability. Value should be greater " 2300b57cec5SDimitry Andric "than 1. Lower values are ignored")); 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric static cl::opt<bool> PredicateWidenableBranchGuards( 2330b57cec5SDimitry Andric "loop-predication-predicate-widenable-branches-to-deopt", cl::Hidden, 2340b57cec5SDimitry Andric cl::desc("Whether or not we should predicate guards " 2350b57cec5SDimitry Andric "expressed as widenable branches to deoptimize blocks"), 2360b57cec5SDimitry Andric cl::init(true)); 2370b57cec5SDimitry Andric 238bdd1243dSDimitry Andric static cl::opt<bool> InsertAssumesOfPredicatedGuardsConditions( 239bdd1243dSDimitry Andric "loop-predication-insert-assumes-of-predicated-guards-conditions", 240bdd1243dSDimitry Andric cl::Hidden, 241bdd1243dSDimitry Andric cl::desc("Whether or not we should insert assumes of conditions of " 242bdd1243dSDimitry Andric "predicated guards"), 243bdd1243dSDimitry Andric cl::init(true)); 244bdd1243dSDimitry Andric 2450b57cec5SDimitry Andric namespace { 2460b57cec5SDimitry Andric /// Represents an induction variable check: 2470b57cec5SDimitry Andric /// icmp Pred, <induction variable>, <loop invariant limit> 2480b57cec5SDimitry Andric struct LoopICmp { 2490b57cec5SDimitry Andric ICmpInst::Predicate Pred; 2500b57cec5SDimitry Andric const SCEVAddRecExpr *IV; 2510b57cec5SDimitry Andric const SCEV *Limit; 2520b57cec5SDimitry Andric LoopICmp(ICmpInst::Predicate Pred, const SCEVAddRecExpr *IV, 2530b57cec5SDimitry Andric const SCEV *Limit) 2540b57cec5SDimitry Andric : Pred(Pred), IV(IV), Limit(Limit) {} 25581ad6265SDimitry Andric LoopICmp() = default; 2560b57cec5SDimitry Andric void dump() { 2570b57cec5SDimitry Andric dbgs() << "LoopICmp Pred = " << Pred << ", IV = " << *IV 2580b57cec5SDimitry Andric << ", Limit = " << *Limit << "\n"; 2590b57cec5SDimitry Andric } 2600b57cec5SDimitry Andric }; 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric class LoopPredication { 2630b57cec5SDimitry Andric AliasAnalysis *AA; 264480093f4SDimitry Andric DominatorTree *DT; 2650b57cec5SDimitry Andric ScalarEvolution *SE; 266480093f4SDimitry Andric LoopInfo *LI; 267349cc55cSDimitry Andric MemorySSAUpdater *MSSAU; 2680b57cec5SDimitry Andric 2690b57cec5SDimitry Andric Loop *L; 2700b57cec5SDimitry Andric const DataLayout *DL; 2710b57cec5SDimitry Andric BasicBlock *Preheader; 2720b57cec5SDimitry Andric LoopICmp LatchCheck; 2730b57cec5SDimitry Andric 2740b57cec5SDimitry Andric bool isSupportedStep(const SCEV* Step); 275bdd1243dSDimitry Andric std::optional<LoopICmp> parseLoopICmp(ICmpInst *ICI); 276bdd1243dSDimitry Andric std::optional<LoopICmp> parseLoopLatchICmp(); 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric /// Return an insertion point suitable for inserting a safe to speculate 2790b57cec5SDimitry Andric /// instruction whose only user will be 'User' which has operands 'Ops'. A 2800b57cec5SDimitry Andric /// trivial result would be the at the User itself, but we try to return a 2810b57cec5SDimitry Andric /// loop invariant location if possible. 2820b57cec5SDimitry Andric Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops); 2830b57cec5SDimitry Andric /// Same as above, *except* that this uses the SCEV definition of invariant 2840b57cec5SDimitry Andric /// which is that an expression *can be made* invariant via SCEVExpander. 285*5f757f3fSDimitry Andric /// Thus, this version is only suitable for finding an insert point to be 2860b57cec5SDimitry Andric /// passed to SCEVExpander! 287fcaf7f86SDimitry Andric Instruction *findInsertPt(const SCEVExpander &Expander, Instruction *User, 288fcaf7f86SDimitry Andric ArrayRef<const SCEV *> Ops); 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric /// Return true if the value is known to produce a single fixed value across 2910b57cec5SDimitry Andric /// all iterations on which it executes. Note that this does not imply 2925ffd83dbSDimitry Andric /// speculation safety. That must be established separately. 2930b57cec5SDimitry Andric bool isLoopInvariantValue(const SCEV* S); 2940b57cec5SDimitry Andric 2950b57cec5SDimitry Andric Value *expandCheck(SCEVExpander &Expander, Instruction *Guard, 2960b57cec5SDimitry Andric ICmpInst::Predicate Pred, const SCEV *LHS, 2970b57cec5SDimitry Andric const SCEV *RHS); 2980b57cec5SDimitry Andric 299bdd1243dSDimitry Andric std::optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, 3000b57cec5SDimitry Andric SCEVExpander &Expander, 3010b57cec5SDimitry Andric Instruction *Guard); 302bdd1243dSDimitry Andric std::optional<Value *> 303bdd1243dSDimitry Andric widenICmpRangeCheckIncrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck, 304bdd1243dSDimitry Andric SCEVExpander &Expander, 305bdd1243dSDimitry Andric Instruction *Guard); 306bdd1243dSDimitry Andric std::optional<Value *> 307bdd1243dSDimitry Andric widenICmpRangeCheckDecrementingLoop(LoopICmp LatchCheck, LoopICmp RangeCheck, 3080b57cec5SDimitry Andric SCEVExpander &Expander, 3090b57cec5SDimitry Andric Instruction *Guard); 310*5f757f3fSDimitry Andric void widenChecks(SmallVectorImpl<Value *> &Checks, 311*5f757f3fSDimitry Andric SmallVectorImpl<Value *> &WidenedChecks, 3120b57cec5SDimitry Andric SCEVExpander &Expander, Instruction *Guard); 3130b57cec5SDimitry Andric bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander); 3140b57cec5SDimitry Andric bool widenWidenableBranchGuardConditions(BranchInst *Guard, SCEVExpander &Expander); 3150b57cec5SDimitry Andric // If the loop always exits through another block in the loop, we should not 3160b57cec5SDimitry Andric // predicate based on the latch check. For example, the latch check can be a 3170b57cec5SDimitry Andric // very coarse grained check and there can be more fine grained exit checks 318349cc55cSDimitry Andric // within the loop. 3190b57cec5SDimitry Andric bool isLoopProfitableToPredicate(); 3200b57cec5SDimitry Andric 321480093f4SDimitry Andric bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter); 322480093f4SDimitry Andric 3230b57cec5SDimitry Andric public: 324349cc55cSDimitry Andric LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE, 325349cc55cSDimitry Andric LoopInfo *LI, MemorySSAUpdater *MSSAU) 326349cc55cSDimitry Andric : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){}; 3270b57cec5SDimitry Andric bool runOnLoop(Loop *L); 3280b57cec5SDimitry Andric }; 3290b57cec5SDimitry Andric 3305ffd83dbSDimitry Andric } // end namespace 3310b57cec5SDimitry Andric 3320b57cec5SDimitry Andric PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM, 3330b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, 3340b57cec5SDimitry Andric LPMUpdater &U) { 335349cc55cSDimitry Andric std::unique_ptr<MemorySSAUpdater> MSSAU; 336349cc55cSDimitry Andric if (AR.MSSA) 337349cc55cSDimitry Andric MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 338349cc55cSDimitry Andric LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, 339349cc55cSDimitry Andric MSSAU ? MSSAU.get() : nullptr); 3400b57cec5SDimitry Andric if (!LP.runOnLoop(&L)) 3410b57cec5SDimitry Andric return PreservedAnalyses::all(); 3420b57cec5SDimitry Andric 343349cc55cSDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 344349cc55cSDimitry Andric if (AR.MSSA) 345349cc55cSDimitry Andric PA.preserve<MemorySSAAnalysis>(); 346349cc55cSDimitry Andric return PA; 3470b57cec5SDimitry Andric } 3480b57cec5SDimitry Andric 349bdd1243dSDimitry Andric std::optional<LoopICmp> LoopPredication::parseLoopICmp(ICmpInst *ICI) { 3500b57cec5SDimitry Andric auto Pred = ICI->getPredicate(); 3510b57cec5SDimitry Andric auto *LHS = ICI->getOperand(0); 3520b57cec5SDimitry Andric auto *RHS = ICI->getOperand(1); 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric const SCEV *LHSS = SE->getSCEV(LHS); 3550b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(LHSS)) 356bdd1243dSDimitry Andric return std::nullopt; 3570b57cec5SDimitry Andric const SCEV *RHSS = SE->getSCEV(RHS); 3580b57cec5SDimitry Andric if (isa<SCEVCouldNotCompute>(RHSS)) 359bdd1243dSDimitry Andric return std::nullopt; 3600b57cec5SDimitry Andric 3610b57cec5SDimitry Andric // Canonicalize RHS to be loop invariant bound, LHS - a loop computable IV 3620b57cec5SDimitry Andric if (SE->isLoopInvariant(LHSS, L)) { 3630b57cec5SDimitry Andric std::swap(LHS, RHS); 3640b57cec5SDimitry Andric std::swap(LHSS, RHSS); 3650b57cec5SDimitry Andric Pred = ICmpInst::getSwappedPredicate(Pred); 3660b57cec5SDimitry Andric } 3670b57cec5SDimitry Andric 3680b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHSS); 3690b57cec5SDimitry Andric if (!AR || AR->getLoop() != L) 370bdd1243dSDimitry Andric return std::nullopt; 3710b57cec5SDimitry Andric 3720b57cec5SDimitry Andric return LoopICmp(Pred, AR, RHSS); 3730b57cec5SDimitry Andric } 3740b57cec5SDimitry Andric 3750b57cec5SDimitry Andric Value *LoopPredication::expandCheck(SCEVExpander &Expander, 3760b57cec5SDimitry Andric Instruction *Guard, 3770b57cec5SDimitry Andric ICmpInst::Predicate Pred, const SCEV *LHS, 3780b57cec5SDimitry Andric const SCEV *RHS) { 3790b57cec5SDimitry Andric Type *Ty = LHS->getType(); 3800b57cec5SDimitry Andric assert(Ty == RHS->getType() && "expandCheck operands have different types?"); 3810b57cec5SDimitry Andric 3820b57cec5SDimitry Andric if (SE->isLoopInvariant(LHS, L) && SE->isLoopInvariant(RHS, L)) { 3830b57cec5SDimitry Andric IRBuilder<> Builder(Guard); 3840b57cec5SDimitry Andric if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS)) 3850b57cec5SDimitry Andric return Builder.getTrue(); 3860b57cec5SDimitry Andric if (SE->isLoopEntryGuardedByCond(L, ICmpInst::getInversePredicate(Pred), 3870b57cec5SDimitry Andric LHS, RHS)) 3880b57cec5SDimitry Andric return Builder.getFalse(); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric 391fcaf7f86SDimitry Andric Value *LHSV = 392fcaf7f86SDimitry Andric Expander.expandCodeFor(LHS, Ty, findInsertPt(Expander, Guard, {LHS})); 393fcaf7f86SDimitry Andric Value *RHSV = 394fcaf7f86SDimitry Andric Expander.expandCodeFor(RHS, Ty, findInsertPt(Expander, Guard, {RHS})); 3950b57cec5SDimitry Andric IRBuilder<> Builder(findInsertPt(Guard, {LHSV, RHSV})); 3960b57cec5SDimitry Andric return Builder.CreateICmp(Pred, LHSV, RHSV); 3970b57cec5SDimitry Andric } 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric // Returns true if its safe to truncate the IV to RangeCheckType. 4000b57cec5SDimitry Andric // When the IV type is wider than the range operand type, we can still do loop 4010b57cec5SDimitry Andric // predication, by generating SCEVs for the range and latch that are of the 4020b57cec5SDimitry Andric // same type. We achieve this by generating a SCEV truncate expression for the 4030b57cec5SDimitry Andric // latch IV. This is done iff truncation of the IV is a safe operation, 4040b57cec5SDimitry Andric // without loss of information. 4050b57cec5SDimitry Andric // Another way to achieve this is by generating a wider type SCEV for the 4060b57cec5SDimitry Andric // range check operand, however, this needs a more involved check that 4070b57cec5SDimitry Andric // operands do not overflow. This can lead to loss of information when the 4080b57cec5SDimitry Andric // range operand is of the form: add i32 %offset, %iv. We need to prove that 4090b57cec5SDimitry Andric // sext(x + y) is same as sext(x) + sext(y). 4100b57cec5SDimitry Andric // This function returns true if we can safely represent the IV type in 4110b57cec5SDimitry Andric // the RangeCheckType without loss of information. 4120b57cec5SDimitry Andric static bool isSafeToTruncateWideIVType(const DataLayout &DL, 4130b57cec5SDimitry Andric ScalarEvolution &SE, 4140b57cec5SDimitry Andric const LoopICmp LatchCheck, 4150b57cec5SDimitry Andric Type *RangeCheckType) { 4160b57cec5SDimitry Andric if (!EnableIVTruncation) 4170b57cec5SDimitry Andric return false; 418bdd1243dSDimitry Andric assert(DL.getTypeSizeInBits(LatchCheck.IV->getType()).getFixedValue() > 419bdd1243dSDimitry Andric DL.getTypeSizeInBits(RangeCheckType).getFixedValue() && 4200b57cec5SDimitry Andric "Expected latch check IV type to be larger than range check operand " 4210b57cec5SDimitry Andric "type!"); 4220b57cec5SDimitry Andric // The start and end values of the IV should be known. This is to guarantee 4230b57cec5SDimitry Andric // that truncating the wide type will not lose information. 4240b57cec5SDimitry Andric auto *Limit = dyn_cast<SCEVConstant>(LatchCheck.Limit); 4250b57cec5SDimitry Andric auto *Start = dyn_cast<SCEVConstant>(LatchCheck.IV->getStart()); 4260b57cec5SDimitry Andric if (!Limit || !Start) 4270b57cec5SDimitry Andric return false; 4280b57cec5SDimitry Andric // This check makes sure that the IV does not change sign during loop 4290b57cec5SDimitry Andric // iterations. Consider latchType = i64, LatchStart = 5, Pred = ICMP_SGE, 4300b57cec5SDimitry Andric // LatchEnd = 2, rangeCheckType = i32. If it's not a monotonic predicate, the 4310b57cec5SDimitry Andric // IV wraps around, and the truncation of the IV would lose the range of 4320b57cec5SDimitry Andric // iterations between 2^32 and 2^64. 433e8d8bef9SDimitry Andric if (!SE.getMonotonicPredicateType(LatchCheck.IV, LatchCheck.Pred)) 4340b57cec5SDimitry Andric return false; 4350b57cec5SDimitry Andric // The active bits should be less than the bits in the RangeCheckType. This 4360b57cec5SDimitry Andric // guarantees that truncating the latch check to RangeCheckType is a safe 4370b57cec5SDimitry Andric // operation. 438e8d8bef9SDimitry Andric auto RangeCheckTypeBitSize = 439bdd1243dSDimitry Andric DL.getTypeSizeInBits(RangeCheckType).getFixedValue(); 4400b57cec5SDimitry Andric return Start->getAPInt().getActiveBits() < RangeCheckTypeBitSize && 4410b57cec5SDimitry Andric Limit->getAPInt().getActiveBits() < RangeCheckTypeBitSize; 4420b57cec5SDimitry Andric } 4430b57cec5SDimitry Andric 4440b57cec5SDimitry Andric 4450b57cec5SDimitry Andric // Return an LoopICmp describing a latch check equivlent to LatchCheck but with 4460b57cec5SDimitry Andric // the requested type if safe to do so. May involve the use of a new IV. 447bdd1243dSDimitry Andric static std::optional<LoopICmp> generateLoopLatchCheck(const DataLayout &DL, 4480b57cec5SDimitry Andric ScalarEvolution &SE, 4490b57cec5SDimitry Andric const LoopICmp LatchCheck, 4500b57cec5SDimitry Andric Type *RangeCheckType) { 4510b57cec5SDimitry Andric 4520b57cec5SDimitry Andric auto *LatchType = LatchCheck.IV->getType(); 4530b57cec5SDimitry Andric if (RangeCheckType == LatchType) 4540b57cec5SDimitry Andric return LatchCheck; 4550b57cec5SDimitry Andric // For now, bail out if latch type is narrower than range type. 456bdd1243dSDimitry Andric if (DL.getTypeSizeInBits(LatchType).getFixedValue() < 457bdd1243dSDimitry Andric DL.getTypeSizeInBits(RangeCheckType).getFixedValue()) 458bdd1243dSDimitry Andric return std::nullopt; 4590b57cec5SDimitry Andric if (!isSafeToTruncateWideIVType(DL, SE, LatchCheck, RangeCheckType)) 460bdd1243dSDimitry Andric return std::nullopt; 4610b57cec5SDimitry Andric // We can now safely identify the truncated version of the IV and limit for 4620b57cec5SDimitry Andric // RangeCheckType. 4630b57cec5SDimitry Andric LoopICmp NewLatchCheck; 4640b57cec5SDimitry Andric NewLatchCheck.Pred = LatchCheck.Pred; 4650b57cec5SDimitry Andric NewLatchCheck.IV = dyn_cast<SCEVAddRecExpr>( 4660b57cec5SDimitry Andric SE.getTruncateExpr(LatchCheck.IV, RangeCheckType)); 4670b57cec5SDimitry Andric if (!NewLatchCheck.IV) 468bdd1243dSDimitry Andric return std::nullopt; 4690b57cec5SDimitry Andric NewLatchCheck.Limit = SE.getTruncateExpr(LatchCheck.Limit, RangeCheckType); 4700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IV of type: " << *LatchType 4710b57cec5SDimitry Andric << "can be represented as range check type:" 4720b57cec5SDimitry Andric << *RangeCheckType << "\n"); 4730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LatchCheck.IV: " << *NewLatchCheck.IV << "\n"); 4740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LatchCheck.Limit: " << *NewLatchCheck.Limit << "\n"); 4750b57cec5SDimitry Andric return NewLatchCheck; 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4780b57cec5SDimitry Andric bool LoopPredication::isSupportedStep(const SCEV* Step) { 4790b57cec5SDimitry Andric return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric Instruction *LoopPredication::findInsertPt(Instruction *Use, 4830b57cec5SDimitry Andric ArrayRef<Value*> Ops) { 4840b57cec5SDimitry Andric for (Value *Op : Ops) 4850b57cec5SDimitry Andric if (!L->isLoopInvariant(Op)) 4860b57cec5SDimitry Andric return Use; 4870b57cec5SDimitry Andric return Preheader->getTerminator(); 4880b57cec5SDimitry Andric } 4890b57cec5SDimitry Andric 490fcaf7f86SDimitry Andric Instruction *LoopPredication::findInsertPt(const SCEVExpander &Expander, 491fcaf7f86SDimitry Andric Instruction *Use, 4920b57cec5SDimitry Andric ArrayRef<const SCEV *> Ops) { 4930b57cec5SDimitry Andric // Subtlety: SCEV considers things to be invariant if the value produced is 4940b57cec5SDimitry Andric // the same across iterations. This is not the same as being able to 4950b57cec5SDimitry Andric // evaluate outside the loop, which is what we actually need here. 4960b57cec5SDimitry Andric for (const SCEV *Op : Ops) 4970b57cec5SDimitry Andric if (!SE->isLoopInvariant(Op, L) || 498fcaf7f86SDimitry Andric !Expander.isSafeToExpandAt(Op, Preheader->getTerminator())) 4990b57cec5SDimitry Andric return Use; 5000b57cec5SDimitry Andric return Preheader->getTerminator(); 5010b57cec5SDimitry Andric } 5020b57cec5SDimitry Andric 5030b57cec5SDimitry Andric bool LoopPredication::isLoopInvariantValue(const SCEV* S) { 5040b57cec5SDimitry Andric // Handling expressions which produce invariant results, but *haven't* yet 5050b57cec5SDimitry Andric // been removed from the loop serves two important purposes. 5060b57cec5SDimitry Andric // 1) Most importantly, it resolves a pass ordering cycle which would 5070b57cec5SDimitry Andric // otherwise need us to iteration licm, loop-predication, and either 5080b57cec5SDimitry Andric // loop-unswitch or loop-peeling to make progress on examples with lots of 5090b57cec5SDimitry Andric // predicable range checks in a row. (Since, in the general case, we can't 5100b57cec5SDimitry Andric // hoist the length checks until the dominating checks have been discharged 5110b57cec5SDimitry Andric // as we can't prove doing so is safe.) 5120b57cec5SDimitry Andric // 2) As a nice side effect, this exposes the value of peeling or unswitching 5130b57cec5SDimitry Andric // much more obviously in the IR. Otherwise, the cost modeling for other 5140b57cec5SDimitry Andric // transforms would end up needing to duplicate all of this logic to model a 5150b57cec5SDimitry Andric // check which becomes predictable based on a modeled peel or unswitch. 5160b57cec5SDimitry Andric // 5170b57cec5SDimitry Andric // The cost of doing so in the worst case is an extra fill from the stack in 5180b57cec5SDimitry Andric // the loop to materialize the loop invariant test value instead of checking 5190b57cec5SDimitry Andric // against the original IV which is presumable in a register inside the loop. 5200b57cec5SDimitry Andric // Such cases are presumably rare, and hint at missing oppurtunities for 5210b57cec5SDimitry Andric // other passes. 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric if (SE->isLoopInvariant(S, L)) 5240b57cec5SDimitry Andric // Note: This the SCEV variant, so the original Value* may be within the 5250b57cec5SDimitry Andric // loop even though SCEV has proven it is loop invariant. 5260b57cec5SDimitry Andric return true; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric // Handle a particular important case which SCEV doesn't yet know about which 5290b57cec5SDimitry Andric // shows up in range checks on arrays with immutable lengths. 5300b57cec5SDimitry Andric // TODO: This should be sunk inside SCEV. 5310b57cec5SDimitry Andric if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) 5320b57cec5SDimitry Andric if (const auto *LI = dyn_cast<LoadInst>(U->getValue())) 5330b57cec5SDimitry Andric if (LI->isUnordered() && L->hasLoopInvariantOperands(LI)) 534bdd1243dSDimitry Andric if (!isModSet(AA->getModRefInfoMask(LI->getOperand(0))) || 5358bcb0991SDimitry Andric LI->hasMetadata(LLVMContext::MD_invariant_load)) 5360b57cec5SDimitry Andric return true; 5370b57cec5SDimitry Andric return false; 5380b57cec5SDimitry Andric } 5390b57cec5SDimitry Andric 540bdd1243dSDimitry Andric std::optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop( 541bdd1243dSDimitry Andric LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, 542bdd1243dSDimitry Andric Instruction *Guard) { 5430b57cec5SDimitry Andric auto *Ty = RangeCheck.IV->getType(); 5440b57cec5SDimitry Andric // Generate the widened condition for the forward loop: 5450b57cec5SDimitry Andric // guardStart u< guardLimit && 5460b57cec5SDimitry Andric // latchLimit <pred> guardLimit - 1 - guardStart + latchStart 5470b57cec5SDimitry Andric // where <pred> depends on the latch condition predicate. See the file 5480b57cec5SDimitry Andric // header comment for the reasoning. 5490b57cec5SDimitry Andric // guardLimit - guardStart + latchStart - 1 5500b57cec5SDimitry Andric const SCEV *GuardStart = RangeCheck.IV->getStart(); 5510b57cec5SDimitry Andric const SCEV *GuardLimit = RangeCheck.Limit; 5520b57cec5SDimitry Andric const SCEV *LatchStart = LatchCheck.IV->getStart(); 5530b57cec5SDimitry Andric const SCEV *LatchLimit = LatchCheck.Limit; 5540b57cec5SDimitry Andric // Subtlety: We need all the values to be *invariant* across all iterations, 5550b57cec5SDimitry Andric // but we only need to check expansion safety for those which *aren't* 5560b57cec5SDimitry Andric // already guaranteed to dominate the guard. 5570b57cec5SDimitry Andric if (!isLoopInvariantValue(GuardStart) || 5580b57cec5SDimitry Andric !isLoopInvariantValue(GuardLimit) || 5590b57cec5SDimitry Andric !isLoopInvariantValue(LatchStart) || 5600b57cec5SDimitry Andric !isLoopInvariantValue(LatchLimit)) { 5610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 562bdd1243dSDimitry Andric return std::nullopt; 5630b57cec5SDimitry Andric } 564fcaf7f86SDimitry Andric if (!Expander.isSafeToExpandAt(LatchStart, Guard) || 565fcaf7f86SDimitry Andric !Expander.isSafeToExpandAt(LatchLimit, Guard)) { 5660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 567bdd1243dSDimitry Andric return std::nullopt; 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric // guardLimit - guardStart + latchStart - 1 5710b57cec5SDimitry Andric const SCEV *RHS = 5720b57cec5SDimitry Andric SE->getAddExpr(SE->getMinusSCEV(GuardLimit, GuardStart), 5730b57cec5SDimitry Andric SE->getMinusSCEV(LatchStart, SE->getOne(Ty))); 5740b57cec5SDimitry Andric auto LimitCheckPred = 5750b57cec5SDimitry Andric ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LHS: " << *LatchLimit << "\n"); 5780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "RHS: " << *RHS << "\n"); 5790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Pred: " << LimitCheckPred << "\n"); 5800b57cec5SDimitry Andric 5810b57cec5SDimitry Andric auto *LimitCheck = 5820b57cec5SDimitry Andric expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, RHS); 5830b57cec5SDimitry Andric auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred, 5840b57cec5SDimitry Andric GuardStart, GuardLimit); 5850b57cec5SDimitry Andric IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck})); 58606c3fb27SDimitry Andric return Builder.CreateFreeze( 58706c3fb27SDimitry Andric Builder.CreateAnd(FirstIterationCheck, LimitCheck)); 5880b57cec5SDimitry Andric } 5890b57cec5SDimitry Andric 590bdd1243dSDimitry Andric std::optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop( 591bdd1243dSDimitry Andric LoopICmp LatchCheck, LoopICmp RangeCheck, SCEVExpander &Expander, 592bdd1243dSDimitry Andric Instruction *Guard) { 5930b57cec5SDimitry Andric auto *Ty = RangeCheck.IV->getType(); 5940b57cec5SDimitry Andric const SCEV *GuardStart = RangeCheck.IV->getStart(); 5950b57cec5SDimitry Andric const SCEV *GuardLimit = RangeCheck.Limit; 5960b57cec5SDimitry Andric const SCEV *LatchStart = LatchCheck.IV->getStart(); 5970b57cec5SDimitry Andric const SCEV *LatchLimit = LatchCheck.Limit; 5980b57cec5SDimitry Andric // Subtlety: We need all the values to be *invariant* across all iterations, 5990b57cec5SDimitry Andric // but we only need to check expansion safety for those which *aren't* 6000b57cec5SDimitry Andric // already guaranteed to dominate the guard. 6010b57cec5SDimitry Andric if (!isLoopInvariantValue(GuardStart) || 6020b57cec5SDimitry Andric !isLoopInvariantValue(GuardLimit) || 6030b57cec5SDimitry Andric !isLoopInvariantValue(LatchStart) || 6040b57cec5SDimitry Andric !isLoopInvariantValue(LatchLimit)) { 6050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 606bdd1243dSDimitry Andric return std::nullopt; 6070b57cec5SDimitry Andric } 608fcaf7f86SDimitry Andric if (!Expander.isSafeToExpandAt(LatchStart, Guard) || 609fcaf7f86SDimitry Andric !Expander.isSafeToExpandAt(LatchLimit, Guard)) { 6100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Can't expand limit check!\n"); 611bdd1243dSDimitry Andric return std::nullopt; 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric // The decrement of the latch check IV should be the same as the 6140b57cec5SDimitry Andric // rangeCheckIV. 6150b57cec5SDimitry Andric auto *PostDecLatchCheckIV = LatchCheck.IV->getPostIncExpr(*SE); 6160b57cec5SDimitry Andric if (RangeCheck.IV != PostDecLatchCheckIV) { 6170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not the same. PostDecLatchCheckIV: " 6180b57cec5SDimitry Andric << *PostDecLatchCheckIV 6190b57cec5SDimitry Andric << " and RangeCheckIV: " << *RangeCheck.IV << "\n"); 620bdd1243dSDimitry Andric return std::nullopt; 6210b57cec5SDimitry Andric } 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric // Generate the widened condition for CountDownLoop: 6240b57cec5SDimitry Andric // guardStart u< guardLimit && 6250b57cec5SDimitry Andric // latchLimit <pred> 1. 6260b57cec5SDimitry Andric // See the header comment for reasoning of the checks. 6270b57cec5SDimitry Andric auto LimitCheckPred = 6280b57cec5SDimitry Andric ICmpInst::getFlippedStrictnessPredicate(LatchCheck.Pred); 6290b57cec5SDimitry Andric auto *FirstIterationCheck = expandCheck(Expander, Guard, 6300b57cec5SDimitry Andric ICmpInst::ICMP_ULT, 6310b57cec5SDimitry Andric GuardStart, GuardLimit); 6320b57cec5SDimitry Andric auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit, 6330b57cec5SDimitry Andric SE->getOne(Ty)); 6340b57cec5SDimitry Andric IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck})); 63506c3fb27SDimitry Andric return Builder.CreateFreeze( 63606c3fb27SDimitry Andric Builder.CreateAnd(FirstIterationCheck, LimitCheck)); 6370b57cec5SDimitry Andric } 6380b57cec5SDimitry Andric 6390b57cec5SDimitry Andric static void normalizePredicate(ScalarEvolution *SE, Loop *L, 6400b57cec5SDimitry Andric LoopICmp& RC) { 6410b57cec5SDimitry Andric // LFTR canonicalizes checks to the ICMP_NE/EQ form; normalize back to the 6420b57cec5SDimitry Andric // ULT/UGE form for ease of handling by our caller. 6430b57cec5SDimitry Andric if (ICmpInst::isEquality(RC.Pred) && 6440b57cec5SDimitry Andric RC.IV->getStepRecurrence(*SE)->isOne() && 6450b57cec5SDimitry Andric SE->isKnownPredicate(ICmpInst::ICMP_ULE, RC.IV->getStart(), RC.Limit)) 6460b57cec5SDimitry Andric RC.Pred = RC.Pred == ICmpInst::ICMP_NE ? 6470b57cec5SDimitry Andric ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; 6480b57cec5SDimitry Andric } 6490b57cec5SDimitry Andric 6500b57cec5SDimitry Andric /// If ICI can be widened to a loop invariant condition emits the loop 6510b57cec5SDimitry Andric /// invariant condition in the loop preheader and return it, otherwise 652bdd1243dSDimitry Andric /// returns std::nullopt. 653bdd1243dSDimitry Andric std::optional<Value *> 654bdd1243dSDimitry Andric LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander, 6550b57cec5SDimitry Andric Instruction *Guard) { 6560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Analyzing ICmpInst condition:\n"); 6570b57cec5SDimitry Andric LLVM_DEBUG(ICI->dump()); 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric // parseLoopStructure guarantees that the latch condition is: 6600b57cec5SDimitry Andric // ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=. 6610b57cec5SDimitry Andric // We are looking for the range checks of the form: 6620b57cec5SDimitry Andric // i u< guardLimit 6630b57cec5SDimitry Andric auto RangeCheck = parseLoopICmp(ICI); 6640b57cec5SDimitry Andric if (!RangeCheck) { 6650b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 666bdd1243dSDimitry Andric return std::nullopt; 6670b57cec5SDimitry Andric } 6680b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Guard check:\n"); 6690b57cec5SDimitry Andric LLVM_DEBUG(RangeCheck->dump()); 6700b57cec5SDimitry Andric if (RangeCheck->Pred != ICmpInst::ICMP_ULT) { 6710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unsupported range check predicate(" 6720b57cec5SDimitry Andric << RangeCheck->Pred << ")!\n"); 673bdd1243dSDimitry Andric return std::nullopt; 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric auto *RangeCheckIV = RangeCheck->IV; 6760b57cec5SDimitry Andric if (!RangeCheckIV->isAffine()) { 6770b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Range check IV is not affine!\n"); 678bdd1243dSDimitry Andric return std::nullopt; 6790b57cec5SDimitry Andric } 6800b57cec5SDimitry Andric auto *Step = RangeCheckIV->getStepRecurrence(*SE); 6810b57cec5SDimitry Andric // We cannot just compare with latch IV step because the latch and range IVs 6820b57cec5SDimitry Andric // may have different types. 6830b57cec5SDimitry Andric if (!isSupportedStep(Step)) { 6840b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Range check and latch have IVs different steps!\n"); 685bdd1243dSDimitry Andric return std::nullopt; 6860b57cec5SDimitry Andric } 6870b57cec5SDimitry Andric auto *Ty = RangeCheckIV->getType(); 6880b57cec5SDimitry Andric auto CurrLatchCheckOpt = generateLoopLatchCheck(*DL, *SE, LatchCheck, Ty); 6890b57cec5SDimitry Andric if (!CurrLatchCheckOpt) { 6900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to generate a loop latch check " 6910b57cec5SDimitry Andric "corresponding to range type: " 6920b57cec5SDimitry Andric << *Ty << "\n"); 693bdd1243dSDimitry Andric return std::nullopt; 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric LoopICmp CurrLatchCheck = *CurrLatchCheckOpt; 6970b57cec5SDimitry Andric // At this point, the range and latch step should have the same type, but need 6980b57cec5SDimitry Andric // not have the same value (we support both 1 and -1 steps). 6990b57cec5SDimitry Andric assert(Step->getType() == 7000b57cec5SDimitry Andric CurrLatchCheck.IV->getStepRecurrence(*SE)->getType() && 7010b57cec5SDimitry Andric "Range and latch steps should be of same type!"); 7020b57cec5SDimitry Andric if (Step != CurrLatchCheck.IV->getStepRecurrence(*SE)) { 7030b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Range and latch have different step values!\n"); 704bdd1243dSDimitry Andric return std::nullopt; 7050b57cec5SDimitry Andric } 7060b57cec5SDimitry Andric 7070b57cec5SDimitry Andric if (Step->isOne()) 7080b57cec5SDimitry Andric return widenICmpRangeCheckIncrementingLoop(CurrLatchCheck, *RangeCheck, 7090b57cec5SDimitry Andric Expander, Guard); 7100b57cec5SDimitry Andric else { 7110b57cec5SDimitry Andric assert(Step->isAllOnesValue() && "Step should be -1!"); 7120b57cec5SDimitry Andric return widenICmpRangeCheckDecrementingLoop(CurrLatchCheck, *RangeCheck, 7130b57cec5SDimitry Andric Expander, Guard); 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric } 7160b57cec5SDimitry Andric 717*5f757f3fSDimitry Andric void LoopPredication::widenChecks(SmallVectorImpl<Value *> &Checks, 718*5f757f3fSDimitry Andric SmallVectorImpl<Value *> &WidenedChecks, 719*5f757f3fSDimitry Andric SCEVExpander &Expander, Instruction *Guard) { 720*5f757f3fSDimitry Andric for (auto &Check : Checks) 721*5f757f3fSDimitry Andric if (ICmpInst *ICI = dyn_cast<ICmpInst>(Check)) 722*5f757f3fSDimitry Andric if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Guard)) { 723*5f757f3fSDimitry Andric WidenedChecks.push_back(Check); 724*5f757f3fSDimitry Andric Check = *NewRangeCheck; 7250b57cec5SDimitry Andric } 7260b57cec5SDimitry Andric } 7270b57cec5SDimitry Andric 7280b57cec5SDimitry Andric bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard, 7290b57cec5SDimitry Andric SCEVExpander &Expander) { 7300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing guard:\n"); 7310b57cec5SDimitry Andric LLVM_DEBUG(Guard->dump()); 7320b57cec5SDimitry Andric 7330b57cec5SDimitry Andric TotalConsidered++; 7340b57cec5SDimitry Andric SmallVector<Value *, 4> Checks; 735*5f757f3fSDimitry Andric SmallVector<Value *> WidenedChecks; 736*5f757f3fSDimitry Andric parseWidenableGuard(Guard, Checks); 737*5f757f3fSDimitry Andric widenChecks(Checks, WidenedChecks, Expander, Guard); 738*5f757f3fSDimitry Andric if (WidenedChecks.empty()) 7390b57cec5SDimitry Andric return false; 7400b57cec5SDimitry Andric 741*5f757f3fSDimitry Andric TotalWidened += WidenedChecks.size(); 7420b57cec5SDimitry Andric 7430b57cec5SDimitry Andric // Emit the new guard condition 7440b57cec5SDimitry Andric IRBuilder<> Builder(findInsertPt(Guard, Checks)); 7450b57cec5SDimitry Andric Value *AllChecks = Builder.CreateAnd(Checks); 7460b57cec5SDimitry Andric auto *OldCond = Guard->getOperand(0); 7470b57cec5SDimitry Andric Guard->setOperand(0, AllChecks); 748bdd1243dSDimitry Andric if (InsertAssumesOfPredicatedGuardsConditions) { 749bdd1243dSDimitry Andric Builder.SetInsertPoint(&*++BasicBlock::iterator(Guard)); 750bdd1243dSDimitry Andric Builder.CreateAssumption(OldCond); 751bdd1243dSDimitry Andric } 752349cc55cSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU); 7530b57cec5SDimitry Andric 754*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n"); 7550b57cec5SDimitry Andric return true; 7560b57cec5SDimitry Andric } 7570b57cec5SDimitry Andric 7580b57cec5SDimitry Andric bool LoopPredication::widenWidenableBranchGuardConditions( 7590b57cec5SDimitry Andric BranchInst *BI, SCEVExpander &Expander) { 7600b57cec5SDimitry Andric assert(isGuardAsWidenableBranch(BI) && "Must be!"); 7610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing guard:\n"); 7620b57cec5SDimitry Andric LLVM_DEBUG(BI->dump()); 7630b57cec5SDimitry Andric 7640b57cec5SDimitry Andric TotalConsidered++; 7650b57cec5SDimitry Andric SmallVector<Value *, 4> Checks; 766*5f757f3fSDimitry Andric SmallVector<Value *> WidenedChecks; 767*5f757f3fSDimitry Andric parseWidenableGuard(BI, Checks); 768*5f757f3fSDimitry Andric // At the moment, our matching logic for wideable conditions implicitly 769*5f757f3fSDimitry Andric // assumes we preserve the form: (br (and Cond, WC())). FIXME 770*5f757f3fSDimitry Andric auto WC = extractWidenableCondition(BI); 771*5f757f3fSDimitry Andric Checks.push_back(WC); 772*5f757f3fSDimitry Andric widenChecks(Checks, WidenedChecks, Expander, BI); 773*5f757f3fSDimitry Andric if (WidenedChecks.empty()) 7740b57cec5SDimitry Andric return false; 7750b57cec5SDimitry Andric 776*5f757f3fSDimitry Andric TotalWidened += WidenedChecks.size(); 7770b57cec5SDimitry Andric 7780b57cec5SDimitry Andric // Emit the new guard condition 7790b57cec5SDimitry Andric IRBuilder<> Builder(findInsertPt(BI, Checks)); 7800b57cec5SDimitry Andric Value *AllChecks = Builder.CreateAnd(Checks); 7810b57cec5SDimitry Andric auto *OldCond = BI->getCondition(); 7820b57cec5SDimitry Andric BI->setCondition(AllChecks); 783bdd1243dSDimitry Andric if (InsertAssumesOfPredicatedGuardsConditions) { 784*5f757f3fSDimitry Andric BasicBlock *IfTrueBB = BI->getSuccessor(0); 785bdd1243dSDimitry Andric Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt()); 78606c3fb27SDimitry Andric // If this block has other predecessors, we might not be able to use Cond. 78706c3fb27SDimitry Andric // In this case, create a Phi where every other input is `true` and input 78806c3fb27SDimitry Andric // from guard block is Cond. 789*5f757f3fSDimitry Andric Value *AssumeCond = Builder.CreateAnd(WidenedChecks); 79006c3fb27SDimitry Andric if (!IfTrueBB->getUniquePredecessor()) { 79106c3fb27SDimitry Andric auto *GuardBB = BI->getParent(); 792*5f757f3fSDimitry Andric auto *PN = Builder.CreatePHI(AssumeCond->getType(), pred_size(IfTrueBB), 79306c3fb27SDimitry Andric "assume.cond"); 79406c3fb27SDimitry Andric for (auto *Pred : predecessors(IfTrueBB)) 795*5f757f3fSDimitry Andric PN->addIncoming(Pred == GuardBB ? AssumeCond : Builder.getTrue(), Pred); 79606c3fb27SDimitry Andric AssumeCond = PN; 79706c3fb27SDimitry Andric } 79806c3fb27SDimitry Andric Builder.CreateAssumption(AssumeCond); 799bdd1243dSDimitry Andric } 800349cc55cSDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU); 8010b57cec5SDimitry Andric assert(isGuardAsWidenableBranch(BI) && 8020b57cec5SDimitry Andric "Stopped being a guard after transform?"); 8030b57cec5SDimitry Andric 804*5f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "Widened checks = " << WidenedChecks.size() << "\n"); 8050b57cec5SDimitry Andric return true; 8060b57cec5SDimitry Andric } 8070b57cec5SDimitry Andric 808bdd1243dSDimitry Andric std::optional<LoopICmp> LoopPredication::parseLoopLatchICmp() { 8090b57cec5SDimitry Andric using namespace PatternMatch; 8100b57cec5SDimitry Andric 8110b57cec5SDimitry Andric BasicBlock *LoopLatch = L->getLoopLatch(); 8120b57cec5SDimitry Andric if (!LoopLatch) { 8130b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The loop doesn't have a single latch!\n"); 814bdd1243dSDimitry Andric return std::nullopt; 8150b57cec5SDimitry Andric } 8160b57cec5SDimitry Andric 8170b57cec5SDimitry Andric auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator()); 8180b57cec5SDimitry Andric if (!BI || !BI->isConditional()) { 8190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to match the latch terminator!\n"); 820bdd1243dSDimitry Andric return std::nullopt; 8210b57cec5SDimitry Andric } 8220b57cec5SDimitry Andric BasicBlock *TrueDest = BI->getSuccessor(0); 8230b57cec5SDimitry Andric assert( 8240b57cec5SDimitry Andric (TrueDest == L->getHeader() || BI->getSuccessor(1) == L->getHeader()) && 8250b57cec5SDimitry Andric "One of the latch's destinations must be the header"); 8260b57cec5SDimitry Andric 8270b57cec5SDimitry Andric auto *ICI = dyn_cast<ICmpInst>(BI->getCondition()); 8280b57cec5SDimitry Andric if (!ICI) { 8290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to match the latch condition!\n"); 830bdd1243dSDimitry Andric return std::nullopt; 8310b57cec5SDimitry Andric } 8320b57cec5SDimitry Andric auto Result = parseLoopICmp(ICI); 8330b57cec5SDimitry Andric if (!Result) { 8340b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); 835bdd1243dSDimitry Andric return std::nullopt; 8360b57cec5SDimitry Andric } 8370b57cec5SDimitry Andric 8380b57cec5SDimitry Andric if (TrueDest != L->getHeader()) 8390b57cec5SDimitry Andric Result->Pred = ICmpInst::getInversePredicate(Result->Pred); 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric // Check affine first, so if it's not we don't try to compute the step 8420b57cec5SDimitry Andric // recurrence. 8430b57cec5SDimitry Andric if (!Result->IV->isAffine()) { 8440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The induction variable is not affine!\n"); 845bdd1243dSDimitry Andric return std::nullopt; 8460b57cec5SDimitry Andric } 8470b57cec5SDimitry Andric 8480b57cec5SDimitry Andric auto *Step = Result->IV->getStepRecurrence(*SE); 8490b57cec5SDimitry Andric if (!isSupportedStep(Step)) { 8500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n"); 851bdd1243dSDimitry Andric return std::nullopt; 8520b57cec5SDimitry Andric } 8530b57cec5SDimitry Andric 8540b57cec5SDimitry Andric auto IsUnsupportedPredicate = [](const SCEV *Step, ICmpInst::Predicate Pred) { 8550b57cec5SDimitry Andric if (Step->isOne()) { 8560b57cec5SDimitry Andric return Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_SLT && 8570b57cec5SDimitry Andric Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_SLE; 8580b57cec5SDimitry Andric } else { 8590b57cec5SDimitry Andric assert(Step->isAllOnesValue() && "Step should be -1!"); 8600b57cec5SDimitry Andric return Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_SGT && 8610b57cec5SDimitry Andric Pred != ICmpInst::ICMP_UGE && Pred != ICmpInst::ICMP_SGE; 8620b57cec5SDimitry Andric } 8630b57cec5SDimitry Andric }; 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric normalizePredicate(SE, L, *Result); 8660b57cec5SDimitry Andric if (IsUnsupportedPredicate(Step, Result->Pred)) { 8670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred 8680b57cec5SDimitry Andric << ")!\n"); 869bdd1243dSDimitry Andric return std::nullopt; 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric 8720b57cec5SDimitry Andric return Result; 8730b57cec5SDimitry Andric } 8740b57cec5SDimitry Andric 8750b57cec5SDimitry Andric bool LoopPredication::isLoopProfitableToPredicate() { 876349cc55cSDimitry Andric if (SkipProfitabilityChecks) 8770b57cec5SDimitry Andric return true; 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges; 8800b57cec5SDimitry Andric L->getExitEdges(ExitEdges); 8810b57cec5SDimitry Andric // If there is only one exiting edge in the loop, it is always profitable to 8820b57cec5SDimitry Andric // predicate the loop. 8830b57cec5SDimitry Andric if (ExitEdges.size() == 1) 8840b57cec5SDimitry Andric return true; 8850b57cec5SDimitry Andric 8860b57cec5SDimitry Andric // Calculate the exiting probabilities of all exiting edges from the loop, 8870b57cec5SDimitry Andric // starting with the LatchExitProbability. 8880b57cec5SDimitry Andric // Heuristic for profitability: If any of the exiting blocks' probability of 8890b57cec5SDimitry Andric // exiting the loop is larger than exiting through the latch block, it's not 8900b57cec5SDimitry Andric // profitable to predicate the loop. 8910b57cec5SDimitry Andric auto *LatchBlock = L->getLoopLatch(); 8920b57cec5SDimitry Andric assert(LatchBlock && "Should have a single latch at this point!"); 8930b57cec5SDimitry Andric auto *LatchTerm = LatchBlock->getTerminator(); 8940b57cec5SDimitry Andric assert(LatchTerm->getNumSuccessors() == 2 && 8950b57cec5SDimitry Andric "expected to be an exiting block with 2 succs!"); 8960b57cec5SDimitry Andric unsigned LatchBrExitIdx = 8970b57cec5SDimitry Andric LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0; 898349cc55cSDimitry Andric // We compute branch probabilities without BPI. We do not rely on BPI since 899349cc55cSDimitry Andric // Loop predication is usually run in an LPM and BPI is only preserved 900349cc55cSDimitry Andric // lossily within loop pass managers, while BPI has an inherent notion of 901349cc55cSDimitry Andric // being complete for an entire function. 902349cc55cSDimitry Andric 903349cc55cSDimitry Andric // If the latch exits into a deoptimize or an unreachable block, do not 904349cc55cSDimitry Andric // predicate on that latch check. 905349cc55cSDimitry Andric auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx); 906349cc55cSDimitry Andric if (isa<UnreachableInst>(LatchTerm) || 907349cc55cSDimitry Andric LatchExitBlock->getTerminatingDeoptimizeCall()) 908349cc55cSDimitry Andric return false; 909349cc55cSDimitry Andric 910349cc55cSDimitry Andric // Latch terminator has no valid profile data, so nothing to check 911349cc55cSDimitry Andric // profitability on. 912bdd1243dSDimitry Andric if (!hasValidBranchWeightMD(*LatchTerm)) 913349cc55cSDimitry Andric return true; 914349cc55cSDimitry Andric 915349cc55cSDimitry Andric auto ComputeBranchProbability = 916349cc55cSDimitry Andric [&](const BasicBlock *ExitingBlock, 917349cc55cSDimitry Andric const BasicBlock *ExitBlock) -> BranchProbability { 918349cc55cSDimitry Andric auto *Term = ExitingBlock->getTerminator(); 919349cc55cSDimitry Andric unsigned NumSucc = Term->getNumSuccessors(); 920bdd1243dSDimitry Andric if (MDNode *ProfileData = getValidBranchWeightMDNode(*Term)) { 921bdd1243dSDimitry Andric SmallVector<uint32_t> Weights; 922bdd1243dSDimitry Andric extractBranchWeights(ProfileData, Weights); 923bdd1243dSDimitry Andric uint64_t Numerator = 0, Denominator = 0; 924bdd1243dSDimitry Andric for (auto [i, Weight] : llvm::enumerate(Weights)) { 925349cc55cSDimitry Andric if (Term->getSuccessor(i) == ExitBlock) 926bdd1243dSDimitry Andric Numerator += Weight; 927bdd1243dSDimitry Andric Denominator += Weight; 928349cc55cSDimitry Andric } 929*5f757f3fSDimitry Andric // If all weights are zero act as if there was no profile data 930*5f757f3fSDimitry Andric if (Denominator == 0) 931*5f757f3fSDimitry Andric return BranchProbability::getBranchProbability(1, NumSucc); 932349cc55cSDimitry Andric return BranchProbability::getBranchProbability(Numerator, Denominator); 933349cc55cSDimitry Andric } else { 934349cc55cSDimitry Andric assert(LatchBlock != ExitingBlock && 935349cc55cSDimitry Andric "Latch term should always have profile data!"); 936349cc55cSDimitry Andric // No profile data, so we choose the weight as 1/num_of_succ(Src) 937349cc55cSDimitry Andric return BranchProbability::getBranchProbability(1, NumSucc); 938349cc55cSDimitry Andric } 939349cc55cSDimitry Andric }; 940349cc55cSDimitry Andric 9410b57cec5SDimitry Andric BranchProbability LatchExitProbability = 942349cc55cSDimitry Andric ComputeBranchProbability(LatchBlock, LatchExitBlock); 9430b57cec5SDimitry Andric 9440b57cec5SDimitry Andric // Protect against degenerate inputs provided by the user. Providing a value 9450b57cec5SDimitry Andric // less than one, can invert the definition of profitable loop predication. 9460b57cec5SDimitry Andric float ScaleFactor = LatchExitProbabilityScale; 9470b57cec5SDimitry Andric if (ScaleFactor < 1) { 9480b57cec5SDimitry Andric LLVM_DEBUG( 9490b57cec5SDimitry Andric dbgs() 9500b57cec5SDimitry Andric << "Ignored user setting for loop-predication-latch-probability-scale: " 9510b57cec5SDimitry Andric << LatchExitProbabilityScale << "\n"); 9520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "The value is set to 1.0\n"); 9530b57cec5SDimitry Andric ScaleFactor = 1.0; 9540b57cec5SDimitry Andric } 955349cc55cSDimitry Andric const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor; 9560b57cec5SDimitry Andric 9570b57cec5SDimitry Andric for (const auto &ExitEdge : ExitEdges) { 9580b57cec5SDimitry Andric BranchProbability ExitingBlockProbability = 959349cc55cSDimitry Andric ComputeBranchProbability(ExitEdge.first, ExitEdge.second); 9600b57cec5SDimitry Andric // Some exiting edge has higher probability than the latch exiting edge. 9610b57cec5SDimitry Andric // No longer profitable to predicate. 9620b57cec5SDimitry Andric if (ExitingBlockProbability > LatchProbabilityThreshold) 9630b57cec5SDimitry Andric return false; 9640b57cec5SDimitry Andric } 965349cc55cSDimitry Andric 966349cc55cSDimitry Andric // We have concluded that the most probable way to exit from the 9670b57cec5SDimitry Andric // loop is through the latch (or there's no profile information and all 9680b57cec5SDimitry Andric // exits are equally likely). 9690b57cec5SDimitry Andric return true; 9700b57cec5SDimitry Andric } 9710b57cec5SDimitry Andric 972480093f4SDimitry Andric /// If we can (cheaply) find a widenable branch which controls entry into the 973480093f4SDimitry Andric /// loop, return it. 974480093f4SDimitry Andric static BranchInst *FindWidenableTerminatorAboveLoop(Loop *L, LoopInfo &LI) { 975480093f4SDimitry Andric // Walk back through any unconditional executed blocks and see if we can find 976480093f4SDimitry Andric // a widenable condition which seems to control execution of this loop. Note 977480093f4SDimitry Andric // that we predict that maythrow calls are likely untaken and thus that it's 978480093f4SDimitry Andric // profitable to widen a branch before a maythrow call with a condition 979480093f4SDimitry Andric // afterwards even though that may cause the slow path to run in a case where 980480093f4SDimitry Andric // it wouldn't have otherwise. 981480093f4SDimitry Andric BasicBlock *BB = L->getLoopPreheader(); 982480093f4SDimitry Andric if (!BB) 983480093f4SDimitry Andric return nullptr; 984480093f4SDimitry Andric do { 985480093f4SDimitry Andric if (BasicBlock *Pred = BB->getSinglePredecessor()) 986480093f4SDimitry Andric if (BB == Pred->getSingleSuccessor()) { 987480093f4SDimitry Andric BB = Pred; 988480093f4SDimitry Andric continue; 989480093f4SDimitry Andric } 990480093f4SDimitry Andric break; 991480093f4SDimitry Andric } while (true); 992480093f4SDimitry Andric 993480093f4SDimitry Andric if (BasicBlock *Pred = BB->getSinglePredecessor()) { 994*5f757f3fSDimitry Andric if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator())) 995*5f757f3fSDimitry Andric if (BI->getSuccessor(0) == BB && isWidenableBranch(BI)) 996*5f757f3fSDimitry Andric return BI; 997480093f4SDimitry Andric } 998480093f4SDimitry Andric return nullptr; 999480093f4SDimitry Andric } 1000480093f4SDimitry Andric 1001480093f4SDimitry Andric /// Return the minimum of all analyzeable exit counts. This is an upper bound 1002480093f4SDimitry Andric /// on the actual exit count. If there are not at least two analyzeable exits, 1003480093f4SDimitry Andric /// returns SCEVCouldNotCompute. 1004480093f4SDimitry Andric static const SCEV *getMinAnalyzeableBackedgeTakenCount(ScalarEvolution &SE, 1005480093f4SDimitry Andric DominatorTree &DT, 1006480093f4SDimitry Andric Loop *L) { 1007480093f4SDimitry Andric SmallVector<BasicBlock *, 16> ExitingBlocks; 1008480093f4SDimitry Andric L->getExitingBlocks(ExitingBlocks); 1009480093f4SDimitry Andric 1010480093f4SDimitry Andric SmallVector<const SCEV *, 4> ExitCounts; 1011480093f4SDimitry Andric for (BasicBlock *ExitingBB : ExitingBlocks) { 1012480093f4SDimitry Andric const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); 1013480093f4SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount)) 1014480093f4SDimitry Andric continue; 1015480093f4SDimitry Andric assert(DT.dominates(ExitingBB, L->getLoopLatch()) && 1016480093f4SDimitry Andric "We should only have known counts for exiting blocks that " 1017480093f4SDimitry Andric "dominate latch!"); 1018480093f4SDimitry Andric ExitCounts.push_back(ExitCount); 1019480093f4SDimitry Andric } 1020480093f4SDimitry Andric if (ExitCounts.size() < 2) 1021480093f4SDimitry Andric return SE.getCouldNotCompute(); 1022480093f4SDimitry Andric return SE.getUMinFromMismatchedTypes(ExitCounts); 1023480093f4SDimitry Andric } 1024480093f4SDimitry Andric 1025480093f4SDimitry Andric /// This implements an analogous, but entirely distinct transform from the main 1026480093f4SDimitry Andric /// loop predication transform. This one is phrased in terms of using a 1027480093f4SDimitry Andric /// widenable branch *outside* the loop to allow us to simplify loop exits in a 1028480093f4SDimitry Andric /// following loop. This is close in spirit to the IndVarSimplify transform 1029480093f4SDimitry Andric /// of the same name, but is materially different widening loosens legality 1030480093f4SDimitry Andric /// sharply. 1031480093f4SDimitry Andric bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { 1032480093f4SDimitry Andric // The transformation performed here aims to widen a widenable condition 1033480093f4SDimitry Andric // above the loop such that all analyzeable exit leading to deopt are dead. 1034480093f4SDimitry Andric // It assumes that the latch is the dominant exit for profitability and that 1035480093f4SDimitry Andric // exits branching to deoptimizing blocks are rarely taken. It relies on the 1036480093f4SDimitry Andric // semantics of widenable expressions for legality. (i.e. being able to fall 1037480093f4SDimitry Andric // down the widenable path spuriously allows us to ignore exit order, 1038480093f4SDimitry Andric // unanalyzeable exits, side effects, exceptional exits, and other challenges 1039480093f4SDimitry Andric // which restrict the applicability of the non-WC based version of this 1040480093f4SDimitry Andric // transform in IndVarSimplify.) 1041480093f4SDimitry Andric // 1042480093f4SDimitry Andric // NOTE ON POISON/UNDEF - We're hoisting an expression above guards which may 1043480093f4SDimitry Andric // imply flags on the expression being hoisted and inserting new uses (flags 1044480093f4SDimitry Andric // are only correct for current uses). The result is that we may be 1045480093f4SDimitry Andric // inserting a branch on the value which can be either poison or undef. In 1046480093f4SDimitry Andric // this case, the branch can legally go either way; we just need to avoid 1047480093f4SDimitry Andric // introducing UB. This is achieved through the use of the freeze 1048480093f4SDimitry Andric // instruction. 1049480093f4SDimitry Andric 1050480093f4SDimitry Andric SmallVector<BasicBlock *, 16> ExitingBlocks; 1051480093f4SDimitry Andric L->getExitingBlocks(ExitingBlocks); 1052480093f4SDimitry Andric 1053480093f4SDimitry Andric if (ExitingBlocks.empty()) 1054480093f4SDimitry Andric return false; // Nothing to do. 1055480093f4SDimitry Andric 1056480093f4SDimitry Andric auto *Latch = L->getLoopLatch(); 1057480093f4SDimitry Andric if (!Latch) 1058480093f4SDimitry Andric return false; 1059480093f4SDimitry Andric 1060480093f4SDimitry Andric auto *WidenableBR = FindWidenableTerminatorAboveLoop(L, *LI); 1061480093f4SDimitry Andric if (!WidenableBR) 1062480093f4SDimitry Andric return false; 1063480093f4SDimitry Andric 1064480093f4SDimitry Andric const SCEV *LatchEC = SE->getExitCount(L, Latch); 1065480093f4SDimitry Andric if (isa<SCEVCouldNotCompute>(LatchEC)) 1066480093f4SDimitry Andric return false; // profitability - want hot exit in analyzeable set 1067480093f4SDimitry Andric 1068480093f4SDimitry Andric // At this point, we have found an analyzeable latch, and a widenable 1069480093f4SDimitry Andric // condition above the loop. If we have a widenable exit within the loop 1070480093f4SDimitry Andric // (for which we can't compute exit counts), drop the ability to further 1071480093f4SDimitry Andric // widen so that we gain ability to analyze it's exit count and perform this 1072480093f4SDimitry Andric // transform. TODO: It'd be nice to know for sure the exit became 1073480093f4SDimitry Andric // analyzeable after dropping widenability. 1074349cc55cSDimitry Andric bool ChangedLoop = false; 1075480093f4SDimitry Andric 1076480093f4SDimitry Andric for (auto *ExitingBB : ExitingBlocks) { 1077480093f4SDimitry Andric if (LI->getLoopFor(ExitingBB) != L) 1078480093f4SDimitry Andric continue; 1079480093f4SDimitry Andric 1080480093f4SDimitry Andric auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1081480093f4SDimitry Andric if (!BI) 1082480093f4SDimitry Andric continue; 1083480093f4SDimitry Andric 1084*5f757f3fSDimitry Andric if (auto WC = extractWidenableCondition(BI)) 1085*5f757f3fSDimitry Andric if (L->contains(BI->getSuccessor(0))) { 1086*5f757f3fSDimitry Andric assert(WC->hasOneUse() && "Not appropriate widenable branch!"); 1087*5f757f3fSDimitry Andric WC->user_back()->replaceUsesOfWith( 1088*5f757f3fSDimitry Andric WC, ConstantInt::getTrue(BI->getContext())); 1089349cc55cSDimitry Andric ChangedLoop = true; 1090480093f4SDimitry Andric } 1091480093f4SDimitry Andric } 1092349cc55cSDimitry Andric if (ChangedLoop) 1093480093f4SDimitry Andric SE->forgetLoop(L); 1094480093f4SDimitry Andric 109506c3fb27SDimitry Andric // The insertion point for the widening should be at the widenably call, not 109606c3fb27SDimitry Andric // at the WidenableBR. If we do this at the widenableBR, we can incorrectly 109706c3fb27SDimitry Andric // change a loop-invariant condition to a loop-varying one. 109806c3fb27SDimitry Andric auto *IP = cast<Instruction>(WidenableBR->getCondition()); 109906c3fb27SDimitry Andric 1100480093f4SDimitry Andric // The use of umin(all analyzeable exits) instead of latch is subtle, but 1101480093f4SDimitry Andric // important for profitability. We may have a loop which hasn't been fully 1102480093f4SDimitry Andric // canonicalized just yet. If the exit we chose to widen is provably never 1103480093f4SDimitry Andric // taken, we want the widened form to *also* be provably never taken. We 1104480093f4SDimitry Andric // can't guarantee this as a current unanalyzeable exit may later become 1105480093f4SDimitry Andric // analyzeable, but we can at least avoid the obvious cases. 1106480093f4SDimitry Andric const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L); 1107480093f4SDimitry Andric if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() || 1108480093f4SDimitry Andric !SE->isLoopInvariant(MinEC, L) || 110906c3fb27SDimitry Andric !Rewriter.isSafeToExpandAt(MinEC, IP)) 1110349cc55cSDimitry Andric return ChangedLoop; 1111480093f4SDimitry Andric 1112480093f4SDimitry Andric Rewriter.setInsertPoint(IP); 1113480093f4SDimitry Andric IRBuilder<> B(IP); 1114480093f4SDimitry Andric 1115349cc55cSDimitry Andric bool InvalidateLoop = false; 1116480093f4SDimitry Andric Value *MinECV = nullptr; // lazily generated if needed 1117480093f4SDimitry Andric for (BasicBlock *ExitingBB : ExitingBlocks) { 1118480093f4SDimitry Andric // If our exiting block exits multiple loops, we can only rewrite the 1119480093f4SDimitry Andric // innermost one. Otherwise, we're changing how many times the innermost 1120480093f4SDimitry Andric // loop runs before it exits. 1121480093f4SDimitry Andric if (LI->getLoopFor(ExitingBB) != L) 1122480093f4SDimitry Andric continue; 1123480093f4SDimitry Andric 1124480093f4SDimitry Andric // Can't rewrite non-branch yet. 1125480093f4SDimitry Andric auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); 1126480093f4SDimitry Andric if (!BI) 1127480093f4SDimitry Andric continue; 1128480093f4SDimitry Andric 1129480093f4SDimitry Andric // If already constant, nothing to do. 1130480093f4SDimitry Andric if (isa<Constant>(BI->getCondition())) 1131480093f4SDimitry Andric continue; 1132480093f4SDimitry Andric 1133480093f4SDimitry Andric const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); 1134480093f4SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount) || 1135480093f4SDimitry Andric ExitCount->getType()->isPointerTy() || 1136fcaf7f86SDimitry Andric !Rewriter.isSafeToExpandAt(ExitCount, WidenableBR)) 1137480093f4SDimitry Andric continue; 1138480093f4SDimitry Andric 1139480093f4SDimitry Andric const bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB)); 1140480093f4SDimitry Andric BasicBlock *ExitBB = BI->getSuccessor(ExitIfTrue ? 0 : 1); 11415ffd83dbSDimitry Andric if (!ExitBB->getPostdominatingDeoptimizeCall()) 1142480093f4SDimitry Andric continue; 1143480093f4SDimitry Andric 11445ffd83dbSDimitry Andric /// Here we can be fairly sure that executing this exit will most likely 11455ffd83dbSDimitry Andric /// lead to executing llvm.experimental.deoptimize. 11465ffd83dbSDimitry Andric /// This is a profitability heuristic, not a legality constraint. 11475ffd83dbSDimitry Andric 1148480093f4SDimitry Andric // If we found a widenable exit condition, do two things: 1149480093f4SDimitry Andric // 1) fold the widened exit test into the widenable condition 1150480093f4SDimitry Andric // 2) fold the branch to untaken - avoids infinite looping 1151480093f4SDimitry Andric 1152480093f4SDimitry Andric Value *ECV = Rewriter.expandCodeFor(ExitCount); 1153480093f4SDimitry Andric if (!MinECV) 1154480093f4SDimitry Andric MinECV = Rewriter.expandCodeFor(MinEC); 1155480093f4SDimitry Andric Value *RHS = MinECV; 1156480093f4SDimitry Andric if (ECV->getType() != RHS->getType()) { 1157480093f4SDimitry Andric Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType()); 1158480093f4SDimitry Andric ECV = B.CreateZExt(ECV, WiderTy); 1159480093f4SDimitry Andric RHS = B.CreateZExt(RHS, WiderTy); 1160480093f4SDimitry Andric } 1161480093f4SDimitry Andric assert(!Latch || DT->dominates(ExitingBB, Latch)); 1162480093f4SDimitry Andric Value *NewCond = B.CreateICmp(ICmpInst::ICMP_UGT, ECV, RHS); 1163480093f4SDimitry Andric // Freeze poison or undef to an arbitrary bit pattern to ensure we can 1164480093f4SDimitry Andric // branch without introducing UB. See NOTE ON POISON/UNDEF above for 1165480093f4SDimitry Andric // context. 1166480093f4SDimitry Andric NewCond = B.CreateFreeze(NewCond); 1167480093f4SDimitry Andric 1168480093f4SDimitry Andric widenWidenableBranch(WidenableBR, NewCond); 1169480093f4SDimitry Andric 1170480093f4SDimitry Andric Value *OldCond = BI->getCondition(); 1171480093f4SDimitry Andric BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue)); 1172349cc55cSDimitry Andric InvalidateLoop = true; 1173480093f4SDimitry Andric } 1174480093f4SDimitry Andric 1175349cc55cSDimitry Andric if (InvalidateLoop) 1176480093f4SDimitry Andric // We just mutated a bunch of loop exits changing there exit counts 1177480093f4SDimitry Andric // widely. We need to force recomputation of the exit counts given these 1178480093f4SDimitry Andric // changes. Note that all of the inserted exits are never taken, and 1179480093f4SDimitry Andric // should be removed next time the CFG is modified. 1180480093f4SDimitry Andric SE->forgetLoop(L); 1181349cc55cSDimitry Andric 1182349cc55cSDimitry Andric // Always return `true` since we have moved the WidenableBR's condition. 1183349cc55cSDimitry Andric return true; 1184480093f4SDimitry Andric } 1185480093f4SDimitry Andric 11860b57cec5SDimitry Andric bool LoopPredication::runOnLoop(Loop *Loop) { 11870b57cec5SDimitry Andric L = Loop; 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Analyzing "); 11900b57cec5SDimitry Andric LLVM_DEBUG(L->dump()); 11910b57cec5SDimitry Andric 11920b57cec5SDimitry Andric Module *M = L->getHeader()->getModule(); 11930b57cec5SDimitry Andric 11940b57cec5SDimitry Andric // There is nothing to do if the module doesn't use guards 11950b57cec5SDimitry Andric auto *GuardDecl = 11960b57cec5SDimitry Andric M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard)); 11970b57cec5SDimitry Andric bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); 11980b57cec5SDimitry Andric auto *WCDecl = M->getFunction( 11990b57cec5SDimitry Andric Intrinsic::getName(Intrinsic::experimental_widenable_condition)); 12000b57cec5SDimitry Andric bool HasWidenableConditions = 12010b57cec5SDimitry Andric PredicateWidenableBranchGuards && WCDecl && !WCDecl->use_empty(); 12020b57cec5SDimitry Andric if (!HasIntrinsicGuards && !HasWidenableConditions) 12030b57cec5SDimitry Andric return false; 12040b57cec5SDimitry Andric 12050b57cec5SDimitry Andric DL = &M->getDataLayout(); 12060b57cec5SDimitry Andric 12070b57cec5SDimitry Andric Preheader = L->getLoopPreheader(); 12080b57cec5SDimitry Andric if (!Preheader) 12090b57cec5SDimitry Andric return false; 12100b57cec5SDimitry Andric 12110b57cec5SDimitry Andric auto LatchCheckOpt = parseLoopLatchICmp(); 12120b57cec5SDimitry Andric if (!LatchCheckOpt) 12130b57cec5SDimitry Andric return false; 12140b57cec5SDimitry Andric LatchCheck = *LatchCheckOpt; 12150b57cec5SDimitry Andric 12160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Latch check:\n"); 12170b57cec5SDimitry Andric LLVM_DEBUG(LatchCheck.dump()); 12180b57cec5SDimitry Andric 12190b57cec5SDimitry Andric if (!isLoopProfitableToPredicate()) { 12200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop not profitable to predicate!\n"); 12210b57cec5SDimitry Andric return false; 12220b57cec5SDimitry Andric } 12230b57cec5SDimitry Andric // Collect all the guards into a vector and process later, so as not 12240b57cec5SDimitry Andric // to invalidate the instruction iterator. 12250b57cec5SDimitry Andric SmallVector<IntrinsicInst *, 4> Guards; 12260b57cec5SDimitry Andric SmallVector<BranchInst *, 4> GuardsAsWidenableBranches; 12270b57cec5SDimitry Andric for (const auto BB : L->blocks()) { 12280b57cec5SDimitry Andric for (auto &I : *BB) 12290b57cec5SDimitry Andric if (isGuard(&I)) 12300b57cec5SDimitry Andric Guards.push_back(cast<IntrinsicInst>(&I)); 12310b57cec5SDimitry Andric if (PredicateWidenableBranchGuards && 12320b57cec5SDimitry Andric isGuardAsWidenableBranch(BB->getTerminator())) 12330b57cec5SDimitry Andric GuardsAsWidenableBranches.push_back( 12340b57cec5SDimitry Andric cast<BranchInst>(BB->getTerminator())); 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric SCEVExpander Expander(*SE, *DL, "loop-predication"); 12380b57cec5SDimitry Andric bool Changed = false; 12390b57cec5SDimitry Andric for (auto *Guard : Guards) 12400b57cec5SDimitry Andric Changed |= widenGuardConditions(Guard, Expander); 12410b57cec5SDimitry Andric for (auto *Guard : GuardsAsWidenableBranches) 12420b57cec5SDimitry Andric Changed |= widenWidenableBranchGuardConditions(Guard, Expander); 1243480093f4SDimitry Andric Changed |= predicateLoopExits(L, Expander); 1244349cc55cSDimitry Andric 1245349cc55cSDimitry Andric if (MSSAU && VerifyMemorySSA) 1246349cc55cSDimitry Andric MSSAU->getMemorySSA()->verifyMemorySSA(); 12470b57cec5SDimitry Andric return Changed; 12480b57cec5SDimitry Andric } 1249