1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the guard widening pass. The semantics of the 10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails 11 // more often that it did before the transform. This optimization is called 12 // "widening" and can be used hoist and common runtime checks in situations like 13 // these: 14 // 15 // %cmp0 = 7 u< Length 16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 17 // call @unknown_side_effects() 18 // %cmp1 = 9 u< Length 19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] 20 // ... 21 // 22 // => 23 // 24 // %cmp0 = 9 u< Length 25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 26 // call @unknown_side_effects() 27 // ... 28 // 29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a 30 // generic implementation of the same function, which will have the correct 31 // semantics from that point onward. It is always _legal_ to deoptimize (so 32 // replacing %cmp0 with false is "correct"), though it may not always be 33 // profitable to do so. 34 // 35 // NB! This pass is a work in progress. It hasn't been tuned to be "production 36 // ready" yet. It is known to have quadriatic running time and will not scale 37 // to large numbers of guards 38 // 39 //===----------------------------------------------------------------------===// 40 41 #include "llvm/Transforms/Scalar/GuardWidening.h" 42 #include <functional> 43 #include "llvm/ADT/DenseMap.h" 44 #include "llvm/ADT/DepthFirstIterator.h" 45 #include "llvm/ADT/Statistic.h" 46 #include "llvm/Analysis/BranchProbabilityInfo.h" 47 #include "llvm/Analysis/GuardUtils.h" 48 #include "llvm/Analysis/LoopInfo.h" 49 #include "llvm/Analysis/LoopPass.h" 50 #include "llvm/Analysis/PostDominators.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/ConstantRange.h" 53 #include "llvm/IR/Dominators.h" 54 #include "llvm/IR/IntrinsicInst.h" 55 #include "llvm/IR/PatternMatch.h" 56 #include "llvm/Pass.h" 57 #include "llvm/Support/Debug.h" 58 #include "llvm/Support/KnownBits.h" 59 #include "llvm/Transforms/Scalar.h" 60 #include "llvm/Transforms/Utils/LoopUtils.h" 61 62 using namespace llvm; 63 64 #define DEBUG_TYPE "guard-widening" 65 66 STATISTIC(GuardsEliminated, "Number of eliminated guards"); 67 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches"); 68 69 static cl::opt<bool> WidenFrequentBranches( 70 "guard-widening-widen-frequent-branches", cl::Hidden, 71 cl::desc("Widen conditions of explicit branches into dominating guards in " 72 "case if their taken frequency exceeds threshold set by " 73 "guard-widening-frequent-branch-threshold option"), 74 cl::init(false)); 75 76 static cl::opt<unsigned> FrequentBranchThreshold( 77 "guard-widening-frequent-branch-threshold", cl::Hidden, 78 cl::desc("When WidenFrequentBranches is set to true, this option is used " 79 "to determine which branches are frequently taken. The criteria " 80 "that a branch is taken more often than " 81 "((FrequentBranchThreshold - 1) / FrequentBranchThreshold), then " 82 "it is considered frequently taken"), 83 cl::init(1000)); 84 85 static cl::opt<bool> 86 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden, 87 cl::desc("Whether or not we should widen guards " 88 "expressed as branches by widenable conditions"), 89 cl::init(true)); 90 91 namespace { 92 93 // Get the condition of \p I. It can either be a guard or a conditional branch. 94 static Value *getCondition(Instruction *I) { 95 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 96 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 97 "Bad guard intrinsic?"); 98 return GI->getArgOperand(0); 99 } 100 if (isGuardAsWidenableBranch(I)) { 101 auto *Cond = cast<BranchInst>(I)->getCondition(); 102 return cast<BinaryOperator>(Cond)->getOperand(0); 103 } 104 return cast<BranchInst>(I)->getCondition(); 105 } 106 107 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 108 // conditional branch. 109 static void setCondition(Instruction *I, Value *NewCond) { 110 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 111 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 112 "Bad guard intrinsic?"); 113 GI->setArgOperand(0, NewCond); 114 return; 115 } 116 cast<BranchInst>(I)->setCondition(NewCond); 117 } 118 119 // Eliminates the guard instruction properly. 120 static void eliminateGuard(Instruction *GuardInst) { 121 GuardInst->eraseFromParent(); 122 ++GuardsEliminated; 123 } 124 125 class GuardWideningImpl { 126 DominatorTree &DT; 127 PostDominatorTree *PDT; 128 LoopInfo &LI; 129 BranchProbabilityInfo *BPI; 130 131 /// Together, these describe the region of interest. This might be all of 132 /// the blocks within a function, or only a given loop's blocks and preheader. 133 DomTreeNode *Root; 134 std::function<bool(BasicBlock*)> BlockFilter; 135 136 /// The set of guards and conditional branches whose conditions have been 137 /// widened into dominating guards. 138 SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; 139 140 /// The set of guards which have been widened to include conditions to other 141 /// guards. 142 DenseSet<Instruction *> WidenedGuards; 143 144 /// Try to eliminate instruction \p Instr by widening it into an earlier 145 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that 146 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock 147 /// maps BasicBlocks to the set of guards seen in that block. 148 bool eliminateInstrViaWidening( 149 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 150 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 151 GuardsPerBlock, bool InvertCondition = false); 152 153 /// Used to keep track of which widening potential is more effective. 154 enum WideningScore { 155 /// Don't widen. 156 WS_IllegalOrNegative, 157 158 /// Widening is performance neutral as far as the cycles spent in check 159 /// conditions goes (but can still help, e.g., code layout, having less 160 /// deopt state). 161 WS_Neutral, 162 163 /// Widening is profitable. 164 WS_Positive, 165 166 /// Widening is very profitable. Not significantly different from \c 167 /// WS_Positive, except by the order. 168 WS_VeryPositive 169 }; 170 171 static StringRef scoreTypeToString(WideningScore WS); 172 173 /// Compute the score for widening the condition in \p DominatedInstr 174 /// into \p DominatingGuard. If \p InvertCond is set, then we widen the 175 /// inverted condition of the dominating guard. 176 WideningScore computeWideningScore(Instruction *DominatedInstr, 177 Instruction *DominatingGuard, 178 bool InvertCond); 179 180 /// Helper to check if \p V can be hoisted to \p InsertPos. 181 bool isAvailableAt(const Value *V, const Instruction *InsertPos) const { 182 SmallPtrSet<const Instruction *, 8> Visited; 183 return isAvailableAt(V, InsertPos, Visited); 184 } 185 186 bool isAvailableAt(const Value *V, const Instruction *InsertPos, 187 SmallPtrSetImpl<const Instruction *> &Visited) const; 188 189 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c 190 /// isAvailableAt returned true. 191 void makeAvailableAt(Value *V, Instruction *InsertPos) const; 192 193 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try 194 /// to generate an expression computing the logical AND of \p Cond0 and (\p 195 /// Cond1 XOR \p InvertCondition). 196 /// Return true if the expression computing the AND is only as 197 /// expensive as computing one of the two. If \p InsertPt is true then 198 /// actually generate the resulting expression, make it available at \p 199 /// InsertPt and return it in \p Result (else no change to the IR is made). 200 bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, 201 Value *&Result, bool InvertCondition); 202 203 /// Represents a range check of the form \c Base + \c Offset u< \c Length, 204 /// with the constraint that \c Length is not negative. \c CheckInst is the 205 /// pre-existing instruction in the IR that computes the result of this range 206 /// check. 207 class RangeCheck { 208 const Value *Base; 209 const ConstantInt *Offset; 210 const Value *Length; 211 ICmpInst *CheckInst; 212 213 public: 214 explicit RangeCheck(const Value *Base, const ConstantInt *Offset, 215 const Value *Length, ICmpInst *CheckInst) 216 : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} 217 218 void setBase(const Value *NewBase) { Base = NewBase; } 219 void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } 220 221 const Value *getBase() const { return Base; } 222 const ConstantInt *getOffset() const { return Offset; } 223 const APInt &getOffsetValue() const { return getOffset()->getValue(); } 224 const Value *getLength() const { return Length; }; 225 ICmpInst *getCheckInst() const { return CheckInst; } 226 227 void print(raw_ostream &OS, bool PrintTypes = false) { 228 OS << "Base: "; 229 Base->printAsOperand(OS, PrintTypes); 230 OS << " Offset: "; 231 Offset->printAsOperand(OS, PrintTypes); 232 OS << " Length: "; 233 Length->printAsOperand(OS, PrintTypes); 234 } 235 236 LLVM_DUMP_METHOD void dump() { 237 print(dbgs()); 238 dbgs() << "\n"; 239 } 240 }; 241 242 /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and 243 /// append them to \p Checks. Returns true on success, may clobber \c Checks 244 /// on failure. 245 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) { 246 SmallPtrSet<const Value *, 8> Visited; 247 return parseRangeChecks(CheckCond, Checks, Visited); 248 } 249 250 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks, 251 SmallPtrSetImpl<const Value *> &Visited); 252 253 /// Combine the checks in \p Checks into a smaller set of checks and append 254 /// them into \p CombinedChecks. Return true on success (i.e. all of checks 255 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks 256 /// and \p CombinedChecks on success and on failure. 257 bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, 258 SmallVectorImpl<RangeCheck> &CombinedChecks) const; 259 260 /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of 261 /// computing only one of the two expressions? 262 bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) { 263 Value *ResultUnused; 264 return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused, 265 InvertCond); 266 } 267 268 /// If \p InvertCondition is false, Widen \p ToWiden to fail if 269 /// \p NewCondition is false, otherwise make it fail if \p NewCondition is 270 /// true (in addition to whatever it is already checking). 271 void widenGuard(Instruction *ToWiden, Value *NewCondition, 272 bool InvertCondition) { 273 Value *Result; 274 widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result, 275 InvertCondition); 276 Value *WidenableCondition = nullptr; 277 if (isGuardAsWidenableBranch(ToWiden)) { 278 auto *Cond = cast<BranchInst>(ToWiden)->getCondition(); 279 WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1); 280 } 281 if (WidenableCondition) 282 Result = BinaryOperator::CreateAnd(Result, WidenableCondition, 283 "guard.chk", ToWiden); 284 setCondition(ToWiden, Result); 285 } 286 287 public: 288 289 explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, 290 LoopInfo &LI, BranchProbabilityInfo *BPI, 291 DomTreeNode *Root, 292 std::function<bool(BasicBlock*)> BlockFilter) 293 : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root), BlockFilter(BlockFilter) 294 {} 295 296 /// The entry point for this pass. 297 bool run(); 298 }; 299 } 300 301 static bool isSupportedGuardInstruction(const Instruction *Insn) { 302 if (isGuard(Insn)) 303 return true; 304 if (WidenBranchGuards && isGuardAsWidenableBranch(Insn)) 305 return true; 306 return false; 307 } 308 309 bool GuardWideningImpl::run() { 310 DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; 311 bool Changed = false; 312 Optional<BranchProbability> LikelyTaken = None; 313 if (WidenFrequentBranches && BPI) { 314 unsigned Threshold = FrequentBranchThreshold; 315 assert(Threshold > 0 && "Zero threshold makes no sense!"); 316 LikelyTaken = BranchProbability(Threshold - 1, Threshold); 317 } 318 319 for (auto DFI = df_begin(Root), DFE = df_end(Root); 320 DFI != DFE; ++DFI) { 321 auto *BB = (*DFI)->getBlock(); 322 if (!BlockFilter(BB)) 323 continue; 324 325 auto &CurrentList = GuardsInBlock[BB]; 326 327 for (auto &I : *BB) 328 if (isSupportedGuardInstruction(&I)) 329 CurrentList.push_back(cast<Instruction>(&I)); 330 331 for (auto *II : CurrentList) 332 Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock); 333 if (WidenFrequentBranches && BPI) 334 if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator())) 335 if (BI->isConditional()) { 336 // If one of branches of a conditional is likely taken, try to 337 // eliminate it. 338 if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken) 339 Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock); 340 else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken) 341 Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock, 342 /*InvertCondition*/true); 343 } 344 } 345 346 assert(EliminatedGuardsAndBranches.empty() || Changed); 347 for (auto *I : EliminatedGuardsAndBranches) 348 if (!WidenedGuards.count(I)) { 349 assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); 350 if (isSupportedGuardInstruction(I)) 351 eliminateGuard(I); 352 else { 353 assert(isa<BranchInst>(I) && 354 "Eliminated something other than guard or branch?"); 355 ++CondBranchEliminated; 356 } 357 } 358 359 return Changed; 360 } 361 362 bool GuardWideningImpl::eliminateInstrViaWidening( 363 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 364 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 365 GuardsInBlock, bool InvertCondition) { 366 // Ignore trivial true or false conditions. These instructions will be 367 // trivially eliminated by any cleanup pass. Do not erase them because other 368 // guards can possibly be widened into them. 369 if (isa<ConstantInt>(getCondition(Instr))) 370 return false; 371 372 Instruction *BestSoFar = nullptr; 373 auto BestScoreSoFar = WS_IllegalOrNegative; 374 375 // In the set of dominating guards, find the one we can merge GuardInst with 376 // for the most profit. 377 for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { 378 auto *CurBB = DFSI.getPath(i)->getBlock(); 379 if (!BlockFilter(CurBB)) 380 break; 381 assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); 382 const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; 383 384 auto I = GuardsInCurBB.begin(); 385 auto E = Instr->getParent() == CurBB 386 ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr) 387 : GuardsInCurBB.end(); 388 389 #ifndef NDEBUG 390 { 391 unsigned Index = 0; 392 for (auto &I : *CurBB) { 393 if (Index == GuardsInCurBB.size()) 394 break; 395 if (GuardsInCurBB[Index] == &I) 396 Index++; 397 } 398 assert(Index == GuardsInCurBB.size() && 399 "Guards expected to be in order!"); 400 } 401 #endif 402 403 assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); 404 405 for (auto *Candidate : make_range(I, E)) { 406 auto Score = computeWideningScore(Instr, Candidate, InvertCondition); 407 LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr) 408 << " and " << *getCondition(Candidate) << " is " 409 << scoreTypeToString(Score) << "\n"); 410 if (Score > BestScoreSoFar) { 411 BestScoreSoFar = Score; 412 BestSoFar = Candidate; 413 } 414 } 415 } 416 417 if (BestScoreSoFar == WS_IllegalOrNegative) { 418 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n"); 419 return false; 420 } 421 422 assert(BestSoFar != Instr && "Should have never visited same guard!"); 423 assert(DT.dominates(BestSoFar, Instr) && "Should be!"); 424 425 LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar 426 << " with score " << scoreTypeToString(BestScoreSoFar) 427 << "\n"); 428 widenGuard(BestSoFar, getCondition(Instr), InvertCondition); 429 auto NewGuardCondition = InvertCondition 430 ? ConstantInt::getFalse(Instr->getContext()) 431 : ConstantInt::getTrue(Instr->getContext()); 432 setCondition(Instr, NewGuardCondition); 433 EliminatedGuardsAndBranches.push_back(Instr); 434 WidenedGuards.insert(BestSoFar); 435 return true; 436 } 437 438 GuardWideningImpl::WideningScore 439 GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, 440 Instruction *DominatingGuard, 441 bool InvertCond) { 442 Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); 443 Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); 444 bool HoistingOutOfLoop = false; 445 446 if (DominatingGuardLoop != DominatedInstrLoop) { 447 // Be conservative and don't widen into a sibling loop. TODO: If the 448 // sibling is colder, we should consider allowing this. 449 if (DominatingGuardLoop && 450 !DominatingGuardLoop->contains(DominatedInstrLoop)) 451 return WS_IllegalOrNegative; 452 453 HoistingOutOfLoop = true; 454 } 455 456 if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard)) 457 return WS_IllegalOrNegative; 458 459 // If the guard was conditional executed, it may never be reached 460 // dynamically. There are two potential downsides to hoisting it out of the 461 // conditionally executed region: 1) we may spuriously deopt without need and 462 // 2) we have the extra cost of computing the guard condition in the common 463 // case. At the moment, we really only consider the second in our heuristic 464 // here. TODO: evaluate cost model for spurious deopt 465 // NOTE: As written, this also lets us hoist right over another guard which 466 // is essentially just another spelling for control flow. 467 if (isWideningCondProfitable(getCondition(DominatedInstr), 468 getCondition(DominatingGuard), InvertCond)) 469 return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; 470 471 if (HoistingOutOfLoop) 472 return WS_Positive; 473 474 // Returns true if we might be hoisting above explicit control flow. Note 475 // that this completely ignores implicit control flow (guards, calls which 476 // throw, etc...). That choice appears arbitrary. 477 auto MaybeHoistingOutOfIf = [&]() { 478 auto *DominatingBlock = DominatingGuard->getParent(); 479 auto *DominatedBlock = DominatedInstr->getParent(); 480 if (isGuardAsWidenableBranch(DominatingGuard)) 481 DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0); 482 483 // Same Block? 484 if (DominatedBlock == DominatingBlock) 485 return false; 486 // Obvious successor (common loop header/preheader case) 487 if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) 488 return false; 489 // TODO: diamond, triangle cases 490 if (!PDT) return true; 491 return !PDT->dominates(DominatedBlock, DominatingBlock); 492 }; 493 494 return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; 495 } 496 497 bool GuardWideningImpl::isAvailableAt( 498 const Value *V, const Instruction *Loc, 499 SmallPtrSetImpl<const Instruction *> &Visited) const { 500 auto *Inst = dyn_cast<Instruction>(V); 501 if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst)) 502 return true; 503 504 if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) || 505 Inst->mayReadFromMemory()) 506 return false; 507 508 Visited.insert(Inst); 509 510 // We only want to go _up_ the dominance chain when recursing. 511 assert(!isa<PHINode>(Loc) && 512 "PHIs should return false for isSafeToSpeculativelyExecute"); 513 assert(DT.isReachableFromEntry(Inst->getParent()) && 514 "We did a DFS from the block entry!"); 515 return all_of(Inst->operands(), 516 [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); }); 517 } 518 519 void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { 520 auto *Inst = dyn_cast<Instruction>(V); 521 if (!Inst || DT.dominates(Inst, Loc)) 522 return; 523 524 assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) && 525 !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!"); 526 527 for (Value *Op : Inst->operands()) 528 makeAvailableAt(Op, Loc); 529 530 Inst->moveBefore(Loc); 531 } 532 533 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, 534 Instruction *InsertPt, Value *&Result, 535 bool InvertCondition) { 536 using namespace llvm::PatternMatch; 537 538 { 539 // L >u C0 && L >u C1 -> L >u max(C0, C1) 540 ConstantInt *RHS0, *RHS1; 541 Value *LHS; 542 ICmpInst::Predicate Pred0, Pred1; 543 if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && 544 match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { 545 if (InvertCondition) 546 Pred1 = ICmpInst::getInversePredicate(Pred1); 547 548 ConstantRange CR0 = 549 ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); 550 ConstantRange CR1 = 551 ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); 552 553 // SubsetIntersect is a subset of the actual mathematical intersection of 554 // CR0 and CR1, while SupersetIntersect is a superset of the actual 555 // mathematical intersection. If these two ConstantRanges are equal, then 556 // we know we were able to represent the actual mathematical intersection 557 // of CR0 and CR1, and can use the same to generate an icmp instruction. 558 // 559 // Given what we're doing here and the semantics of guards, it would 560 // actually be correct to just use SubsetIntersect, but that may be too 561 // aggressive in cases we care about. 562 auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse(); 563 auto SupersetIntersect = CR0.intersectWith(CR1); 564 565 APInt NewRHSAP; 566 CmpInst::Predicate Pred; 567 if (SubsetIntersect == SupersetIntersect && 568 SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) { 569 if (InsertPt) { 570 ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP); 571 Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); 572 } 573 return true; 574 } 575 } 576 } 577 578 { 579 SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks; 580 // TODO: Support InvertCondition case? 581 if (!InvertCondition && 582 parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && 583 combineRangeChecks(Checks, CombinedChecks)) { 584 if (InsertPt) { 585 Result = nullptr; 586 for (auto &RC : CombinedChecks) { 587 makeAvailableAt(RC.getCheckInst(), InsertPt); 588 if (Result) 589 Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "", 590 InsertPt); 591 else 592 Result = RC.getCheckInst(); 593 } 594 assert(Result && "Failed to find result value"); 595 Result->setName("wide.chk"); 596 } 597 return true; 598 } 599 } 600 601 // Base case -- just logical-and the two conditions together. 602 603 if (InsertPt) { 604 makeAvailableAt(Cond0, InsertPt); 605 makeAvailableAt(Cond1, InsertPt); 606 if (InvertCondition) 607 Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt); 608 Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); 609 } 610 611 // We were not able to compute Cond0 AND Cond1 for the price of one. 612 return false; 613 } 614 615 bool GuardWideningImpl::parseRangeChecks( 616 Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 617 SmallPtrSetImpl<const Value *> &Visited) { 618 if (!Visited.insert(CheckCond).second) 619 return true; 620 621 using namespace llvm::PatternMatch; 622 623 { 624 Value *AndLHS, *AndRHS; 625 if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS)))) 626 return parseRangeChecks(AndLHS, Checks) && 627 parseRangeChecks(AndRHS, Checks); 628 } 629 630 auto *IC = dyn_cast<ICmpInst>(CheckCond); 631 if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || 632 (IC->getPredicate() != ICmpInst::ICMP_ULT && 633 IC->getPredicate() != ICmpInst::ICMP_UGT)) 634 return false; 635 636 const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1); 637 if (IC->getPredicate() == ICmpInst::ICMP_UGT) 638 std::swap(CmpLHS, CmpRHS); 639 640 auto &DL = IC->getModule()->getDataLayout(); 641 642 GuardWideningImpl::RangeCheck Check( 643 CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())), 644 CmpRHS, IC); 645 646 if (!isKnownNonNegative(Check.getLength(), DL)) 647 return false; 648 649 // What we have in \c Check now is a correct interpretation of \p CheckCond. 650 // Try to see if we can move some constant offsets into the \c Offset field. 651 652 bool Changed; 653 auto &Ctx = CheckCond->getContext(); 654 655 do { 656 Value *OpLHS; 657 ConstantInt *OpRHS; 658 Changed = false; 659 660 #ifndef NDEBUG 661 auto *BaseInst = dyn_cast<Instruction>(Check.getBase()); 662 assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) && 663 "Unreachable instruction?"); 664 #endif 665 666 if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 667 Check.setBase(OpLHS); 668 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 669 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 670 Changed = true; 671 } else if (match(Check.getBase(), 672 m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 673 KnownBits Known = computeKnownBits(OpLHS, DL); 674 if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { 675 Check.setBase(OpLHS); 676 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 677 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 678 Changed = true; 679 } 680 } 681 } while (Changed); 682 683 Checks.push_back(Check); 684 return true; 685 } 686 687 bool GuardWideningImpl::combineRangeChecks( 688 SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 689 SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const { 690 unsigned OldCount = Checks.size(); 691 while (!Checks.empty()) { 692 // Pick all of the range checks with a specific base and length, and try to 693 // merge them. 694 const Value *CurrentBase = Checks.front().getBase(); 695 const Value *CurrentLength = Checks.front().getLength(); 696 697 SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks; 698 699 auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) { 700 return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength; 701 }; 702 703 copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); 704 Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end()); 705 706 assert(CurrentChecks.size() != 0 && "We know we have at least one!"); 707 708 if (CurrentChecks.size() < 3) { 709 RangeChecksOut.insert(RangeChecksOut.end(), CurrentChecks.begin(), 710 CurrentChecks.end()); 711 continue; 712 } 713 714 // CurrentChecks.size() will typically be 3 here, but so far there has been 715 // no need to hard-code that fact. 716 717 llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS, 718 const GuardWideningImpl::RangeCheck &RHS) { 719 return LHS.getOffsetValue().slt(RHS.getOffsetValue()); 720 }); 721 722 // Note: std::sort should not invalidate the ChecksStart iterator. 723 724 const ConstantInt *MinOffset = CurrentChecks.front().getOffset(); 725 const ConstantInt *MaxOffset = CurrentChecks.back().getOffset(); 726 727 unsigned BitWidth = MaxOffset->getValue().getBitWidth(); 728 if ((MaxOffset->getValue() - MinOffset->getValue()) 729 .ugt(APInt::getSignedMinValue(BitWidth))) 730 return false; 731 732 APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue(); 733 const APInt &HighOffset = MaxOffset->getValue(); 734 auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) { 735 return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); 736 }; 737 738 if (MaxDiff.isMinValue() || 739 !std::all_of(std::next(CurrentChecks.begin()), CurrentChecks.end(), 740 OffsetOK)) 741 return false; 742 743 // We have a series of f+1 checks as: 744 // 745 // I+k_0 u< L ... Chk_0 746 // I+k_1 u< L ... Chk_1 747 // ... 748 // I+k_f u< L ... Chk_f 749 // 750 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 751 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 752 // k_f != k_0 ... Precond_2 753 // 754 // Claim: 755 // Chk_0 AND Chk_f implies all the other checks 756 // 757 // Informal proof sketch: 758 // 759 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap 760 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and 761 // thus I+k_f is the greatest unsigned value in that range. 762 // 763 // This combined with Ckh_(f+1) shows that everything in that range is u< L. 764 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1) 765 // lie in [I+k_0,I+k_f], this proving our claim. 766 // 767 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are 768 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal 769 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping 770 // range by definition, and the latter case is impossible: 771 // 772 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1) 773 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 774 // 775 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted 776 // with 'x' above) to be at least >u INT_MIN. 777 778 RangeChecksOut.emplace_back(CurrentChecks.front()); 779 RangeChecksOut.emplace_back(CurrentChecks.back()); 780 } 781 782 assert(RangeChecksOut.size() <= OldCount && "We pessimized!"); 783 return RangeChecksOut.size() != OldCount; 784 } 785 786 #ifndef NDEBUG 787 StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { 788 switch (WS) { 789 case WS_IllegalOrNegative: 790 return "IllegalOrNegative"; 791 case WS_Neutral: 792 return "Neutral"; 793 case WS_Positive: 794 return "Positive"; 795 case WS_VeryPositive: 796 return "VeryPositive"; 797 } 798 799 llvm_unreachable("Fully covered switch above!"); 800 } 801 #endif 802 803 PreservedAnalyses GuardWideningPass::run(Function &F, 804 FunctionAnalysisManager &AM) { 805 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 806 auto &LI = AM.getResult<LoopAnalysis>(F); 807 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 808 BranchProbabilityInfo *BPI = nullptr; 809 if (WidenFrequentBranches) 810 BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F); 811 if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(), 812 [](BasicBlock*) { return true; } ).run()) 813 return PreservedAnalyses::all(); 814 815 PreservedAnalyses PA; 816 PA.preserveSet<CFGAnalyses>(); 817 return PA; 818 } 819 820 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 821 LoopStandardAnalysisResults &AR, 822 LPMUpdater &U) { 823 824 const auto &FAM = 825 AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager(); 826 Function &F = *L.getHeader()->getParent(); 827 BranchProbabilityInfo *BPI = nullptr; 828 if (WidenFrequentBranches) 829 BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F); 830 831 BasicBlock *RootBB = L.getLoopPredecessor(); 832 if (!RootBB) 833 RootBB = L.getHeader(); 834 auto BlockFilter = [&](BasicBlock *BB) { 835 return BB == RootBB || L.contains(BB); 836 }; 837 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI, 838 AR.DT.getNode(RootBB), 839 BlockFilter).run()) 840 return PreservedAnalyses::all(); 841 842 return getLoopPassPreservedAnalyses(); 843 } 844 845 namespace { 846 struct GuardWideningLegacyPass : public FunctionPass { 847 static char ID; 848 849 GuardWideningLegacyPass() : FunctionPass(ID) { 850 initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 851 } 852 853 bool runOnFunction(Function &F) override { 854 if (skipFunction(F)) 855 return false; 856 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 857 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 858 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 859 BranchProbabilityInfo *BPI = nullptr; 860 if (WidenFrequentBranches) 861 BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); 862 return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(), 863 [](BasicBlock*) { return true; } ).run(); 864 } 865 866 void getAnalysisUsage(AnalysisUsage &AU) const override { 867 AU.setPreservesCFG(); 868 AU.addRequired<DominatorTreeWrapperPass>(); 869 AU.addRequired<PostDominatorTreeWrapperPass>(); 870 AU.addRequired<LoopInfoWrapperPass>(); 871 if (WidenFrequentBranches) 872 AU.addRequired<BranchProbabilityInfoWrapperPass>(); 873 } 874 }; 875 876 /// Same as above, but restricted to a single loop at a time. Can be 877 /// scheduled with other loop passes w/o breaking out of LPM 878 struct LoopGuardWideningLegacyPass : public LoopPass { 879 static char ID; 880 881 LoopGuardWideningLegacyPass() : LoopPass(ID) { 882 initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 883 } 884 885 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 886 if (skipLoop(L)) 887 return false; 888 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 889 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 890 auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); 891 auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; 892 BasicBlock *RootBB = L->getLoopPredecessor(); 893 if (!RootBB) 894 RootBB = L->getHeader(); 895 auto BlockFilter = [&](BasicBlock *BB) { 896 return BB == RootBB || L->contains(BB); 897 }; 898 BranchProbabilityInfo *BPI = nullptr; 899 if (WidenFrequentBranches) 900 BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); 901 return GuardWideningImpl(DT, PDT, LI, BPI, 902 DT.getNode(RootBB), BlockFilter).run(); 903 } 904 905 void getAnalysisUsage(AnalysisUsage &AU) const override { 906 if (WidenFrequentBranches) 907 AU.addRequired<BranchProbabilityInfoWrapperPass>(); 908 AU.setPreservesCFG(); 909 getLoopAnalysisUsage(AU); 910 AU.addPreserved<PostDominatorTreeWrapperPass>(); 911 } 912 }; 913 } 914 915 char GuardWideningLegacyPass::ID = 0; 916 char LoopGuardWideningLegacyPass::ID = 0; 917 918 INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", 919 false, false) 920 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 921 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 922 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 923 if (WidenFrequentBranches) 924 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 925 INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", 926 false, false) 927 928 INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening", 929 "Widen guards (within a single loop, as a loop pass)", 930 false, false) 931 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 932 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 933 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 934 if (WidenFrequentBranches) 935 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 936 INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening", 937 "Widen guards (within a single loop, as a loop pass)", 938 false, false) 939 940 FunctionPass *llvm::createGuardWideningPass() { 941 return new GuardWideningLegacyPass(); 942 } 943 944 Pass *llvm::createLoopGuardWideningPass() { 945 return new LoopGuardWideningLegacyPass(); 946 } 947