1 //===- GuardWidening.cpp - ---- Guard widening ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the guard widening pass. The semantics of the 10 // @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails 11 // more often that it did before the transform. This optimization is called 12 // "widening" and can be used hoist and common runtime checks in situations like 13 // these: 14 // 15 // %cmp0 = 7 u< Length 16 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 17 // call @unknown_side_effects() 18 // %cmp1 = 9 u< Length 19 // call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ] 20 // ... 21 // 22 // => 23 // 24 // %cmp0 = 9 u< Length 25 // call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ] 26 // call @unknown_side_effects() 27 // ... 28 // 29 // If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a 30 // generic implementation of the same function, which will have the correct 31 // semantics from that point onward. It is always _legal_ to deoptimize (so 32 // replacing %cmp0 with false is "correct"), though it may not always be 33 // profitable to do so. 34 // 35 // NB! This pass is a work in progress. It hasn't been tuned to be "production 36 // ready" yet. It is known to have quadriatic running time and will not scale 37 // to large numbers of guards 38 // 39 //===----------------------------------------------------------------------===// 40 41 #include "llvm/Transforms/Scalar/GuardWidening.h" 42 #include "llvm/ADT/DenseMap.h" 43 #include "llvm/ADT/DepthFirstIterator.h" 44 #include "llvm/ADT/Statistic.h" 45 #include "llvm/Analysis/AssumptionCache.h" 46 #include "llvm/Analysis/GuardUtils.h" 47 #include "llvm/Analysis/LoopInfo.h" 48 #include "llvm/Analysis/LoopPass.h" 49 #include "llvm/Analysis/MemorySSAUpdater.h" 50 #include "llvm/Analysis/PostDominators.h" 51 #include "llvm/Analysis/ValueTracking.h" 52 #include "llvm/IR/ConstantRange.h" 53 #include "llvm/IR/Dominators.h" 54 #include "llvm/IR/IntrinsicInst.h" 55 #include "llvm/IR/PatternMatch.h" 56 #include "llvm/InitializePasses.h" 57 #include "llvm/Pass.h" 58 #include "llvm/Support/CommandLine.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/KnownBits.h" 61 #include "llvm/Transforms/Scalar.h" 62 #include "llvm/Transforms/Utils/GuardUtils.h" 63 #include "llvm/Transforms/Utils/LoopUtils.h" 64 #include <functional> 65 66 using namespace llvm; 67 68 #define DEBUG_TYPE "guard-widening" 69 70 STATISTIC(GuardsEliminated, "Number of eliminated guards"); 71 STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches"); 72 73 static cl::opt<bool> 74 WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden, 75 cl::desc("Whether or not we should widen guards " 76 "expressed as branches by widenable conditions"), 77 cl::init(true)); 78 79 namespace { 80 81 // Get the condition of \p I. It can either be a guard or a conditional branch. 82 static Value *getCondition(Instruction *I) { 83 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 84 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 85 "Bad guard intrinsic?"); 86 return GI->getArgOperand(0); 87 } 88 Value *Cond, *WC; 89 BasicBlock *IfTrueBB, *IfFalseBB; 90 if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB)) 91 return Cond; 92 93 return cast<BranchInst>(I)->getCondition(); 94 } 95 96 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 97 // conditional branch. 98 static void setCondition(Instruction *I, Value *NewCond) { 99 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 100 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 101 "Bad guard intrinsic?"); 102 GI->setArgOperand(0, NewCond); 103 return; 104 } 105 cast<BranchInst>(I)->setCondition(NewCond); 106 } 107 108 // Eliminates the guard instruction properly. 109 static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { 110 GuardInst->eraseFromParent(); 111 if (MSSAU) 112 MSSAU->removeMemoryAccess(GuardInst); 113 ++GuardsEliminated; 114 } 115 116 class GuardWideningImpl { 117 DominatorTree &DT; 118 PostDominatorTree *PDT; 119 LoopInfo &LI; 120 AssumptionCache &AC; 121 MemorySSAUpdater *MSSAU; 122 123 /// Together, these describe the region of interest. This might be all of 124 /// the blocks within a function, or only a given loop's blocks and preheader. 125 DomTreeNode *Root; 126 std::function<bool(BasicBlock*)> BlockFilter; 127 128 /// The set of guards and conditional branches whose conditions have been 129 /// widened into dominating guards. 130 SmallVector<Instruction *, 16> EliminatedGuardsAndBranches; 131 132 /// The set of guards which have been widened to include conditions to other 133 /// guards. 134 DenseSet<Instruction *> WidenedGuards; 135 136 /// Try to eliminate instruction \p Instr by widening it into an earlier 137 /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that 138 /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock 139 /// maps BasicBlocks to the set of guards seen in that block. 140 bool eliminateInstrViaWidening( 141 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 142 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 143 GuardsPerBlock, bool InvertCondition = false); 144 145 /// Used to keep track of which widening potential is more effective. 146 enum WideningScore { 147 /// Don't widen. 148 WS_IllegalOrNegative, 149 150 /// Widening is performance neutral as far as the cycles spent in check 151 /// conditions goes (but can still help, e.g., code layout, having less 152 /// deopt state). 153 WS_Neutral, 154 155 /// Widening is profitable. 156 WS_Positive, 157 158 /// Widening is very profitable. Not significantly different from \c 159 /// WS_Positive, except by the order. 160 WS_VeryPositive 161 }; 162 163 static StringRef scoreTypeToString(WideningScore WS); 164 165 /// Compute the score for widening the condition in \p DominatedInstr 166 /// into \p DominatingGuard. If \p InvertCond is set, then we widen the 167 /// inverted condition of the dominating guard. 168 WideningScore computeWideningScore(Instruction *DominatedInstr, 169 Instruction *DominatingGuard, 170 bool InvertCond); 171 172 /// Helper to check if \p V can be hoisted to \p InsertPos. 173 bool isAvailableAt(const Value *V, const Instruction *InsertPos) const { 174 SmallPtrSet<const Instruction *, 8> Visited; 175 return isAvailableAt(V, InsertPos, Visited); 176 } 177 178 bool isAvailableAt(const Value *V, const Instruction *InsertPos, 179 SmallPtrSetImpl<const Instruction *> &Visited) const; 180 181 /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c 182 /// isAvailableAt returned true. 183 void makeAvailableAt(Value *V, Instruction *InsertPos) const; 184 185 /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try 186 /// to generate an expression computing the logical AND of \p Cond0 and (\p 187 /// Cond1 XOR \p InvertCondition). 188 /// Return true if the expression computing the AND is only as 189 /// expensive as computing one of the two. If \p InsertPt is true then 190 /// actually generate the resulting expression, make it available at \p 191 /// InsertPt and return it in \p Result (else no change to the IR is made). 192 bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt, 193 Value *&Result, bool InvertCondition); 194 195 /// Represents a range check of the form \c Base + \c Offset u< \c Length, 196 /// with the constraint that \c Length is not negative. \c CheckInst is the 197 /// pre-existing instruction in the IR that computes the result of this range 198 /// check. 199 class RangeCheck { 200 const Value *Base; 201 const ConstantInt *Offset; 202 const Value *Length; 203 ICmpInst *CheckInst; 204 205 public: 206 explicit RangeCheck(const Value *Base, const ConstantInt *Offset, 207 const Value *Length, ICmpInst *CheckInst) 208 : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {} 209 210 void setBase(const Value *NewBase) { Base = NewBase; } 211 void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; } 212 213 const Value *getBase() const { return Base; } 214 const ConstantInt *getOffset() const { return Offset; } 215 const APInt &getOffsetValue() const { return getOffset()->getValue(); } 216 const Value *getLength() const { return Length; }; 217 ICmpInst *getCheckInst() const { return CheckInst; } 218 219 void print(raw_ostream &OS, bool PrintTypes = false) { 220 OS << "Base: "; 221 Base->printAsOperand(OS, PrintTypes); 222 OS << " Offset: "; 223 Offset->printAsOperand(OS, PrintTypes); 224 OS << " Length: "; 225 Length->printAsOperand(OS, PrintTypes); 226 } 227 228 LLVM_DUMP_METHOD void dump() { 229 print(dbgs()); 230 dbgs() << "\n"; 231 } 232 }; 233 234 /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and 235 /// append them to \p Checks. Returns true on success, may clobber \c Checks 236 /// on failure. 237 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) { 238 SmallPtrSet<const Value *, 8> Visited; 239 return parseRangeChecks(CheckCond, Checks, Visited); 240 } 241 242 bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks, 243 SmallPtrSetImpl<const Value *> &Visited); 244 245 /// Combine the checks in \p Checks into a smaller set of checks and append 246 /// them into \p CombinedChecks. Return true on success (i.e. all of checks 247 /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks 248 /// and \p CombinedChecks on success and on failure. 249 bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks, 250 SmallVectorImpl<RangeCheck> &CombinedChecks) const; 251 252 /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of 253 /// computing only one of the two expressions? 254 bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) { 255 Value *ResultUnused; 256 return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused, 257 InvertCond); 258 } 259 260 /// If \p InvertCondition is false, Widen \p ToWiden to fail if 261 /// \p NewCondition is false, otherwise make it fail if \p NewCondition is 262 /// true (in addition to whatever it is already checking). 263 void widenGuard(Instruction *ToWiden, Value *NewCondition, 264 bool InvertCondition) { 265 Value *Result; 266 267 widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result, 268 InvertCondition); 269 if (isGuardAsWidenableBranch(ToWiden)) { 270 setWidenableBranchCond(cast<BranchInst>(ToWiden), Result); 271 return; 272 } 273 setCondition(ToWiden, Result); 274 } 275 276 public: 277 explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, 278 LoopInfo &LI, AssumptionCache &AC, 279 MemorySSAUpdater *MSSAU, DomTreeNode *Root, 280 std::function<bool(BasicBlock *)> BlockFilter) 281 : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root), 282 BlockFilter(BlockFilter) {} 283 284 /// The entry point for this pass. 285 bool run(); 286 }; 287 } 288 289 static bool isSupportedGuardInstruction(const Instruction *Insn) { 290 if (isGuard(Insn)) 291 return true; 292 if (WidenBranchGuards && isGuardAsWidenableBranch(Insn)) 293 return true; 294 return false; 295 } 296 297 bool GuardWideningImpl::run() { 298 DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock; 299 bool Changed = false; 300 for (auto DFI = df_begin(Root), DFE = df_end(Root); 301 DFI != DFE; ++DFI) { 302 auto *BB = (*DFI)->getBlock(); 303 if (!BlockFilter(BB)) 304 continue; 305 306 auto &CurrentList = GuardsInBlock[BB]; 307 308 for (auto &I : *BB) 309 if (isSupportedGuardInstruction(&I)) 310 CurrentList.push_back(cast<Instruction>(&I)); 311 312 for (auto *II : CurrentList) 313 Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock); 314 } 315 316 assert(EliminatedGuardsAndBranches.empty() || Changed); 317 for (auto *I : EliminatedGuardsAndBranches) 318 if (!WidenedGuards.count(I)) { 319 assert(isa<ConstantInt>(getCondition(I)) && "Should be!"); 320 if (isSupportedGuardInstruction(I)) 321 eliminateGuard(I, MSSAU); 322 else { 323 assert(isa<BranchInst>(I) && 324 "Eliminated something other than guard or branch?"); 325 ++CondBranchEliminated; 326 } 327 } 328 329 return Changed; 330 } 331 332 bool GuardWideningImpl::eliminateInstrViaWidening( 333 Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI, 334 const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> & 335 GuardsInBlock, bool InvertCondition) { 336 // Ignore trivial true or false conditions. These instructions will be 337 // trivially eliminated by any cleanup pass. Do not erase them because other 338 // guards can possibly be widened into them. 339 if (isa<ConstantInt>(getCondition(Instr))) 340 return false; 341 342 Instruction *BestSoFar = nullptr; 343 auto BestScoreSoFar = WS_IllegalOrNegative; 344 345 // In the set of dominating guards, find the one we can merge GuardInst with 346 // for the most profit. 347 for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) { 348 auto *CurBB = DFSI.getPath(i)->getBlock(); 349 if (!BlockFilter(CurBB)) 350 break; 351 assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!"); 352 const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second; 353 354 auto I = GuardsInCurBB.begin(); 355 auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr) 356 : GuardsInCurBB.end(); 357 358 #ifndef NDEBUG 359 { 360 unsigned Index = 0; 361 for (auto &I : *CurBB) { 362 if (Index == GuardsInCurBB.size()) 363 break; 364 if (GuardsInCurBB[Index] == &I) 365 Index++; 366 } 367 assert(Index == GuardsInCurBB.size() && 368 "Guards expected to be in order!"); 369 } 370 #endif 371 372 assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?"); 373 374 for (auto *Candidate : make_range(I, E)) { 375 auto Score = computeWideningScore(Instr, Candidate, InvertCondition); 376 LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr) 377 << " and " << *getCondition(Candidate) << " is " 378 << scoreTypeToString(Score) << "\n"); 379 if (Score > BestScoreSoFar) { 380 BestScoreSoFar = Score; 381 BestSoFar = Candidate; 382 } 383 } 384 } 385 386 if (BestScoreSoFar == WS_IllegalOrNegative) { 387 LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n"); 388 return false; 389 } 390 391 assert(BestSoFar != Instr && "Should have never visited same guard!"); 392 assert(DT.dominates(BestSoFar, Instr) && "Should be!"); 393 394 LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar 395 << " with score " << scoreTypeToString(BestScoreSoFar) 396 << "\n"); 397 widenGuard(BestSoFar, getCondition(Instr), InvertCondition); 398 auto NewGuardCondition = InvertCondition 399 ? ConstantInt::getFalse(Instr->getContext()) 400 : ConstantInt::getTrue(Instr->getContext()); 401 setCondition(Instr, NewGuardCondition); 402 EliminatedGuardsAndBranches.push_back(Instr); 403 WidenedGuards.insert(BestSoFar); 404 return true; 405 } 406 407 GuardWideningImpl::WideningScore 408 GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, 409 Instruction *DominatingGuard, 410 bool InvertCond) { 411 Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent()); 412 Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent()); 413 bool HoistingOutOfLoop = false; 414 415 if (DominatingGuardLoop != DominatedInstrLoop) { 416 // Be conservative and don't widen into a sibling loop. TODO: If the 417 // sibling is colder, we should consider allowing this. 418 if (DominatingGuardLoop && 419 !DominatingGuardLoop->contains(DominatedInstrLoop)) 420 return WS_IllegalOrNegative; 421 422 HoistingOutOfLoop = true; 423 } 424 425 if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard)) 426 return WS_IllegalOrNegative; 427 428 // If the guard was conditional executed, it may never be reached 429 // dynamically. There are two potential downsides to hoisting it out of the 430 // conditionally executed region: 1) we may spuriously deopt without need and 431 // 2) we have the extra cost of computing the guard condition in the common 432 // case. At the moment, we really only consider the second in our heuristic 433 // here. TODO: evaluate cost model for spurious deopt 434 // NOTE: As written, this also lets us hoist right over another guard which 435 // is essentially just another spelling for control flow. 436 if (isWideningCondProfitable(getCondition(DominatedInstr), 437 getCondition(DominatingGuard), InvertCond)) 438 return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; 439 440 if (HoistingOutOfLoop) 441 return WS_Positive; 442 443 // Returns true if we might be hoisting above explicit control flow. Note 444 // that this completely ignores implicit control flow (guards, calls which 445 // throw, etc...). That choice appears arbitrary. 446 auto MaybeHoistingOutOfIf = [&]() { 447 auto *DominatingBlock = DominatingGuard->getParent(); 448 auto *DominatedBlock = DominatedInstr->getParent(); 449 if (isGuardAsWidenableBranch(DominatingGuard)) 450 DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0); 451 452 // Same Block? 453 if (DominatedBlock == DominatingBlock) 454 return false; 455 // Obvious successor (common loop header/preheader case) 456 if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) 457 return false; 458 // TODO: diamond, triangle cases 459 if (!PDT) return true; 460 return !PDT->dominates(DominatedBlock, DominatingBlock); 461 }; 462 463 return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; 464 } 465 466 bool GuardWideningImpl::isAvailableAt( 467 const Value *V, const Instruction *Loc, 468 SmallPtrSetImpl<const Instruction *> &Visited) const { 469 auto *Inst = dyn_cast<Instruction>(V); 470 if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst)) 471 return true; 472 473 if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) || 474 Inst->mayReadFromMemory()) 475 return false; 476 477 Visited.insert(Inst); 478 479 // We only want to go _up_ the dominance chain when recursing. 480 assert(!isa<PHINode>(Loc) && 481 "PHIs should return false for isSafeToSpeculativelyExecute"); 482 assert(DT.isReachableFromEntry(Inst->getParent()) && 483 "We did a DFS from the block entry!"); 484 return all_of(Inst->operands(), 485 [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); }); 486 } 487 488 void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const { 489 auto *Inst = dyn_cast<Instruction>(V); 490 if (!Inst || DT.dominates(Inst, Loc)) 491 return; 492 493 assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) && 494 !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!"); 495 496 for (Value *Op : Inst->operands()) 497 makeAvailableAt(Op, Loc); 498 499 Inst->moveBefore(Loc); 500 // If we moved instruction before guard we must clean poison generating flags. 501 Inst->dropPoisonGeneratingFlags(); 502 } 503 504 bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, 505 Instruction *InsertPt, Value *&Result, 506 bool InvertCondition) { 507 using namespace llvm::PatternMatch; 508 509 { 510 // L >u C0 && L >u C1 -> L >u max(C0, C1) 511 ConstantInt *RHS0, *RHS1; 512 Value *LHS; 513 ICmpInst::Predicate Pred0, Pred1; 514 if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) && 515 match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) { 516 if (InvertCondition) 517 Pred1 = ICmpInst::getInversePredicate(Pred1); 518 519 ConstantRange CR0 = 520 ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue()); 521 ConstantRange CR1 = 522 ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); 523 524 // Given what we're doing here and the semantics of guards, it would 525 // be correct to use a subset intersection, but that may be too 526 // aggressive in cases we care about. 527 if (std::optional<ConstantRange> Intersect = 528 CR0.exactIntersectWith(CR1)) { 529 APInt NewRHSAP; 530 CmpInst::Predicate Pred; 531 if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { 532 if (InsertPt) { 533 ConstantInt *NewRHS = 534 ConstantInt::get(Cond0->getContext(), NewRHSAP); 535 Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); 536 } 537 return true; 538 } 539 } 540 } 541 } 542 543 { 544 SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks; 545 // TODO: Support InvertCondition case? 546 if (!InvertCondition && 547 parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) && 548 combineRangeChecks(Checks, CombinedChecks)) { 549 if (InsertPt) { 550 Result = nullptr; 551 for (auto &RC : CombinedChecks) { 552 makeAvailableAt(RC.getCheckInst(), InsertPt); 553 if (Result) 554 Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "", 555 InsertPt); 556 else 557 Result = RC.getCheckInst(); 558 } 559 assert(Result && "Failed to find result value"); 560 Result->setName("wide.chk"); 561 } 562 return true; 563 } 564 } 565 566 // Base case -- just logical-and the two conditions together. 567 568 if (InsertPt) { 569 makeAvailableAt(Cond0, InsertPt); 570 makeAvailableAt(Cond1, InsertPt); 571 if (InvertCondition) 572 Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt); 573 Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt); 574 } 575 576 // We were not able to compute Cond0 AND Cond1 for the price of one. 577 return false; 578 } 579 580 bool GuardWideningImpl::parseRangeChecks( 581 Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 582 SmallPtrSetImpl<const Value *> &Visited) { 583 if (!Visited.insert(CheckCond).second) 584 return true; 585 586 using namespace llvm::PatternMatch; 587 588 { 589 Value *AndLHS, *AndRHS; 590 if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS)))) 591 return parseRangeChecks(AndLHS, Checks) && 592 parseRangeChecks(AndRHS, Checks); 593 } 594 595 auto *IC = dyn_cast<ICmpInst>(CheckCond); 596 if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() || 597 (IC->getPredicate() != ICmpInst::ICMP_ULT && 598 IC->getPredicate() != ICmpInst::ICMP_UGT)) 599 return false; 600 601 const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1); 602 if (IC->getPredicate() == ICmpInst::ICMP_UGT) 603 std::swap(CmpLHS, CmpRHS); 604 605 auto &DL = IC->getModule()->getDataLayout(); 606 607 GuardWideningImpl::RangeCheck Check( 608 CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())), 609 CmpRHS, IC); 610 611 if (!isKnownNonNegative(Check.getLength(), DL)) 612 return false; 613 614 // What we have in \c Check now is a correct interpretation of \p CheckCond. 615 // Try to see if we can move some constant offsets into the \c Offset field. 616 617 bool Changed; 618 auto &Ctx = CheckCond->getContext(); 619 620 do { 621 Value *OpLHS; 622 ConstantInt *OpRHS; 623 Changed = false; 624 625 #ifndef NDEBUG 626 auto *BaseInst = dyn_cast<Instruction>(Check.getBase()); 627 assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) && 628 "Unreachable instruction?"); 629 #endif 630 631 if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 632 Check.setBase(OpLHS); 633 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 634 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 635 Changed = true; 636 } else if (match(Check.getBase(), 637 m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { 638 KnownBits Known = computeKnownBits(OpLHS, DL); 639 if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { 640 Check.setBase(OpLHS); 641 APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); 642 Check.setOffset(ConstantInt::get(Ctx, NewOffset)); 643 Changed = true; 644 } 645 } 646 } while (Changed); 647 648 Checks.push_back(Check); 649 return true; 650 } 651 652 bool GuardWideningImpl::combineRangeChecks( 653 SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks, 654 SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const { 655 unsigned OldCount = Checks.size(); 656 while (!Checks.empty()) { 657 // Pick all of the range checks with a specific base and length, and try to 658 // merge them. 659 const Value *CurrentBase = Checks.front().getBase(); 660 const Value *CurrentLength = Checks.front().getLength(); 661 662 SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks; 663 664 auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) { 665 return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength; 666 }; 667 668 copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck); 669 erase_if(Checks, IsCurrentCheck); 670 671 assert(CurrentChecks.size() != 0 && "We know we have at least one!"); 672 673 if (CurrentChecks.size() < 3) { 674 llvm::append_range(RangeChecksOut, CurrentChecks); 675 continue; 676 } 677 678 // CurrentChecks.size() will typically be 3 here, but so far there has been 679 // no need to hard-code that fact. 680 681 llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS, 682 const GuardWideningImpl::RangeCheck &RHS) { 683 return LHS.getOffsetValue().slt(RHS.getOffsetValue()); 684 }); 685 686 // Note: std::sort should not invalidate the ChecksStart iterator. 687 688 const ConstantInt *MinOffset = CurrentChecks.front().getOffset(); 689 const ConstantInt *MaxOffset = CurrentChecks.back().getOffset(); 690 691 unsigned BitWidth = MaxOffset->getValue().getBitWidth(); 692 if ((MaxOffset->getValue() - MinOffset->getValue()) 693 .ugt(APInt::getSignedMinValue(BitWidth))) 694 return false; 695 696 APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue(); 697 const APInt &HighOffset = MaxOffset->getValue(); 698 auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) { 699 return (HighOffset - RC.getOffsetValue()).ult(MaxDiff); 700 }; 701 702 if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK)) 703 return false; 704 705 // We have a series of f+1 checks as: 706 // 707 // I+k_0 u< L ... Chk_0 708 // I+k_1 u< L ... Chk_1 709 // ... 710 // I+k_f u< L ... Chk_f 711 // 712 // with forall i in [0,f]: k_f-k_i u< k_f-k_0 ... Precond_0 713 // k_f-k_0 u< INT_MIN+k_f ... Precond_1 714 // k_f != k_0 ... Precond_2 715 // 716 // Claim: 717 // Chk_0 AND Chk_f implies all the other checks 718 // 719 // Informal proof sketch: 720 // 721 // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap 722 // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and 723 // thus I+k_f is the greatest unsigned value in that range. 724 // 725 // This combined with Ckh_(f+1) shows that everything in that range is u< L. 726 // Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1) 727 // lie in [I+k_0,I+k_f], this proving our claim. 728 // 729 // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are 730 // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal 731 // since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not a wrapping 732 // range by definition, and the latter case is impossible: 733 // 734 // 0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1) 735 // xxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 736 // 737 // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted 738 // with 'x' above) to be at least >u INT_MIN. 739 740 RangeChecksOut.emplace_back(CurrentChecks.front()); 741 RangeChecksOut.emplace_back(CurrentChecks.back()); 742 } 743 744 assert(RangeChecksOut.size() <= OldCount && "We pessimized!"); 745 return RangeChecksOut.size() != OldCount; 746 } 747 748 #ifndef NDEBUG 749 StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) { 750 switch (WS) { 751 case WS_IllegalOrNegative: 752 return "IllegalOrNegative"; 753 case WS_Neutral: 754 return "Neutral"; 755 case WS_Positive: 756 return "Positive"; 757 case WS_VeryPositive: 758 return "VeryPositive"; 759 } 760 761 llvm_unreachable("Fully covered switch above!"); 762 } 763 #endif 764 765 PreservedAnalyses GuardWideningPass::run(Function &F, 766 FunctionAnalysisManager &AM) { 767 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 768 auto &LI = AM.getResult<LoopAnalysis>(F); 769 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 770 auto &AC = AM.getResult<AssumptionAnalysis>(F); 771 auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); 772 std::unique_ptr<MemorySSAUpdater> MSSAU; 773 if (MSSAA) 774 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); 775 if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, 776 DT.getRootNode(), [](BasicBlock *) { return true; }) 777 .run()) 778 return PreservedAnalyses::all(); 779 780 PreservedAnalyses PA; 781 PA.preserveSet<CFGAnalyses>(); 782 PA.preserve<MemorySSAAnalysis>(); 783 return PA; 784 } 785 786 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 787 LoopStandardAnalysisResults &AR, 788 LPMUpdater &U) { 789 BasicBlock *RootBB = L.getLoopPredecessor(); 790 if (!RootBB) 791 RootBB = L.getHeader(); 792 auto BlockFilter = [&](BasicBlock *BB) { 793 return BB == RootBB || L.contains(BB); 794 }; 795 std::unique_ptr<MemorySSAUpdater> MSSAU; 796 if (AR.MSSA) 797 MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 798 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, 799 MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB), 800 BlockFilter) 801 .run()) 802 return PreservedAnalyses::all(); 803 804 auto PA = getLoopPassPreservedAnalyses(); 805 if (AR.MSSA) 806 PA.preserve<MemorySSAAnalysis>(); 807 return PA; 808 } 809 810 namespace { 811 struct GuardWideningLegacyPass : public FunctionPass { 812 static char ID; 813 814 GuardWideningLegacyPass() : FunctionPass(ID) { 815 initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 816 } 817 818 bool runOnFunction(Function &F) override { 819 if (skipFunction(F)) 820 return false; 821 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 822 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 823 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 824 auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 825 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 826 std::unique_ptr<MemorySSAUpdater> MSSAU; 827 if (MSSAWP) 828 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 829 return GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, 830 DT.getRootNode(), 831 [](BasicBlock *) { return true; }) 832 .run(); 833 } 834 835 void getAnalysisUsage(AnalysisUsage &AU) const override { 836 AU.setPreservesCFG(); 837 AU.addRequired<DominatorTreeWrapperPass>(); 838 AU.addRequired<PostDominatorTreeWrapperPass>(); 839 AU.addRequired<LoopInfoWrapperPass>(); 840 AU.addPreserved<MemorySSAWrapperPass>(); 841 } 842 }; 843 844 /// Same as above, but restricted to a single loop at a time. Can be 845 /// scheduled with other loop passes w/o breaking out of LPM 846 struct LoopGuardWideningLegacyPass : public LoopPass { 847 static char ID; 848 849 LoopGuardWideningLegacyPass() : LoopPass(ID) { 850 initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry()); 851 } 852 853 bool runOnLoop(Loop *L, LPPassManager &LPM) override { 854 if (skipLoop(L)) 855 return false; 856 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 857 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 858 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 859 *L->getHeader()->getParent()); 860 auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); 861 auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; 862 auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 863 std::unique_ptr<MemorySSAUpdater> MSSAU; 864 if (MSSAWP) 865 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA()); 866 867 BasicBlock *RootBB = L->getLoopPredecessor(); 868 if (!RootBB) 869 RootBB = L->getHeader(); 870 auto BlockFilter = [&](BasicBlock *BB) { 871 return BB == RootBB || L->contains(BB); 872 }; 873 return GuardWideningImpl(DT, PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, 874 DT.getNode(RootBB), BlockFilter) 875 .run(); 876 } 877 878 void getAnalysisUsage(AnalysisUsage &AU) const override { 879 AU.setPreservesCFG(); 880 getLoopAnalysisUsage(AU); 881 AU.addPreserved<PostDominatorTreeWrapperPass>(); 882 AU.addPreserved<MemorySSAWrapperPass>(); 883 } 884 }; 885 } 886 887 char GuardWideningLegacyPass::ID = 0; 888 char LoopGuardWideningLegacyPass::ID = 0; 889 890 INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards", 891 false, false) 892 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 893 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 894 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 895 INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards", 896 false, false) 897 898 INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening", 899 "Widen guards (within a single loop, as a loop pass)", 900 false, false) 901 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 902 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 903 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 904 INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening", 905 "Widen guards (within a single loop, as a loop pass)", 906 false, false) 907 908 FunctionPass *llvm::createGuardWideningPass() { 909 return new GuardWideningLegacyPass(); 910 } 911 912 Pass *llvm::createLoopGuardWideningPass() { 913 return new LoopGuardWideningLegacyPass(); 914 } 915