//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet. It is known to have quadratic running time and will not scale to
// large numbers of guards.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));

namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
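// For a widenable branch, i.e. br (and %cond, %widenable_cond), this returns
// %cond rather than the combined branch condition.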
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;

  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Erase the guard instruction \p GuardInst, keeping MemorySSA up to date. The
// memory access must be removed before the instruction itself is erased.
static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  if (MSSAU)
    MSSAU->removeMemoryAccess(GuardInst);
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
          GuardsPerBlock, bool InvertCondition = false);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
  /// inverted condition of the dominating guard.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *DominatingGuard,
                                     bool InvertCond);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
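  /// \p Visited tracks instructions that have already been checked, so the
  /// recursive walk over \p V's operands terminates and does not recheck
  /// shared subexpressions.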
  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return isAvailableAt(V, InsertPos, Visited);
  }

  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
                     SmallPtrSetImpl<const Instruction *> &Visited) const;

  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// isAvailableAt returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p Cond0 and (\p
  /// Cond1 XOR \p InvertCondition).
  /// Return true if the expression computing the AND is only as
  /// expensive as computing one of the two. If \p InsertPt is non-null then
  /// actually generate the resulting expression, make it available at \p
  /// InsertPt and return it in \p Result (else no change to the IR is made).
  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                       Value *&Result, bool InvertCondition);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p CheckCond into a conjunction (logical-and) of range checks and
  /// append them to \p Checks. Returns true on success; may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
    SmallPtrSet<const Value *, 8> Visited;
    return parseRangeChecks(CheckCond, Checks, Visited);
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
                        SmallPtrSetImpl<const Value *> &Visited);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks). Clobbers
  /// \p Checks and \p CombinedChecks on success and on failure.
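  /// For example, given suitable preconditions, the three checks
  ///   (I+1 u< L), (I+2 u< L), (I+3 u< L)
  /// combine into just (I+1 u< L) and (I+3 u< L): the first and last check
  /// together imply the one in the middle.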
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
  /// computing only one of the two expressions?
  bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
    Value *ResultUnused;
    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
                           InvertCond);
  }

  /// If \p InvertCondition is false, widen \p ToWiden to fail if
  /// \p NewCondition is false, otherwise make it fail if \p NewCondition is
  /// true (in addition to whatever it is already checking).
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;

    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, MemorySSAUpdater *MSSAU,
                             DomTreeNode *Root,
                             std::function<bool(BasicBlock*)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
}

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root); DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
        GuardsInBlock, bool InvertCondition) {
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (isa<ConstantInt>(getCondition(Instr)))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge Instr with for
  // the most profit.
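  // Because DFSI walks the dominator tree, every block on its current path
  // dominates the block containing Instr, so every guard scanned below is a
  // legal widening target.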
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                        << " and " << *getCondition(Candidate) << " is "
                        << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
  auto NewGuardCondition = InvertCondition
                               ? ConstantInt::getFalse(Instr->getContext())
                               : ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore
GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
                                        Instruction *DominatingGuard,
                                        bool InvertCond) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop. TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically. There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case. At the moment, we really only consider the second in our heuristic
  // here. TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(getCondition(DominatedInstr),
                               getCondition(DominatingGuard), InvertCond))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

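  // The conditions could not be merged for free, but hoisting out of a loop
  // still removes a check from every iteration, so it is profitable on its
  // own.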
  if (HoistingOutOfLoop)
    return WS_Positive;

  // Returns true if we might be hoisting above explicit control flow. Note
  // that this completely ignores implicit control flow (guards, calls which
  // throw, etc...). That choice appears arbitrary.
  auto MaybeHoistingOutOfIf = [&]() {
    auto *DominatingBlock = DominatingGuard->getParent();
    auto *DominatedBlock = DominatedInstr->getParent();
    if (isGuardAsWidenableBranch(DominatingGuard))
      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);

    // Same block?
    if (DominatedBlock == DominatingBlock)
      return false;
    // Obvious successor (common loop header/preheader case)
    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
      return false;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}

bool GuardWideningImpl::isAvailableAt(
    const Value *V, const Instruction *Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
         !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(Loc);
}

bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
                                        Instruction *InsertPt, Value *&Result,
                                        bool InvertCondition) {
  using namespace llvm::PatternMatch;

  {
    // L >u C0 && L >u C1 -> L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
      if (InvertCondition)
        Pred1 = ICmpInst::getInversePredicate(Pred1);

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
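      // For example, (x u< 10) AND (x u< 5) has the exact intersection
      // [0, 5), which getEquivalentICmp turns back into the single
      // comparison x u< 5.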
      if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(Cond0->getContext(), NewRHSAP);
            Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return true;
        }
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    // TODO: Support InvertCondition case?
    if (!InvertCondition &&
        parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        Result = nullptr;
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
      }
      return true;
    }
  }

  // Base case -- just logical-and the two conditions together.

  if (InsertPt) {
    makeAvailableAt(Cond0, InsertPt);
    makeAvailableAt(Cond1, InsertPt);
    if (InvertCondition)
      Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
  }

  // We were not able to compute Cond0 AND Cond1 for the price of one.
  return false;
}

bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallPtrSetImpl<const Value *> &Visited) {
  if (!Visited.insert(CheckCond).second)
    return true;

  using namespace llvm::PatternMatch;

  {
    Value *AndLHS, *AndRHS;
    if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
      return parseRangeChecks(AndLHS, Checks) &&
             parseRangeChecks(AndRHS, Checks);
  }

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.
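  // For example, ((x + 1) + 2) u< L is canonicalized below to Base == x,
  // Offset == 3, Length == L.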

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}

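// combineRangeChecks groups the checks by (Base, Length) pair and processes
// each group independently; only groups of three or more checks are
// candidates for merging.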
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try
    // to merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has
    // been no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //   with forall i in (0,f]: k_f-k_i u< k_f-k_0   ... Precond_0
    //        k_f-k_0 u< INT_MIN+k_f                  ... Precond_1
    //        k_f != k_0                              ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    //   We will show that the integer range [I+k_0,I+k_f] does not
    //   unsigned-wrap (i.e. going from I+k_0 to I+k_f does not cross the
    //   -1,0 boundary) and thus I+k_f is the greatest unsigned value in that
    //   range.
    //
    //   This combined with Chk_f shows that everything in that range is u< L.
    //   Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    //   lie in [I+k_0,I+k_f], thus proving our claim.
    //
    //   To see that [I+k_0,I+k_f] is not a wrapping range, note that there
    //   are two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't
    //   be equal since k_0 != k_f). In the former case, [I+k_0,I+k_f] is not
    //   a wrapping range by definition, and the latter case is impossible:
    //
    //     0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //     xxxxxx        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //   For Chk_0 to succeed, we'd need k_f-k_0 (the range highlighted with
    //   'x' above) to be >u INT_MIN, contradicting the bound established
    //   above.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSAA)
    MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
                         DT.getRootNode(), [](BasicBlock *) { return true; })
           .run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

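// The loop-pass variant widens only within a single loop: the region of
// interest is the loop body plus its unique predecessor block (or the loop
// header itself when no such block exists).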
PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr,
                         AR.DT.getNode(RootBB), BlockFilter)
           .run())
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {
struct GuardWideningLegacyPass : public FunctionPass {
  static char ID;

  GuardWideningLegacyPass() : FunctionPass(ID) {
    initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAWP)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
    return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
                             DT.getRootNode(),
                             [](BasicBlock *) { return true; })
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
};

/// Same as above, but restricted to a single loop at a time. Can be
/// scheduled with other loop passes w/o breaking out of LPM.
struct LoopGuardWideningLegacyPass : public LoopPass {
  static char ID;

  LoopGuardWideningLegacyPass() : LoopPass(ID) {
    initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
    auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    std::unique_ptr<MemorySSAUpdater> MSSAU;
    if (MSSAWP)
      MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());

    BasicBlock *RootBB = L->getLoopPredecessor();
    if (!RootBB)
      RootBB = L->getHeader();
    auto BlockFilter = [&](BasicBlock *BB) {
      return BB == RootBB || L->contains(BB);
    };
    return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr,
                             DT.getNode(RootBB), BlockFilter)
        .run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
    AU.addPreserved<PostDominatorTreeWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
};
}

char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                    false, false)

INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
                      "Widen guards (within a single loop, as a loop pass)",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
                    "Widen guards (within a single loop, as a loop pass)",
                    false, false)

FunctionPass *llvm::createGuardWideningPass() {
  return new GuardWideningLegacyPass();
}

Pass *llvm::createLoopGuardWideningPass() {
  return new LoopGuardWideningLegacyPass();
}