//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass. The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform. This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward. It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress. It hasn't been tuned to be "production
// ready" yet.
// It is known to have quadratic running time and will not scale
// to large numbers of guards
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
STATISTIC(FreezeAdded, "Number of freeze instruction introduced");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));

namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
82 static Value *getCondition(Instruction *I) { 83 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 84 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 85 "Bad guard intrinsic?"); 86 return GI->getArgOperand(0); 87 } 88 Value *Cond, *WC; 89 BasicBlock *IfTrueBB, *IfFalseBB; 90 if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB)) 91 return Cond; 92 93 return cast<BranchInst>(I)->getCondition(); 94 } 95 96 // Set the condition for \p I to \p NewCond. \p I can either be a guard or a 97 // conditional branch. 98 static void setCondition(Instruction *I, Value *NewCond) { 99 if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) { 100 assert(GI->getIntrinsicID() == Intrinsic::experimental_guard && 101 "Bad guard intrinsic?"); 102 GI->setArgOperand(0, NewCond); 103 return; 104 } 105 cast<BranchInst>(I)->setCondition(NewCond); 106 } 107 108 // Eliminates the guard instruction properly. 109 static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) { 110 GuardInst->eraseFromParent(); 111 if (MSSAU) 112 MSSAU->removeMemoryAccess(GuardInst); 113 ++GuardsEliminated; 114 } 115 116 /// Find a point at which the widened condition of \p Guard should be inserted. 117 /// When it is represented as intrinsic call, we can do it right before the call 118 /// instruction. However, when we are dealing with widenable branch, we must 119 /// account for the following situation: widening should not turn a 120 /// loop-invariant condition into a loop-variant. It means that if 121 /// widenable.condition() call is invariant (w.r.t. any loop), the new wide 122 /// condition should stay invariant. Otherwise there can be a miscompile, like 123 /// the one described at https://github.com/llvm/llvm-project/issues/60234. The 124 /// safest way to do it is to expand the new condition at WC's block. 
static std::optional<BasicBlock::iterator>
findInsertionPointForWideCondition(Instruction *WCOrGuard) {
  // For a guard intrinsic, the widened condition can go right before the call.
  if (isGuard(WCOrGuard))
    return WCOrGuard->getIterator();
  // For a widenable branch, expand at the widenable.condition() call itself
  // (see the comment above about keeping loop-invariance).
  if (auto WC = extractWidenableCondition(WCOrGuard))
    return cast<Instruction>(WC)->getIterator();
  return std::nullopt;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest. This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Guard, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
          &GuardsPerBlock);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable. Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p WideningPoint.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *ToWiden,
                                     BasicBlock::iterator WideningPoint,
                                     SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return canBeHoistedTo(V, InsertPos, Visited);
  }

  bool canBeHoistedTo(const Value *V, BasicBlock::iterator InsertPos,
                      SmallPtrSetImpl<const Instruction *> &Visited) const;

  // Overload: true iff every check in \p Checks can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
                      BasicBlock::iterator InsertPos) const {
    return all_of(Checks,
                  [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
  }
  /// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
  /// canBeHoistedTo returned true.
  void makeAvailableAt(Value *V, BasicBlock::iterator InsertPos) const;

  // Overload: hoist every check in \p Checks to \p InsertPos.
  void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
                       BasicBlock::iterator InsertPos) const {
    for (Value *V : Checks)
      makeAvailableAt(V, InsertPos);
  }

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
  /// to generate an expression computing the logical AND of \p ChecksToHoist
  /// and \p ChecksToWiden. Return true if the expression computing the AND is
  /// only as expensive as computing one of the set of expressions. If \p
  /// InsertPt is true then actually generate the resulting expression, make it
  /// available at \p InsertPt and return it in \p Result (else no change to the
  /// IR is made).
  std::optional<Value *>
  mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
              SmallVectorImpl<Value *> &ChecksToWiden,
              std::optional<BasicBlock::iterator> InsertPt);

  /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
  /// it available at InsertPt
  Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                     Value *OldCondition, BasicBlock::iterator InsertPt);

  /// Adds freeze to Orig and push it as far as possible very aggressively.
  /// Also replaces all uses of frozen instruction with frozen version.
  Value *freezeAndPush(Value *Orig, BasicBlock::iterator InsertPt);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative. \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; };
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p ToParse into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks. Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
                        SmallVectorImpl<RangeCheck> &Checks) {
    for (auto CheckCond : ToParse) {
      if (!parseRangeChecks(CheckCond, Checks))
        return false;
    }
    return true;
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks. Return true on success (i.e. all of checks
  /// in \p Checks were combined into \p CombinedChecks). Clobbers \p Checks
  /// and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
  /// for the price of computing only one of the set of expressions?
  bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
                                SmallVectorImpl<Value *> &ChecksToWiden) {
    // Dry run of mergeChecks: no insert point, so no IR is changed.
    return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/std::nullopt)
        .has_value();
  }

  /// Widen \p ChecksToWiden to fail if any of \p ChecksToHoist is false
  void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
                  SmallVectorImpl<Value *> &ChecksToWiden,
                  Instruction *ToWiden) {
    auto InsertPt = findInsertionPointForWideCondition(ToWiden);
    auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
    // If the checks could not be merged cheaply, fall back to AND-ing the
    // hoisted checks onto ToWiden's existing condition.
    Value *Result = MergedCheck ? *MergedCheck
                                : hoistChecks(ChecksToHoist,
                                              getCondition(ToWiden), *InsertPt);

    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
}

// Returns true if \p Insn is eligible for widening: either a guard intrinsic,
// or (when enabled via -guard-widening-widen-branch-guards) a branch on a
// widenable condition.
static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

// Walk the dominator tree depth-first, collecting the guards of each block and
// trying to widen every guard into one of its dominators.
bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  // Guards whose condition was folded to true (and which were not themselves
  // widened into) can be removed outright; branches are left for cleanup
  // passes, we only count them.
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
        &GuardsInBlock) {
  SmallVector<Value *> ChecksToHoist;
  parseWidenableGuard(Instr, ChecksToHoist);
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (ChecksToHoist.empty() ||
      (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front())))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    // In Instr's own block, only consider guards that appear before Instr.
    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      // Verify GuardsInCurBB lists the guards in program order.
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto WideningPoint = findInsertionPointForWideCondition(Candidate);
      if (!WideningPoint)
        continue;
      SmallVector<Value *> CandidateChecks;
      parseWidenableGuard(Candidate, CandidateChecks);
      auto Score = computeWideningScore(Instr, Candidate, *WideningPoint,
                                        ChecksToHoist, CandidateChecks);
      LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
                        << " is " << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  SmallVector<Value *> ChecksToWiden;
  parseWidenableGuard(BestSoFar, ChecksToWiden);
  widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar);
  // Instr's checks now live in BestSoFar; neutralize Instr's own condition.
  auto NewGuardCondition = ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
    Instruction *DominatedInstr, Instruction *ToWiden,
    BasicBlock::iterator WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
    SmallVectorImpl<Value *> &ChecksToWiden) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop.  TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!canBeHoistedTo(ChecksToHoist, WideningPoint))
    return WS_IllegalOrNegative;
  // Further in the GuardWideningImpl::hoistChecks the entire condition might be
  // widened, not the parsed list of checks. So we need to check the possibility
  // of that condition hoisting.
  if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint))
    return WS_IllegalOrNegative;

  // If the guard was conditional executed, it may never be reached
  // dynamically.  There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case.  At the moment, we really only consider the second in our heuristic
  // here.  TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // For a given basic block \p BB, return its successor which is guaranteed or
  // highly likely will be taken as its successor.
  auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * {
    if (auto *UniqueSucc = BB->getUniqueSuccessor())
      return UniqueSucc;
    auto *Term = BB->getTerminator();
    Value *Cond = nullptr;
    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
    using namespace PatternMatch;
    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
                          m_BasicBlock(IfFalse))))
      return nullptr;
    // For constant conditions, only one dynamical successor is possible
    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
    // If one of successors ends with deopt, another one is likely.
    if (IfFalse->getPostdominatingDeoptimizeCall())
      return IfTrue;
    if (IfTrue->getPostdominatingDeoptimizeCall())
      return IfFalse;
    // TODO: Use branch frequency metatada to allow hoisting through non-deopt
    // branches?
    return nullptr;
  };

  // Returns true if we might be hoisting above explicit control flow into a
  // considerably hotter block.  Note that this completely ignores implicit
  // control flow (guards, calls which throw, etc...).  That choice appears
  // arbitrary (we assume that implicit control flow exits are all rare).
  auto MaybeHoistingToHotterBlock = [&]() {
    const auto *DominatingBlock = WideningPoint->getParent();
    const auto *DominatedBlock = DominatedInstr->getParent();

    // Descend as low as we can, always taking the likely successor.
    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
    while (DominatedBlock != DominatingBlock) {
      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
      // No likely successor?
      if (!LikelySucc)
        break;
      // Only go down the dominator tree.
      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
        break;
      DominatingBlock = LikelySucc;
    }

    // Found?
    if (DominatedBlock == DominatingBlock)
      return false;
    // We followed the likely successor chain and went past the dominated
    // block. It means that the dominated guard is in dead/very cold code.
    if (!DT.dominates(DominatingBlock, DominatedBlock))
      return true;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}

// Recursively check that \p V (and its operands) can be speculated up to
// \p Loc.  \p Visited memoizes instructions already proven hoistable.
bool GuardWideningImpl::canBeHoistedTo(
    const Value *V, BasicBlock::iterator Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  // Non-instructions, values already dominating Loc, and already-visited
  // instructions need no hoisting.
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}

// Move \p V (and, recursively, its operands) up to \p Loc.  Must only be
// called after canBeHoistedTo succeeded for \p V.
void GuardWideningImpl::makeAvailableAt(Value *V,
                                        BasicBlock::iterator Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
         !Inst->mayReadFromMemory() &&
         "Should've checked with canBeHoistedTo!");

  // Operands must be made available before the instruction that uses them.
  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(*Loc->getParent(), Loc);
}

// Return Instruction before which we can insert freeze for the value V as close
// to def as possible. If there is no place to add freeze, return empty.
static std::optional<BasicBlock::iterator>
getFreezeInsertPt(Value *V, const DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(V);
  // Non-instructions (constants, arguments, globals) can be frozen at the
  // start of the entry block.
  if (!I)
    return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();

  std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
  // If there is no place to add freeze - return nullptr.
  if (!Res || !DT.dominates(I, &**Res))
    return std::nullopt;

  Instruction *ResInst = &**Res;

  // If there is a User dominated by original I, then it should be dominated
  // by Freeze instruction as well.
  if (any_of(I->users(), [&](User *U) {
        Instruction *User = cast<Instruction>(U);
        return ResInst != User && DT.dominates(I, User) &&
               !DT.dominates(ResInst, User);
      }))
    return std::nullopt;
  return Res;
}

Value *GuardWideningImpl::freezeAndPush(Value *Orig,
                                        BasicBlock::iterator InsertPt) {
  // Nothing to do if the value can never be poison.
  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
    return Orig;
  std::optional<BasicBlock::iterator> InsertPtAtDef =
      getFreezeInsertPt(Orig, DT);
  // If we cannot freeze next to the def, freeze at the requested InsertPt.
  if (!InsertPtAtDef) {
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }
  if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
    BasicBlock::iterator InsertPt = *InsertPtAtDef;
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }

  SmallSet<Value *, 16> Visited;
  SmallVector<Value *, 16> Worklist;
  SmallSet<Instruction *, 16> DropPoisonFlags;
  SmallVector<Value *, 16> NeedFreeze;
  DenseMap<Value *, FreezeInst *> CacheOfFreezes;

  // A bit overloaded data structures. Visited contains constant/GV
  // if we already met it. In this case CacheOfFreezes has a freeze if it is
  // required.
  auto handleConstantOrGlobal = [&](Use &U) {
    Value *Def = U.get();
    if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
      return false;

    if (Visited.insert(Def).second) {
      if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
        return true;
      BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT);
      FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
      FI->insertBefore(*InsertPt->getParent(), InsertPt);
      CacheOfFreezes[Def] = FI;
    }

    if (CacheOfFreezes.count(Def))
      U.set(CacheOfFreezes[Def]);
    return true;
  };

  // Walk the operand tree of Orig, pushing freezes towards the leaves that
  // can actually produce poison.
  Worklist.push_back(Orig);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
      continue;

    Instruction *I = dyn_cast<Instruction>(V);
    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
                                     /*ConsiderFlagsAndMetadata*/ false)) {
      NeedFreeze.push_back(V);
      continue;
    }
    // Check all operands. If for any of them we cannot insert Freeze,
    // stop here. Otherwise, iterate.
    if (any_of(I->operands(), [&](Value *Op) {
          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
        })) {
      NeedFreeze.push_back(I);
      continue;
    }
    DropPoisonFlags.insert(I);
    for (Use &U : I->operands())
      if (!handleConstantOrGlobal(U))
        Worklist.push_back(U.get());
  }
  // Instructions we pushed the freeze through must no longer claim to produce
  // poison via their flags.
  for (Instruction *I : DropPoisonFlags)
    I->dropPoisonGeneratingAnnotations();

  Value *Result = Orig;
  for (Value *V : NeedFreeze) {
    BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
    FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt);
    ++FreezeAdded;
    if (V == Orig)
      Result = FI;
    V->replaceUsesWithIf(
        FI, [&](const Use & U)->bool { return U.getUser() != FI; });
  }

  return Result;
}

std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               std::optional<BasicBlock::iterator> InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1  ->  L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(ChecksToWiden.front(),
              m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(ChecksToHoist.front(),
              m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get((*InsertPt)->getContext(), NewRHSAP);
            assert(canBeHoistedTo(LHS, *InsertPt) && "must be");
            makeAvailableAt(LHS, *InsertPt);
            Result = new ICmpInst(*InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return Result;
        }
      }
    }
  }

  {
    // Try to merge the checks as conjunctions of range checks.
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ChecksToWiden, Checks) &&
        parseRangeChecks(ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), *InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               *InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
        Result = freezeAndPush(Result, *InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}

Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                                      Value *OldCondition,
                                      BasicBlock::iterator InsertPt) {
  assert(!ChecksToHoist.empty());
  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
  makeAvailableAt(ChecksToHoist, InsertPt);
  makeAvailableAt(OldCondition, InsertPt);
  Value *Result = Builder.CreateAnd(ChecksToHoist);
  // Freeze the hoisted conjunction so widening cannot introduce new poison
  // into OldCondition's guard.
  Result = freezeAndPush(Result, InsertPt);
  Result = Builder.CreateAnd(OldCondition, Result);
  Result->setName("wide.chk");
  return Result;
}

bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  // Only unsigned comparisons over integers can be range checks.
  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  // Canonicalize to the u< form.
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      // `or` with disjoint bits behaves like `add`.
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}

bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: std::sort should not invalidate the ChecksStart iterator.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
    //          k_f != k_0                             ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    //   We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    //   (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    //   thus I+k_f is the greatest unsigned value in that range.
    //
    //   This combined with Ckh_(f+1) shows that everything in that range is u< L.
    //   Via Precond_0 we know that all of the indices in Chk_0 through Chk_(f+1)
    //   lie in [I+k_0,I+k_f], this proving our claim.
    //
    //   To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    //   two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    //   since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a wrapping
    //   range by definition, and the latter case is impossible:
    //
    //     0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //     xxxxxx                                    xxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //   For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    //   with 'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Avoid requesting analyses if there are no guards or widenable conditions.
983 auto *GuardDecl = F.getParent()->getFunction( 984 Intrinsic::getName(Intrinsic::experimental_guard)); 985 bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty(); 986 auto *WCDecl = F.getParent()->getFunction( 987 Intrinsic::getName(Intrinsic::experimental_widenable_condition)); 988 bool HasWidenableConditions = WCDecl && !WCDecl->use_empty(); 989 if (!HasIntrinsicGuards && !HasWidenableConditions) 990 return PreservedAnalyses::all(); 991 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 992 auto &LI = AM.getResult<LoopAnalysis>(F); 993 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 994 auto &AC = AM.getResult<AssumptionAnalysis>(F); 995 auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F); 996 std::unique_ptr<MemorySSAUpdater> MSSAU; 997 if (MSSAA) 998 MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA()); 999 if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr, 1000 DT.getRootNode(), [](BasicBlock *) { return true; }) 1001 .run()) 1002 return PreservedAnalyses::all(); 1003 1004 PreservedAnalyses PA; 1005 PA.preserveSet<CFGAnalyses>(); 1006 PA.preserve<MemorySSAAnalysis>(); 1007 return PA; 1008 } 1009 1010 PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM, 1011 LoopStandardAnalysisResults &AR, 1012 LPMUpdater &U) { 1013 BasicBlock *RootBB = L.getLoopPredecessor(); 1014 if (!RootBB) 1015 RootBB = L.getHeader(); 1016 auto BlockFilter = [&](BasicBlock *BB) { 1017 return BB == RootBB || L.contains(BB); 1018 }; 1019 std::unique_ptr<MemorySSAUpdater> MSSAU; 1020 if (AR.MSSA) 1021 MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA); 1022 if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC, 1023 MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB), 1024 BlockFilter) 1025 .run()) 1026 return PreservedAnalyses::all(); 1027 1028 auto PA = getLoopPassPreservedAnalyses(); 1029 if (AR.MSSA) 1030 PA.preserve<MemorySSAAnalysis>(); 1031 return PA; 1032 } 1033