//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass.  The semantics of the
// @llvm.experimental.guard intrinsic lets LLVM transform it so that it fails
// more often than it did before the transform.  This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward.  It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress.  It hasn't been tuned to be "production
// ready" yet.  It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/GuardWidening.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/GuardUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <functional>

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
STATISTIC(FreezeAdded, "Number of freeze instructions introduced");

static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));

namespace {
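// For orientation, the two guard representations handled below look roughly
// like this in IR (an illustrative sketch; values and labels are made up):
//
//   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
//
//   %wc = call i1 @llvm.experimental.widenable.condition()
//   %guarded = and i1 %cond, %wc
//   br i1 %guarded, label %continue, label %deopt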
// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  Value *Cond, *WC;
  BasicBlock *IfTrueBB, *IfFalseBB;
  if (parseWidenableBranch(I, Cond, WC, IfTrueBB, IfFalseBB))
    return Cond;

  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Eliminates the guard instruction properly.
static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
  // Remove the guard's memory access before erasing the instruction itself;
  // the reverse order would touch GuardInst after it has been freed.
  if (MSSAU)
    MSSAU->removeMemoryAccess(GuardInst);
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

/// Find a point at which the widened condition of \p Guard should be inserted.
/// When it is represented as an intrinsic call, we can do it right before the
/// call instruction. However, when we are dealing with a widenable branch, we
/// must account for the following situation: widening should not turn a
/// loop-invariant condition into a loop-variant one. It means that if the
/// widenable.condition() call is invariant (w.r.t. any loop), the new wide
/// condition should stay invariant. Otherwise there can be a miscompile, like
/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
/// safest way to do it is to expand the new condition at WC's block.
static Instruction *findInsertionPointForWideCondition(Instruction *WCOrGuard) {
  if (isGuard(WCOrGuard))
    return WCOrGuard;
  if (auto WC = extractWidenableCondition(WCOrGuard))
    return cast<Instruction>(WC);
  return nullptr;
}
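// Sketch of the invariance concern handled above (made-up labels): when the
// widenable condition is defined outside the loop but the branch using it is
// inside,
//
//   preheader:
//     %wc = call i1 @llvm.experimental.widenable.condition()
//     br label %loop
//   loop:
//     %guarded = and i1 %inv.cond, %wc
//     br i1 %guarded, label %loop.cont, label %deopt
//
// new checks must be expanded at %wc's block (the preheader here), not at the
// branch, so that the combined condition stays loop-invariant.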
class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  AssumptionCache &AC;
  MemorySSAUpdater *MSSAU;

  /// Together, these describe the region of interest.  This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock *)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard.  \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
          &GuardsPerBlock);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable.  Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p WideningPoint.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *ToWiden,
                                     Instruction *WideningPoint,
                                     SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return canBeHoistedTo(V, InsertPos, Visited);
  }

  bool canBeHoistedTo(const Value *V, const Instruction *InsertPos,
                      SmallPtrSetImpl<const Instruction *> &Visited) const;

  bool canBeHoistedTo(const SmallVectorImpl<Value *> &Checks,
                      const Instruction *InsertPos) const {
    return all_of(Checks,
                  [&](const Value *V) { return canBeHoistedTo(V, InsertPos); });
  }

  /// Helper to hoist \p V to \p InsertPos.  Guaranteed to succeed if \c
  /// canBeHoistedTo returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  void makeAvailableAt(const SmallVectorImpl<Value *> &Checks,
                       Instruction *InsertPos) const {
    for (Value *V : Checks)
      makeAvailableAt(V, InsertPos);
  }

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable.  Try
  /// to generate an expression computing the logical AND of \p ChecksToHoist
  /// and \p ChecksToWiden.  Return an engaged optional if the expression
  /// computing the AND is only as expensive as computing one of the two sets
  /// of expressions.  If \p InsertPt is non-null, actually generate the
  /// resulting expression, make it available at \p InsertPt and return it in
  /// the optional (else no change to the IR is made and the contained value is
  /// null).
  std::optional<Value *> mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                                     SmallVectorImpl<Value *> &ChecksToWiden,
                                     Instruction *InsertPt);

  /// Generate the logical AND of \p ChecksToHoist and \p OldCondition and make
  /// it available at InsertPt.
  Value *hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                     Value *OldCondition, Instruction *InsertPt);

  /// Adds a freeze to \p Orig and pushes it as far toward the definition as
  /// possible, very aggressively.  Also replaces all uses of the frozen
  /// instruction with the frozen version.
  Value *freezeAndPush(Value *Orig, Instruction *InsertPt);
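  // For illustration, a range check in the sense of the RangeCheck class
  // below might look like this (sketch; names are made up):
  //
  //   %upper = add i64 %i, 4
  //   %rc = icmp ult i64 %upper, %len   ; Base = %i, Offset = 4, Length = %len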
  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative.  \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p ToParse into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks.  Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(SmallVectorImpl<Value *> &ToParse,
                        SmallVectorImpl<RangeCheck> &Checks) {
    for (auto CheckCond : ToParse) {
      if (!parseRangeChecks(CheckCond, Checks))
        return false;
    }
    return true;
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks.  Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks).  Clobbers \p
  /// Checks and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p ChecksToHoist and \p ChecksToWiden
  /// for the price of computing only one of the two sets of expressions?
  bool isWideningCondProfitable(SmallVectorImpl<Value *> &ChecksToHoist,
                                SmallVectorImpl<Value *> &ChecksToWiden) {
    return mergeChecks(ChecksToHoist, ChecksToWiden, /*InsertPt=*/nullptr)
        .has_value();
  }
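  // For illustration, widening the first guard of the file header example
  // with the check it dominates conceptually rewrites (sketch)
  //
  //   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
  //
  // into a guard on "%wide.chk = and i1 %cmp0, %cmp1" - or, when mergeChecks
  // succeeds, into a guard on a single fused check subsuming both.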
  /// Widen \p ToWiden to fail if any of \p ChecksToHoist is false.
  void widenGuard(SmallVectorImpl<Value *> &ChecksToHoist,
                  SmallVectorImpl<Value *> &ChecksToWiden,
                  Instruction *ToWiden) {
    Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden);
    auto MergedCheck = mergeChecks(ChecksToHoist, ChecksToWiden, InsertPt);
    Value *Result = MergedCheck ? *MergedCheck
                                : hoistChecks(ChecksToHoist,
                                              getCondition(ToWiden), InsertPt);

    if (isGuardAsWidenableBranch(ToWiden)) {
      setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
      return;
    }
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, AssumptionCache &AC,
                             MemorySSAUpdater *MSSAU, DomTreeNode *Root,
                             std::function<bool(BasicBlock *)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), AC(AC), MSSAU(MSSAU), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
} // namespace

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  for (auto DFI = df_begin(Root), DFE = df_end(Root); DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I, MSSAU);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}
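// For illustration: once a guard has been widened into a dominating one, the
// dominated instruction is left behind as
//
//   call @llvm.experimental.guard(i1 true) [ "deopt"(...) ]
//
// and is queued in EliminatedGuardsAndBranches; run() erases it afterwards
// (or, for branches, merely counts it).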
bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>>
        &GuardsInBlock) {
  SmallVector<Value *> ChecksToHoist;
  parseWidenableGuard(Instr, ChecksToHoist);
  // Ignore trivial true or false conditions. These instructions will be
  // trivially eliminated by any cleanup pass. Do not erase them because other
  // guards can possibly be widened into them.
  if (ChecksToHoist.empty() ||
      (ChecksToHoist.size() == 1 && isa<ConstantInt>(ChecksToHoist.front())))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge GuardInst with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB ? find(GuardsInCurBB, Instr)
                                         : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto *WideningPoint = findInsertionPointForWideCondition(Candidate);
      if (!WideningPoint)
        continue;
      SmallVector<Value *> CandidateChecks;
      parseWidenableGuard(Candidate, CandidateChecks);
      auto Score = computeWideningScore(Instr, Candidate, WideningPoint,
                                        ChecksToHoist, CandidateChecks);
      LLVM_DEBUG(dbgs() << "Score between " << *Instr << " and " << *Candidate
                        << " is " << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  SmallVector<Value *> ChecksToWiden;
  parseWidenableGuard(BestSoFar, ChecksToWiden);
  widenGuard(ChecksToHoist, ChecksToWiden, BestSoFar);
  auto *NewGuardCondition = ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}
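// For illustration of the "likely successor" reasoning in the scoring below
// (sketch, made-up labels): in
//
//   bb:
//     br i1 %c, label %cont, label %deopt.path   ; %deopt.path ends in deopt
//
// %cont is treated as the likely successor of %bb, so hoisting from %cont to
// a dominating block that always reaches %cont is not considered a move into
// a hotter block.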
GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore(
    Instruction *DominatedInstr, Instruction *ToWiden,
    Instruction *WideningPoint, SmallVectorImpl<Value *> &ChecksToHoist,
    SmallVectorImpl<Value *> &ChecksToWiden) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(WideningPoint->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop.  TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!canBeHoistedTo(ChecksToHoist, WideningPoint))
    return WS_IllegalOrNegative;
  // Later, in GuardWideningImpl::hoistChecks, the entire condition might be
  // widened, not only the parsed list of checks, so we also need to check
  // that the whole condition can be hoisted.
  if (!canBeHoistedTo(getCondition(ToWiden), WideningPoint))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically.  There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case.  At the moment, we really only consider the second in our heuristic
  // here.  TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(ChecksToHoist, ChecksToWiden))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // For a given basic block \p BB, return the successor of BB which is
  // guaranteed or highly likely to be taken.
  auto GetLikelySuccessor = [](const BasicBlock *BB) -> const BasicBlock * {
    if (auto *UniqueSucc = BB->getUniqueSuccessor())
      return UniqueSucc;
    auto *Term = BB->getTerminator();
    Value *Cond = nullptr;
    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
    using namespace PatternMatch;
    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
                          m_BasicBlock(IfFalse))))
      return nullptr;
    // For constant conditions, only one dynamic successor is possible.
    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
    // If one of the successors ends with a deopt, the other one is likely.
    if (IfFalse->getPostdominatingDeoptimizeCall())
      return IfTrue;
    if (IfTrue->getPostdominatingDeoptimizeCall())
      return IfFalse;
    // TODO: Use branch frequency metadata to allow hoisting through non-deopt
    // branches?
    return nullptr;
  };

  // Returns true if we might be hoisting above explicit control flow into a
  // considerably hotter block.  Note that this completely ignores implicit
  // control flow (guards, calls which throw, etc...).  That choice appears
  // arbitrary (we assume that implicit control flow exits are all rare).
  auto MaybeHoistingToHotterBlock = [&]() {
    const auto *DominatingBlock = WideningPoint->getParent();
    const auto *DominatedBlock = DominatedInstr->getParent();

    // Descend as low as we can, always taking the likely successor.
    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
    while (DominatedBlock != DominatingBlock) {
      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
      // No likely successor?
      if (!LikelySucc)
        break;
      // Only go down the dominator tree.
      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
        break;
      DominatingBlock = LikelySucc;
    }

    // Found?
    if (DominatedBlock == DominatingBlock)
      return false;
    // We followed the likely successor chain and went past the dominated
    // block.  It means that the dominated guard is in dead/very cold code.
    if (!DT.dominates(DominatingBlock, DominatedBlock))
      return true;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}
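// For illustration: in the sketch below (made-up names), %sum can be hoisted
// to any point that %a and %b dominate, while %v cannot, because loads are
// rejected via mayReadFromMemory():
//
//   %sum = add i64 %a, %b
//   %v = load i64, ptr %p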
bool GuardWideningImpl::canBeHoistedTo(
    const Value *V, const Instruction *Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
         !Inst->mayReadFromMemory() &&
         "Should've checked with canBeHoistedTo!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(Loc);
}

// Return the instruction before which we can insert a freeze for the value
// \p V, as close to the definition as possible.  If there is no place to add
// the freeze, return std::nullopt.
static std::optional<BasicBlock::iterator>
getFreezeInsertPt(Value *V, const DominatorTree &DT) {
  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return DT.getRoot()->getFirstNonPHIOrDbgOrAlloca()->getIterator();

  std::optional<BasicBlock::iterator> Res = I->getInsertionPointAfterDef();
  // If there is no place to add the freeze, bail out.
  if (!Res || !DT.dominates(I, &**Res))
    return std::nullopt;

  Instruction *ResInst = &**Res;

  // If there is a User dominated by the original I, then it should be
  // dominated by the Freeze instruction as well.
  if (any_of(I->users(), [&](User *U) {
        Instruction *User = cast<Instruction>(U);
        return ResInst != User && DT.dominates(I, User) &&
               !DT.dominates(ResInst, User);
      }))
    return std::nullopt;
  return Res;
}
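// For illustration of freezeAndPush below (sketch, made-up names): freezing
//
//   %a = add nuw i64 %x, 1
//
// pushes the freeze through the add onto its operand and drops the
// poison-generating flag:
//
//   %x.gw.fr = freeze i64 %x
//   %a = add i64 %x.gw.fr, 1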
Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
  if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
    return Orig;
  std::optional<BasicBlock::iterator> InsertPtAtDef =
      getFreezeInsertPt(Orig, DT);
  if (!InsertPtAtDef) {
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(InsertPt);
    return FI;
  }
  if (isa<Constant>(Orig) || isa<GlobalValue>(Orig)) {
    BasicBlock::iterator InsertPt = *InsertPtAtDef;
    FreezeInst *FI = new FreezeInst(Orig, "gw.freeze");
    FI->insertBefore(*InsertPt->getParent(), InsertPt);
    return FI;
  }

  SmallSet<Value *, 16> Visited;
  SmallVector<Value *, 16> Worklist;
  SmallSet<Instruction *, 16> DropPoisonFlags;
  SmallVector<Value *, 16> NeedFreeze;
  DenseMap<Value *, FreezeInst *> CacheOfFreezes;

  // These data structures do double duty: Visited also records the
  // constants/globals we have already met, and for those CacheOfFreezes holds
  // a freeze if one is required.
  auto handleConstantOrGlobal = [&](Use &U) {
    Value *Def = U.get();
    if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
      return false;

    if (Visited.insert(Def).second) {
      if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
        return true;
      BasicBlock::iterator InsertPt = *getFreezeInsertPt(Def, DT);
      FreezeInst *FI = new FreezeInst(Def, Def->getName() + ".gw.fr");
      FI->insertBefore(*InsertPt->getParent(), InsertPt);
      CacheOfFreezes[Def] = FI;
    }

    if (CacheOfFreezes.count(Def))
      U.set(CacheOfFreezes[Def]);
    return true;
  };

  Worklist.push_back(Orig);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
      continue;

    Instruction *I = dyn_cast<Instruction>(V);
    if (!I || canCreateUndefOrPoison(cast<Operator>(I),
                                     /*ConsiderFlagsAndMetadata*/ false)) {
      NeedFreeze.push_back(V);
      continue;
    }
    // Check all operands.  If for any of them we cannot insert a Freeze, stop
    // here.  Otherwise, iterate.
    if (any_of(I->operands(), [&](Value *Op) {
          return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
        })) {
      NeedFreeze.push_back(I);
      continue;
    }
    DropPoisonFlags.insert(I);
    for (Use &U : I->operands())
      if (!handleConstantOrGlobal(U))
        Worklist.push_back(U.get());
  }
  for (Instruction *I : DropPoisonFlags)
    I->dropPoisonGeneratingFlagsAndMetadata();

  Value *Result = Orig;
  for (Value *V : NeedFreeze) {
    BasicBlock::iterator FreezeInsertPt = *getFreezeInsertPt(V, DT);
    FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr");
    FI->insertBefore(*FreezeInsertPt->getParent(), FreezeInsertPt);
    ++FreezeAdded;
    if (V == Orig)
      Result = FI;
    V->replaceUsesWithIf(
        FI, [&](const Use &U) -> bool { return U.getUser() != FI; });
  }

  return Result;
}
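// For illustration of the first strategy in mergeChecks below: merging the
// two checks "icmp ugt i32 %len, 7" and "icmp ugt i32 %len, 9" by exact
// range intersection ([8, UMAX] with [10, UMAX]) yields the single compare
// "icmp ugt i32 %len, 9" (values made up).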
std::optional<Value *>
GuardWideningImpl::mergeChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                               SmallVectorImpl<Value *> &ChecksToWiden,
                               Instruction *InsertPt) {
  using namespace llvm::PatternMatch;

  Value *Result = nullptr;
  {
    // L >u C0 && L >u C1  ->  L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    // TODO: Support searching for pairs to merge from both whole lists of
    // ChecksToHoist and ChecksToWiden.
    if (ChecksToWiden.size() == 1 && ChecksToHoist.size() == 1 &&
        match(ChecksToWiden.front(),
              m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(ChecksToHoist.front(),
              m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // Given what we're doing here and the semantics of guards, it would
      // be correct to use a subset intersection, but that may be too
      // aggressive in cases we care about.
      if (std::optional<ConstantRange> Intersect =
              CR0.exactIntersectWith(CR1)) {
        APInt NewRHSAP;
        CmpInst::Predicate Pred;
        if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
          if (InsertPt) {
            ConstantInt *NewRHS =
                ConstantInt::get(InsertPt->getContext(), NewRHSAP);
            assert(canBeHoistedTo(LHS, InsertPt) && "must be");
            makeAvailableAt(LHS, InsertPt);
            Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
          }
          return Result;
        }
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    if (parseRangeChecks(ChecksToWiden, Checks) &&
        parseRangeChecks(ChecksToHoist, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
        Result = freezeAndPush(Result, InsertPt);
      }
      return Result;
    }
  }
  // We were not able to compute ChecksToHoist AND ChecksToWiden for the price
  // of one.
  return std::nullopt;
}

Value *GuardWideningImpl::hoistChecks(SmallVectorImpl<Value *> &ChecksToHoist,
                                      Value *OldCondition,
                                      Instruction *InsertPt) {
  assert(!ChecksToHoist.empty());
  IRBuilder<> Builder(InsertPt);
  makeAvailableAt(ChecksToHoist, InsertPt);
  makeAvailableAt(OldCondition, InsertPt);
  Value *Result = Builder.CreateAnd(ChecksToHoist);
  Result = freezeAndPush(Result, InsertPt);
  Result = Builder.CreateAnd(OldCondition, Result);
  Result->setName("wide.chk");
  return Result;
}
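// For illustration of parseRangeChecks below (sketch, made-up names): given
//
//   %t = add i64 %i, 4
//   %rc = icmp ult i64 %t, %len
//
// with %len known non-negative, the parsed RangeCheck is Base = %i,
// Offset = 4, Length = %len.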
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks) {
  using namespace llvm::PatternMatch;

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}
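// For illustration of combineRangeChecks below (made-up values): the three
// checks "%i u< %len", "%i+1 u< %len", "%i+2 u< %len" share a base and a
// length, so they collapse to the pair "%i u< %len" and "%i+2 u< %len"; the
// middle check is implied because [%i, %i+2] cannot unsigned-wrap here.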
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try
    // to merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    erase_if(Checks, IsCurrentCheck);

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      llvm::append_range(RangeChecksOut, CurrentChecks);
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has
    // been no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: llvm::sort does not invalidate CurrentChecks' iterators.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() || !all_of(drop_begin(CurrentChecks), OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0  ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                 ... Precond_1
    //          k_f != k_0                             ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    // We will show that the integer range [I+k_0,I+k_f] does not unsigned-wrap
    // (i.e. going from I+k_0 to I+k_f does not cross the -1,0 boundary) and
    // thus I+k_f is the greatest unsigned value in that range.
    //
    // This combined with Chk_f shows that everything in that range is u< L.
    // Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    // lie in [I+k_0,I+k_f], thus proving our claim.
    //
    // To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    // two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be equal
    // since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a wrapping
    // range by definition, and the latter case is impossible:
    //
    //   0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //   xxxxxx                    xxxxxxxxx              xxxxx
    //
    // For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    // with 'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}

#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif
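// The function-level and loop-level entry points below can be exercised
// directly with, e.g. (invocations shown for illustration):
//
//   opt -passes=guard-widening ...
//   opt -passes='loop(guard-widening)' ...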
PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Avoid requesting analyses if there are no guards or widenable conditions.
  auto *GuardDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasIntrinsicGuards = GuardDecl && !GuardDecl->use_empty();
  auto *WCDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_widenable_condition));
  bool HasWidenableConditions = WCDecl && !WCDecl->use_empty();
  if (!HasIntrinsicGuards && !HasWidenableConditions)
    return PreservedAnalyses::all();
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSAA)
    MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
  if (!GuardWideningImpl(DT, &PDT, LI, AC, MSSAU ? MSSAU.get() : nullptr,
                         DT.getRootNode(), [](BasicBlock *) { return true; })
           .run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (AR.MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.AC,
                         MSSAU ? MSSAU.get() : nullptr, AR.DT.getNode(RootBB),
                         BlockFilter)
           .run())
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}