1 //===- BranchProbabilityInfo.cpp - Branch Probability Analysis ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Loops should be simplified before this analysis. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Analysis/BranchProbabilityInfo.h" 14 #include "llvm/ADT/PostOrderIterator.h" 15 #include "llvm/ADT/SCCIterator.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/ConstantFolding.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/PostDominators.h" 21 #include "llvm/Analysis/TargetLibraryInfo.h" 22 #include "llvm/IR/Attributes.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/LLVMContext.h" 32 #include "llvm/IR/Metadata.h" 33 #include "llvm/IR/PassManager.h" 34 #include "llvm/IR/ProfDataUtils.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/BranchProbability.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/CommandLine.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/raw_ostream.h" 44 #include <cassert> 45 #include <cstdint> 46 #include <map> 47 #include <utility> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "branch-prob" 52 53 static cl::opt<bool> PrintBranchProb( 54 "print-bpi", cl::init(false), cl::Hidden, 55 cl::desc("Print the branch probability info.")); 56 57 static cl::opt<std::string> PrintBranchProbFuncName( 58 "print-bpi-func-name", cl::Hidden, 59 cl::desc("The option to specify the name of the function " 60 "whose branch probability info is printed.")); 61 62 INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", 63 "Branch Probability Analysis", false, true) 64 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 65 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 66 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 67 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) 68 INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", 69 "Branch Probability Analysis", false, true) 70 71 BranchProbabilityInfoWrapperPass::BranchProbabilityInfoWrapperPass() 72 : FunctionPass(ID) {} 73 74 char BranchProbabilityInfoWrapperPass::ID = 0; 75 76 // Weights are for internal use only. They are used by heuristics to help to 77 // estimate edges' probability. Example: 78 // 79 // Using "Loop Branch Heuristics" we predict weights of edges for the 80 // block BB2. 81 // ... 82 // | 83 // V 84 // BB1<-+ 85 // | | 86 // | | (Weight = 124) 87 // V | 88 // BB2--+ 89 // | 90 // | (Weight = 4) 91 // V 92 // BB3 93 // 94 // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 95 // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 96 static const uint32_t LBH_TAKEN_WEIGHT = 124; 97 static const uint32_t LBH_NONTAKEN_WEIGHT = 4; 98 99 /// Unreachable-terminating branch taken probability. 100 /// 101 /// This is the probability for a branch being taken to a block that terminates 102 /// (eventually) in unreachable. These are predicted as unlikely as possible. 103 /// All reachable probability will proportionally share the remaining part. 104 static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1); 105 106 /// Heuristics and lookup tables for non-loop branches: 107 /// Pointer Heuristics (PH) 108 static const uint32_t PH_TAKEN_WEIGHT = 20; 109 static const uint32_t PH_NONTAKEN_WEIGHT = 12; 110 static const BranchProbability 111 PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); 112 static const BranchProbability 113 PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); 114 115 using ProbabilityList = SmallVector<BranchProbability>; 116 using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>; 117 118 /// Pointer comparisons: 119 static const ProbabilityTable PointerTable{ 120 {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely 121 {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely 122 }; 123 124 /// Zero Heuristics (ZH) 125 static const uint32_t ZH_TAKEN_WEIGHT = 20; 126 static const uint32_t ZH_NONTAKEN_WEIGHT = 12; 127 static const BranchProbability 128 ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); 129 static const BranchProbability 130 ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); 131 132 /// Integer compares with 0: 133 static const ProbabilityTable ICmpWithZeroTable{ 134 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely 135 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely 136 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely 137 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely 138 }; 139 140 /// Integer compares with -1: 141 static const ProbabilityTable ICmpWithMinusOneTable{ 142 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely 143 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely 144 // InstCombine canonicalizes X >= 0 into X > -1 145 {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely 146 }; 147 148 /// Integer compares with 1: 149 static const ProbabilityTable ICmpWithOneTable{ 150 // InstCombine canonicalizes X <= 0 into X < 1 151 {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely 152 }; 153 154 /// strcmp and similar functions return zero, negative, or positive, if the 155 /// first string is equal, less, or greater than the second. We consider it 156 /// likely that the strings are not equal, so a comparison with zero is 157 /// probably false, but also a comparison with any other number is also 158 /// probably false given that what exactly is returned for nonzero values is 159 /// not specified. Any kind of comparison other than equality we know 160 /// nothing about. 161 static const ProbabilityTable ICmpWithLibCallTable{ 162 {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, 163 {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, 164 }; 165 166 // Floating-Point Heuristics (FPH) 167 static const uint32_t FPH_TAKEN_WEIGHT = 20; 168 static const uint32_t FPH_NONTAKEN_WEIGHT = 12; 169 170 /// This is the probability for an ordered floating point comparison. 171 static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1; 172 /// This is the probability for an unordered floating point comparison, it means 173 /// one or two of the operands are NaN. Usually it is used to test for an 174 /// exceptional case, so the result is unlikely. 175 static const uint32_t FPH_UNO_WEIGHT = 1; 176 177 static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT, 178 FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); 179 static const BranchProbability 180 FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT); 181 static const BranchProbability 182 FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); 183 static const BranchProbability 184 FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); 185 186 /// Floating-Point compares: 187 static const ProbabilityTable FCmpTable{ 188 {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely 189 {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely 190 }; 191 192 /// Set of dedicated "absolute" execution weights for a block. These weights are 193 /// meaningful relative to each other and their derivatives only. 194 enum class BlockExecWeight : std::uint32_t { 195 /// Special weight used for cases with exact zero probability. 196 ZERO = 0x0, 197 /// Minimal possible non zero weight. 198 LOWEST_NON_ZERO = 0x1, 199 /// Weight to an 'unreachable' block. 200 UNREACHABLE = ZERO, 201 /// Weight to a block containing non returning call. 202 NORETURN = LOWEST_NON_ZERO, 203 /// Weight to 'unwind' block of an invoke instruction. 204 UNWIND = LOWEST_NON_ZERO, 205 /// Weight to a 'cold' block. Cold blocks are the ones containing calls marked 206 /// with attribute 'cold'. 207 COLD = 0xffff, 208 /// Default weight is used in cases when there is no dedicated execution 209 /// weight set. It is not propagated through the domination line either. 210 DEFAULT = 0xfffff 211 }; 212 213 BranchProbabilityInfo::SccInfo::SccInfo(const Function &F) { 214 // Record SCC numbers of blocks in the CFG to identify irreducible loops. 215 // FIXME: We could only calculate this if the CFG is known to be irreducible 216 // (perhaps cache this info in LoopInfo if we can easily calculate it there?). 217 int SccNum = 0; 218 for (scc_iterator<const Function *> It = scc_begin(&F); !It.isAtEnd(); 219 ++It, ++SccNum) { 220 // Ignore single-block SCCs since they either aren't loops or LoopInfo will 221 // catch them. 222 const std::vector<const BasicBlock *> &Scc = *It; 223 if (Scc.size() == 1) 224 continue; 225 226 LLVM_DEBUG(dbgs() << "BPI: SCC " << SccNum << ":"); 227 for (const auto *BB : Scc) { 228 LLVM_DEBUG(dbgs() << " " << BB->getName()); 229 SccNums[BB] = SccNum; 230 calculateSccBlockType(BB, SccNum); 231 } 232 LLVM_DEBUG(dbgs() << "\n"); 233 } 234 } 235 236 int BranchProbabilityInfo::SccInfo::getSCCNum(const BasicBlock *BB) const { 237 auto SccIt = SccNums.find(BB); 238 if (SccIt == SccNums.end()) 239 return -1; 240 return SccIt->second; 241 } 242 243 void BranchProbabilityInfo::SccInfo::getSccEnterBlocks( 244 int SccNum, SmallVectorImpl<BasicBlock *> &Enters) const { 245 246 for (auto MapIt : SccBlocks[SccNum]) { 247 const auto *BB = MapIt.first; 248 if (isSCCHeader(BB, SccNum)) 249 for (const auto *Pred : predecessors(BB)) 250 if (getSCCNum(Pred) != SccNum) 251 Enters.push_back(const_cast<BasicBlock *>(BB)); 252 } 253 } 254 255 void BranchProbabilityInfo::SccInfo::getSccExitBlocks( 256 int SccNum, SmallVectorImpl<BasicBlock *> &Exits) const { 257 for (auto MapIt : SccBlocks[SccNum]) { 258 const auto *BB = MapIt.first; 259 if (isSCCExitingBlock(BB, SccNum)) 260 for (const auto *Succ : successors(BB)) 261 if (getSCCNum(Succ) != SccNum) 262 Exits.push_back(const_cast<BasicBlock *>(Succ)); 263 } 264 } 265 266 uint32_t BranchProbabilityInfo::SccInfo::getSccBlockType(const BasicBlock *BB, 267 int SccNum) const { 268 assert(getSCCNum(BB) == SccNum); 269 270 assert(SccBlocks.size() > static_cast<unsigned>(SccNum) && "Unknown SCC"); 271 const auto &SccBlockTypes = SccBlocks[SccNum]; 272 273 auto It = SccBlockTypes.find(BB); 274 if (It != SccBlockTypes.end()) { 275 return It->second; 276 } 277 return Inner; 278 } 279 280 void BranchProbabilityInfo::SccInfo::calculateSccBlockType(const BasicBlock *BB, 281 int SccNum) { 282 assert(getSCCNum(BB) == SccNum); 283 uint32_t BlockType = Inner; 284 285 if (llvm::any_of(predecessors(BB), [&](const BasicBlock *Pred) { 286 // Consider any block that is an entry point to the SCC as 287 // a header. 288 return getSCCNum(Pred) != SccNum; 289 })) 290 BlockType |= Header; 291 292 if (llvm::any_of(successors(BB), [&](const BasicBlock *Succ) { 293 return getSCCNum(Succ) != SccNum; 294 })) 295 BlockType |= Exiting; 296 297 // Lazily compute the set of headers for a given SCC and cache the results 298 // in the SccHeaderMap. 299 if (SccBlocks.size() <= static_cast<unsigned>(SccNum)) 300 SccBlocks.resize(SccNum + 1); 301 auto &SccBlockTypes = SccBlocks[SccNum]; 302 303 if (BlockType != Inner) { 304 bool IsInserted; 305 std::tie(std::ignore, IsInserted) = 306 SccBlockTypes.insert(std::make_pair(BB, BlockType)); 307 assert(IsInserted && "Duplicated block in SCC"); 308 } 309 } 310 311 BranchProbabilityInfo::LoopBlock::LoopBlock(const BasicBlock *BB, 312 const LoopInfo &LI, 313 const SccInfo &SccI) 314 : BB(BB) { 315 LD.first = LI.getLoopFor(BB); 316 if (!LD.first) { 317 LD.second = SccI.getSCCNum(BB); 318 } 319 } 320 321 bool BranchProbabilityInfo::isLoopEnteringEdge(const LoopEdge &Edge) const { 322 const auto &SrcBlock = Edge.first; 323 const auto &DstBlock = Edge.second; 324 return (DstBlock.getLoop() && 325 !DstBlock.getLoop()->contains(SrcBlock.getLoop())) || 326 // Assume that SCCs can't be nested. 327 (DstBlock.getSccNum() != -1 && 328 SrcBlock.getSccNum() != DstBlock.getSccNum()); 329 } 330 331 bool BranchProbabilityInfo::isLoopExitingEdge(const LoopEdge &Edge) const { 332 return isLoopEnteringEdge({Edge.second, Edge.first}); 333 } 334 335 bool BranchProbabilityInfo::isLoopEnteringExitingEdge( 336 const LoopEdge &Edge) const { 337 return isLoopEnteringEdge(Edge) || isLoopExitingEdge(Edge); 338 } 339 340 bool BranchProbabilityInfo::isLoopBackEdge(const LoopEdge &Edge) const { 341 const auto &SrcBlock = Edge.first; 342 const auto &DstBlock = Edge.second; 343 return SrcBlock.belongsToSameLoop(DstBlock) && 344 ((DstBlock.getLoop() && 345 DstBlock.getLoop()->getHeader() == DstBlock.getBlock()) || 346 (DstBlock.getSccNum() != -1 && 347 SccI->isSCCHeader(DstBlock.getBlock(), DstBlock.getSccNum()))); 348 } 349 350 void BranchProbabilityInfo::getLoopEnterBlocks( 351 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Enters) const { 352 if (LB.getLoop()) { 353 auto *Header = LB.getLoop()->getHeader(); 354 Enters.append(pred_begin(Header), pred_end(Header)); 355 } else { 356 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); 357 SccI->getSccEnterBlocks(LB.getSccNum(), Enters); 358 } 359 } 360 361 void BranchProbabilityInfo::getLoopExitBlocks( 362 const LoopBlock &LB, SmallVectorImpl<BasicBlock *> &Exits) const { 363 if (LB.getLoop()) { 364 LB.getLoop()->getExitBlocks(Exits); 365 } else { 366 assert(LB.getSccNum() != -1 && "LB doesn't belong to any loop?"); 367 SccI->getSccExitBlocks(LB.getSccNum(), Exits); 368 } 369 } 370 371 // Propagate existing explicit probabilities from either profile data or 372 // 'expect' intrinsic processing. Examine metadata against unreachable 373 // heuristic. The probability of the edge coming to unreachable block is 374 // set to min of metadata and unreachable heuristic. 375 bool BranchProbabilityInfo::calcMetadataWeights(const BasicBlock *BB) { 376 const Instruction *TI = BB->getTerminator(); 377 assert(TI->getNumSuccessors() > 1 && "expected more than one successor!"); 378 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) || isa<IndirectBrInst>(TI) || 379 isa<InvokeInst>(TI) || isa<CallBrInst>(TI))) 380 return false; 381 382 MDNode *WeightsNode = getValidBranchWeightMDNode(*TI); 383 if (!WeightsNode) 384 return false; 385 386 // Check that the number of successors is manageable. 387 assert(TI->getNumSuccessors() < UINT32_MAX && "Too many successors"); 388 389 // Build up the final weights that will be used in a temporary buffer. 390 // Compute the sum of all weights to later decide whether they need to 391 // be scaled to fit in 32 bits. 392 uint64_t WeightSum = 0; 393 SmallVector<uint32_t, 2> Weights; 394 SmallVector<unsigned, 2> UnreachableIdxs; 395 SmallVector<unsigned, 2> ReachableIdxs; 396 397 extractBranchWeights(WeightsNode, Weights); 398 for (unsigned I = 0, E = Weights.size(); I != E; ++I) { 399 WeightSum += Weights[I]; 400 const LoopBlock SrcLoopBB = getLoopBlock(BB); 401 const LoopBlock DstLoopBB = getLoopBlock(TI->getSuccessor(I)); 402 auto EstimatedWeight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); 403 if (EstimatedWeight && 404 *EstimatedWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) 405 UnreachableIdxs.push_back(I); 406 else 407 ReachableIdxs.push_back(I); 408 } 409 assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); 410 411 // If the sum of weights does not fit in 32 bits, scale every weight down 412 // accordingly. 413 uint64_t ScalingFactor = 414 (WeightSum > UINT32_MAX) ? WeightSum / UINT32_MAX + 1 : 1; 415 416 if (ScalingFactor > 1) { 417 WeightSum = 0; 418 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { 419 Weights[I] /= ScalingFactor; 420 WeightSum += Weights[I]; 421 } 422 } 423 assert(WeightSum <= UINT32_MAX && 424 "Expected weights to scale down to 32 bits"); 425 426 if (WeightSum == 0 || ReachableIdxs.size() == 0) { 427 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) 428 Weights[I] = 1; 429 WeightSum = TI->getNumSuccessors(); 430 } 431 432 // Set the probability. 433 SmallVector<BranchProbability, 2> BP; 434 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) 435 BP.push_back({ Weights[I], static_cast<uint32_t>(WeightSum) }); 436 437 // Examine the metadata against unreachable heuristic. 438 // If the unreachable heuristic is more strong then we use it for this edge. 439 if (UnreachableIdxs.size() == 0 || ReachableIdxs.size() == 0) { 440 setEdgeProbability(BB, BP); 441 return true; 442 } 443 444 auto UnreachableProb = UR_TAKEN_PROB; 445 for (auto I : UnreachableIdxs) 446 if (UnreachableProb < BP[I]) { 447 BP[I] = UnreachableProb; 448 } 449 450 // Sum of all edge probabilities must be 1.0. If we modified the probability 451 // of some edges then we must distribute the introduced difference over the 452 // reachable blocks. 453 // 454 // Proportional distribution: the relation between probabilities of the 455 // reachable edges is kept unchanged. That is for any reachable edges i and j: 456 // newBP[i] / newBP[j] == oldBP[i] / oldBP[j] => 457 // newBP[i] / oldBP[i] == newBP[j] / oldBP[j] == K 458 // Where K is independent of i,j. 459 // newBP[i] == oldBP[i] * K 460 // We need to find K. 461 // Make sum of all reachables of the left and right parts: 462 // sum_of_reachable(newBP) == K * sum_of_reachable(oldBP) 463 // Sum of newBP must be equal to 1.0: 464 // sum_of_reachable(newBP) + sum_of_unreachable(newBP) == 1.0 => 465 // sum_of_reachable(newBP) = 1.0 - sum_of_unreachable(newBP) 466 // Where sum_of_unreachable(newBP) is what has been just changed. 467 // Finally: 468 // K == sum_of_reachable(newBP) / sum_of_reachable(oldBP) => 469 // K == (1.0 - sum_of_unreachable(newBP)) / sum_of_reachable(oldBP) 470 BranchProbability NewUnreachableSum = BranchProbability::getZero(); 471 for (auto I : UnreachableIdxs) 472 NewUnreachableSum += BP[I]; 473 474 BranchProbability NewReachableSum = 475 BranchProbability::getOne() - NewUnreachableSum; 476 477 BranchProbability OldReachableSum = BranchProbability::getZero(); 478 for (auto I : ReachableIdxs) 479 OldReachableSum += BP[I]; 480 481 if (OldReachableSum != NewReachableSum) { // Anything to dsitribute? 482 if (OldReachableSum.isZero()) { 483 // If all oldBP[i] are zeroes then the proportional distribution results 484 // in all zero probabilities and the error stays big. In this case we 485 // evenly spread NewReachableSum over the reachable edges. 486 BranchProbability PerEdge = NewReachableSum / ReachableIdxs.size(); 487 for (auto I : ReachableIdxs) 488 BP[I] = PerEdge; 489 } else { 490 for (auto I : ReachableIdxs) { 491 // We use uint64_t to avoid double rounding error of the following 492 // calculation: BP[i] = BP[i] * NewReachableSum / OldReachableSum 493 // The formula is taken from the private constructor 494 // BranchProbability(uint32_t Numerator, uint32_t Denominator) 495 uint64_t Mul = static_cast<uint64_t>(NewReachableSum.getNumerator()) * 496 BP[I].getNumerator(); 497 uint32_t Div = static_cast<uint32_t>( 498 divideNearest(Mul, OldReachableSum.getNumerator())); 499 BP[I] = BranchProbability::getRaw(Div); 500 } 501 } 502 } 503 504 setEdgeProbability(BB, BP); 505 506 return true; 507 } 508 509 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparison 510 // between two pointer or pointer and NULL will fail. 511 bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) { 512 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 513 if (!BI || !BI->isConditional()) 514 return false; 515 516 Value *Cond = BI->getCondition(); 517 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 518 if (!CI || !CI->isEquality()) 519 return false; 520 521 Value *LHS = CI->getOperand(0); 522 523 if (!LHS->getType()->isPointerTy()) 524 return false; 525 526 assert(CI->getOperand(1)->getType()->isPointerTy()); 527 528 auto Search = PointerTable.find(CI->getPredicate()); 529 if (Search == PointerTable.end()) 530 return false; 531 setEdgeProbability(BB, Search->second); 532 return true; 533 } 534 535 // Compute the unlikely successors to the block BB in the loop L, specifically 536 // those that are unlikely because this is a loop, and add them to the 537 // UnlikelyBlocks set. 538 static void 539 computeUnlikelySuccessors(const BasicBlock *BB, Loop *L, 540 SmallPtrSetImpl<const BasicBlock*> &UnlikelyBlocks) { 541 // Sometimes in a loop we have a branch whose condition is made false by 542 // taking it. This is typically something like 543 // int n = 0; 544 // while (...) { 545 // if (++n >= MAX) { 546 // n = 0; 547 // } 548 // } 549 // In this sort of situation taking the branch means that at the very least it 550 // won't be taken again in the next iteration of the loop, so we should 551 // consider it less likely than a typical branch. 552 // 553 // We detect this by looking back through the graph of PHI nodes that sets the 554 // value that the condition depends on, and seeing if we can reach a successor 555 // block which can be determined to make the condition false. 556 // 557 // FIXME: We currently consider unlikely blocks to be half as likely as other 558 // blocks, but if we consider the example above the likelyhood is actually 559 // 1/MAX. We could therefore be more precise in how unlikely we consider 560 // blocks to be, but it would require more careful examination of the form 561 // of the comparison expression. 562 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 563 if (!BI || !BI->isConditional()) 564 return; 565 566 // Check if the branch is based on an instruction compared with a constant 567 CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); 568 if (!CI || !isa<Instruction>(CI->getOperand(0)) || 569 !isa<Constant>(CI->getOperand(1))) 570 return; 571 572 // Either the instruction must be a PHI, or a chain of operations involving 573 // constants that ends in a PHI which we can then collapse into a single value 574 // if the PHI value is known. 575 Instruction *CmpLHS = dyn_cast<Instruction>(CI->getOperand(0)); 576 PHINode *CmpPHI = dyn_cast<PHINode>(CmpLHS); 577 Constant *CmpConst = dyn_cast<Constant>(CI->getOperand(1)); 578 // Collect the instructions until we hit a PHI 579 SmallVector<BinaryOperator *, 1> InstChain; 580 while (!CmpPHI && CmpLHS && isa<BinaryOperator>(CmpLHS) && 581 isa<Constant>(CmpLHS->getOperand(1))) { 582 // Stop if the chain extends outside of the loop 583 if (!L->contains(CmpLHS)) 584 return; 585 InstChain.push_back(cast<BinaryOperator>(CmpLHS)); 586 CmpLHS = dyn_cast<Instruction>(CmpLHS->getOperand(0)); 587 if (CmpLHS) 588 CmpPHI = dyn_cast<PHINode>(CmpLHS); 589 } 590 if (!CmpPHI || !L->contains(CmpPHI)) 591 return; 592 593 // Trace the phi node to find all values that come from successors of BB 594 SmallPtrSet<PHINode*, 8> VisitedInsts; 595 SmallVector<PHINode*, 8> WorkList; 596 WorkList.push_back(CmpPHI); 597 VisitedInsts.insert(CmpPHI); 598 while (!WorkList.empty()) { 599 PHINode *P = WorkList.pop_back_val(); 600 for (BasicBlock *B : P->blocks()) { 601 // Skip blocks that aren't part of the loop 602 if (!L->contains(B)) 603 continue; 604 Value *V = P->getIncomingValueForBlock(B); 605 // If the source is a PHI add it to the work list if we haven't 606 // already visited it. 607 if (PHINode *PN = dyn_cast<PHINode>(V)) { 608 if (VisitedInsts.insert(PN).second) 609 WorkList.push_back(PN); 610 continue; 611 } 612 // If this incoming value is a constant and B is a successor of BB, then 613 // we can constant-evaluate the compare to see if it makes the branch be 614 // taken or not. 615 Constant *CmpLHSConst = dyn_cast<Constant>(V); 616 if (!CmpLHSConst || !llvm::is_contained(successors(BB), B)) 617 continue; 618 // First collapse InstChain 619 const DataLayout &DL = BB->getDataLayout(); 620 for (Instruction *I : llvm::reverse(InstChain)) { 621 CmpLHSConst = ConstantFoldBinaryOpOperands( 622 I->getOpcode(), CmpLHSConst, cast<Constant>(I->getOperand(1)), DL); 623 if (!CmpLHSConst) 624 break; 625 } 626 if (!CmpLHSConst) 627 continue; 628 // Now constant-evaluate the compare 629 Constant *Result = ConstantFoldCompareInstOperands( 630 CI->getPredicate(), CmpLHSConst, CmpConst, DL); 631 // If the result means we don't branch to the block then that block is 632 // unlikely. 633 if (Result && 634 ((Result->isZeroValue() && B == BI->getSuccessor(0)) || 635 (Result->isOneValue() && B == BI->getSuccessor(1)))) 636 UnlikelyBlocks.insert(B); 637 } 638 } 639 } 640 641 std::optional<uint32_t> 642 BranchProbabilityInfo::getEstimatedBlockWeight(const BasicBlock *BB) const { 643 auto WeightIt = EstimatedBlockWeight.find(BB); 644 if (WeightIt == EstimatedBlockWeight.end()) 645 return std::nullopt; 646 return WeightIt->second; 647 } 648 649 std::optional<uint32_t> 650 BranchProbabilityInfo::getEstimatedLoopWeight(const LoopData &L) const { 651 auto WeightIt = EstimatedLoopWeight.find(L); 652 if (WeightIt == EstimatedLoopWeight.end()) 653 return std::nullopt; 654 return WeightIt->second; 655 } 656 657 std::optional<uint32_t> 658 BranchProbabilityInfo::getEstimatedEdgeWeight(const LoopEdge &Edge) const { 659 // For edges entering a loop take weight of a loop rather than an individual 660 // block in the loop. 661 return isLoopEnteringEdge(Edge) 662 ? getEstimatedLoopWeight(Edge.second.getLoopData()) 663 : getEstimatedBlockWeight(Edge.second.getBlock()); 664 } 665 666 template <class IterT> 667 std::optional<uint32_t> BranchProbabilityInfo::getMaxEstimatedEdgeWeight( 668 const LoopBlock &SrcLoopBB, iterator_range<IterT> Successors) const { 669 std::optional<uint32_t> MaxWeight; 670 for (const BasicBlock *DstBB : Successors) { 671 const LoopBlock DstLoopBB = getLoopBlock(DstBB); 672 auto Weight = getEstimatedEdgeWeight({SrcLoopBB, DstLoopBB}); 673 674 if (!Weight) 675 return std::nullopt; 676 677 if (!MaxWeight || *MaxWeight < *Weight) 678 MaxWeight = Weight; 679 } 680 681 return MaxWeight; 682 } 683 684 // Updates \p LoopBB's weight and returns true. If \p LoopBB has already 685 // an associated weight it is unchanged and false is returned. 686 // 687 // Please note by the algorithm the weight is not expected to change once set 688 // thus 'false' status is used to track visited blocks. 689 bool BranchProbabilityInfo::updateEstimatedBlockWeight( 690 LoopBlock &LoopBB, uint32_t BBWeight, 691 SmallVectorImpl<BasicBlock *> &BlockWorkList, 692 SmallVectorImpl<LoopBlock> &LoopWorkList) { 693 BasicBlock *BB = LoopBB.getBlock(); 694 695 // In general, weight is assigned to a block when it has final value and 696 // can't/shouldn't be changed. However, there are cases when a block 697 // inherently has several (possibly "contradicting") weights. For example, 698 // "unwind" block may also contain "cold" call. In that case the first 699 // set weight is favored and all consequent weights are ignored. 700 if (!EstimatedBlockWeight.insert({BB, BBWeight}).second) 701 return false; 702 703 for (BasicBlock *PredBlock : predecessors(BB)) { 704 LoopBlock PredLoop = getLoopBlock(PredBlock); 705 // Add affected block/loop to a working list. 706 if (isLoopExitingEdge({PredLoop, LoopBB})) { 707 if (!EstimatedLoopWeight.count(PredLoop.getLoopData())) 708 LoopWorkList.push_back(PredLoop); 709 } else if (!EstimatedBlockWeight.count(PredBlock)) 710 BlockWorkList.push_back(PredBlock); 711 } 712 return true; 713 } 714 715 // Starting from \p BB traverse through dominator blocks and assign \p BBWeight 716 // to all such blocks that are post dominated by \BB. In other words to all 717 // blocks that the one is executed if and only if another one is executed. 718 // Importantly, we skip loops here for two reasons. First weights of blocks in 719 // a loop should be scaled by trip count (yet possibly unknown). Second there is 720 // no any value in doing that because that doesn't give any additional 721 // information regarding distribution of probabilities inside the loop. 722 // Exception is loop 'enter' and 'exit' edges that are handled in a special way 723 // at calcEstimatedHeuristics. 724 // 725 // In addition, \p WorkList is populated with basic blocks if at leas one 726 // successor has updated estimated weight. 727 void BranchProbabilityInfo::propagateEstimatedBlockWeight( 728 const LoopBlock &LoopBB, DominatorTree *DT, PostDominatorTree *PDT, 729 uint32_t BBWeight, SmallVectorImpl<BasicBlock *> &BlockWorkList, 730 SmallVectorImpl<LoopBlock> &LoopWorkList) { 731 const BasicBlock *BB = LoopBB.getBlock(); 732 const auto *DTStartNode = DT->getNode(BB); 733 const auto *PDTStartNode = PDT->getNode(BB); 734 735 // TODO: Consider propagating weight down the domination line as well. 736 for (const auto *DTNode = DTStartNode; DTNode != nullptr; 737 DTNode = DTNode->getIDom()) { 738 auto *DomBB = DTNode->getBlock(); 739 // Consider blocks which lie on one 'line'. 740 if (!PDT->dominates(PDTStartNode, PDT->getNode(DomBB))) 741 // If BB doesn't post dominate DomBB it will not post dominate dominators 742 // of DomBB as well. 743 break; 744 745 LoopBlock DomLoopBB = getLoopBlock(DomBB); 746 const LoopEdge Edge{DomLoopBB, LoopBB}; 747 // Don't propagate weight to blocks belonging to different loops. 748 if (!isLoopEnteringExitingEdge(Edge)) { 749 if (!updateEstimatedBlockWeight(DomLoopBB, BBWeight, BlockWorkList, 750 LoopWorkList)) 751 // If DomBB has weight set then all it's predecessors are already 752 // processed (since we propagate weight up to the top of IR each time). 753 break; 754 } else if (isLoopExitingEdge(Edge)) { 755 LoopWorkList.push_back(DomLoopBB); 756 } 757 } 758 } 759 760 std::optional<uint32_t> 761 BranchProbabilityInfo::getInitialEstimatedBlockWeight(const BasicBlock *BB) { 762 // Returns true if \p BB has call marked with "NoReturn" attribute. 763 auto hasNoReturn = [&](const BasicBlock *BB) { 764 for (const auto &I : reverse(*BB)) 765 if (const CallInst *CI = dyn_cast<CallInst>(&I)) 766 if (CI->hasFnAttr(Attribute::NoReturn)) 767 return true; 768 769 return false; 770 }; 771 772 // Important note regarding the order of checks. They are ordered by weight 773 // from lowest to highest. Doing that allows to avoid "unstable" results 774 // when several conditions heuristics can be applied simultaneously. 775 if (isa<UnreachableInst>(BB->getTerminator()) || 776 // If this block is terminated by a call to 777 // @llvm.experimental.deoptimize then treat it like an unreachable 778 // since it is expected to practically never execute. 779 // TODO: Should we actually treat as never returning call? 780 BB->getTerminatingDeoptimizeCall()) 781 return hasNoReturn(BB) 782 ? static_cast<uint32_t>(BlockExecWeight::NORETURN) 783 : static_cast<uint32_t>(BlockExecWeight::UNREACHABLE); 784 785 // Check if the block is an exception handling block. 786 if (BB->isEHPad()) 787 return static_cast<uint32_t>(BlockExecWeight::UNWIND); 788 789 // Check if the block contains 'cold' call. 790 for (const auto &I : *BB) 791 if (const CallInst *CI = dyn_cast<CallInst>(&I)) 792 if (CI->hasFnAttr(Attribute::Cold)) 793 return static_cast<uint32_t>(BlockExecWeight::COLD); 794 795 return std::nullopt; 796 } 797 798 // Does RPO traversal over all blocks in \p F and assigns weights to 799 // 'unreachable', 'noreturn', 'cold', 'unwind' blocks. In addition it does its 800 // best to propagate the weight to up/down the IR. 801 void BranchProbabilityInfo::estimateBlockWeights(const Function &F, 802 DominatorTree *DT, 803 PostDominatorTree *PDT) { 804 SmallVector<BasicBlock *, 8> BlockWorkList; 805 SmallVector<LoopBlock, 8> LoopWorkList; 806 SmallDenseMap<LoopData, SmallVector<BasicBlock *, 4>> LoopExitBlocks; 807 808 // By doing RPO we make sure that all predecessors already have weights 809 // calculated before visiting theirs successors. 810 ReversePostOrderTraversal<const Function *> RPOT(&F); 811 for (const auto *BB : RPOT) 812 if (auto BBWeight = getInitialEstimatedBlockWeight(BB)) 813 // If we were able to find estimated weight for the block set it to this 814 // block and propagate up the IR. 815 propagateEstimatedBlockWeight(getLoopBlock(BB), DT, PDT, *BBWeight, 816 BlockWorkList, LoopWorkList); 817 818 // BlockWorklist/LoopWorkList contains blocks/loops with at least one 819 // successor/exit having estimated weight. Try to propagate weight to such 820 // blocks/loops from successors/exits. 821 // Process loops and blocks. Order is not important. 822 do { 823 while (!LoopWorkList.empty()) { 824 const LoopBlock LoopBB = LoopWorkList.pop_back_val(); 825 const LoopData LD = LoopBB.getLoopData(); 826 if (EstimatedLoopWeight.count(LD)) 827 continue; 828 829 auto Res = LoopExitBlocks.try_emplace(LD); 830 SmallVectorImpl<BasicBlock *> &Exits = Res.first->second; 831 if (Res.second) 832 getLoopExitBlocks(LoopBB, Exits); 833 auto LoopWeight = getMaxEstimatedEdgeWeight( 834 LoopBB, make_range(Exits.begin(), Exits.end())); 835 836 if (LoopWeight) { 837 // If we never exit the loop then we can enter it once at maximum. 838 if (LoopWeight <= static_cast<uint32_t>(BlockExecWeight::UNREACHABLE)) 839 LoopWeight = static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); 840 841 EstimatedLoopWeight.insert({LD, *LoopWeight}); 842 // Add all blocks entering the loop into working list. 843 getLoopEnterBlocks(LoopBB, BlockWorkList); 844 } 845 } 846 847 while (!BlockWorkList.empty()) { 848 // We can reach here only if BlockWorkList is not empty. 849 const BasicBlock *BB = BlockWorkList.pop_back_val(); 850 if (EstimatedBlockWeight.count(BB)) 851 continue; 852 853 // We take maximum over all weights of successors. In other words we take 854 // weight of "hot" path. In theory we can probably find a better function 855 // which gives higher accuracy results (comparing to "maximum") but I 856 // can't 857 // think of any right now. And I doubt it will make any difference in 858 // practice. 859 const LoopBlock LoopBB = getLoopBlock(BB); 860 auto MaxWeight = getMaxEstimatedEdgeWeight(LoopBB, successors(BB)); 861 862 if (MaxWeight) 863 propagateEstimatedBlockWeight(LoopBB, DT, PDT, *MaxWeight, 864 BlockWorkList, LoopWorkList); 865 } 866 } while (!BlockWorkList.empty() || !LoopWorkList.empty()); 867 } 868 869 // Calculate edge probabilities based on block's estimated weight. 870 // Note that gathered weights were not scaled for loops. Thus edges entering 871 // and exiting loops requires special processing. 872 bool BranchProbabilityInfo::calcEstimatedHeuristics(const BasicBlock *BB) { 873 assert(BB->getTerminator()->getNumSuccessors() > 1 && 874 "expected more than one successor!"); 875 876 const LoopBlock LoopBB = getLoopBlock(BB); 877 878 SmallPtrSet<const BasicBlock *, 8> UnlikelyBlocks; 879 uint32_t TC = LBH_TAKEN_WEIGHT / LBH_NONTAKEN_WEIGHT; 880 if (LoopBB.getLoop()) 881 computeUnlikelySuccessors(BB, LoopBB.getLoop(), UnlikelyBlocks); 882 883 // Changed to 'true' if at least one successor has estimated weight. 884 bool FoundEstimatedWeight = false; 885 SmallVector<uint32_t, 4> SuccWeights; 886 uint64_t TotalWeight = 0; 887 // Go over all successors of BB and put their weights into SuccWeights. 888 for (const BasicBlock *SuccBB : successors(BB)) { 889 std::optional<uint32_t> Weight; 890 const LoopBlock SuccLoopBB = getLoopBlock(SuccBB); 891 const LoopEdge Edge{LoopBB, SuccLoopBB}; 892 893 Weight = getEstimatedEdgeWeight(Edge); 894 895 if (isLoopExitingEdge(Edge) && 896 // Avoid adjustment of ZERO weight since it should remain unchanged. 897 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { 898 // Scale down loop exiting weight by trip count. 899 Weight = std::max( 900 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), 901 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 902 TC); 903 } 904 bool IsUnlikelyEdge = LoopBB.getLoop() && UnlikelyBlocks.contains(SuccBB); 905 if (IsUnlikelyEdge && 906 // Avoid adjustment of ZERO weight since it should remain unchanged. 907 Weight != static_cast<uint32_t>(BlockExecWeight::ZERO)) { 908 // 'Unlikely' blocks have twice lower weight. 909 Weight = std::max( 910 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO), 911 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)) / 2); 912 } 913 914 if (Weight) 915 FoundEstimatedWeight = true; 916 917 auto WeightVal = 918 Weight.value_or(static_cast<uint32_t>(BlockExecWeight::DEFAULT)); 919 TotalWeight += WeightVal; 920 SuccWeights.push_back(WeightVal); 921 } 922 923 // If non of blocks have estimated weight bail out. 924 // If TotalWeight is 0 that means weight of each successor is 0 as well and 925 // equally likely. Bail out early to not deal with devision by zero. 926 if (!FoundEstimatedWeight || TotalWeight == 0) 927 return false; 928 929 assert(SuccWeights.size() == succ_size(BB) && "Missed successor?"); 930 const unsigned SuccCount = SuccWeights.size(); 931 932 // If the sum of weights does not fit in 32 bits, scale every weight down 933 // accordingly. 934 if (TotalWeight > UINT32_MAX) { 935 uint64_t ScalingFactor = TotalWeight / UINT32_MAX + 1; 936 TotalWeight = 0; 937 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { 938 SuccWeights[Idx] /= ScalingFactor; 939 if (SuccWeights[Idx] == static_cast<uint32_t>(BlockExecWeight::ZERO)) 940 SuccWeights[Idx] = 941 static_cast<uint32_t>(BlockExecWeight::LOWEST_NON_ZERO); 942 TotalWeight += SuccWeights[Idx]; 943 } 944 assert(TotalWeight <= UINT32_MAX && "Total weight overflows"); 945 } 946 947 // Finally set probabilities to edges according to estimated block weights. 948 SmallVector<BranchProbability, 4> EdgeProbabilities( 949 SuccCount, BranchProbability::getUnknown()); 950 951 for (unsigned Idx = 0; Idx < SuccCount; ++Idx) { 952 EdgeProbabilities[Idx] = 953 BranchProbability(SuccWeights[Idx], (uint32_t)TotalWeight); 954 } 955 setEdgeProbability(BB, EdgeProbabilities); 956 return true; 957 } 958 959 bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, 960 const TargetLibraryInfo *TLI) { 961 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 962 if (!BI || !BI->isConditional()) 963 return false; 964 965 Value *Cond = BI->getCondition(); 966 ICmpInst *CI = dyn_cast<ICmpInst>(Cond); 967 if (!CI) 968 return false; 969 970 auto GetConstantInt = [](Value *V) { 971 if (auto *I = dyn_cast<BitCastInst>(V)) 972 return dyn_cast<ConstantInt>(I->getOperand(0)); 973 return dyn_cast<ConstantInt>(V); 974 }; 975 976 Value *RHS = CI->getOperand(1); 977 ConstantInt *CV = GetConstantInt(RHS); 978 if (!CV) 979 return false; 980 981 // If the LHS is the result of AND'ing a value with a single bit bitmask, 982 // we don't have information about probabilities. 983 if (Instruction *LHS = dyn_cast<Instruction>(CI->getOperand(0))) 984 if (LHS->getOpcode() == Instruction::And) 985 if (ConstantInt *AndRHS = GetConstantInt(LHS->getOperand(1))) 986 if (AndRHS->getValue().isPowerOf2()) 987 return false; 988 989 // Check if the LHS is the return value of a library function 990 LibFunc Func = NumLibFuncs; 991 if (TLI) 992 if (CallInst *Call = dyn_cast<CallInst>(CI->getOperand(0))) 993 if (Function *CalledFn = Call->getCalledFunction()) 994 TLI->getLibFunc(*CalledFn, Func); 995 996 ProbabilityTable::const_iterator Search; 997 if (Func == LibFunc_strcasecmp || 998 Func == LibFunc_strcmp || 999 Func == LibFunc_strncasecmp || 1000 Func == LibFunc_strncmp || 1001 Func == LibFunc_memcmp || 1002 Func == LibFunc_bcmp) { 1003 Search = ICmpWithLibCallTable.find(CI->getPredicate()); 1004 if (Search == ICmpWithLibCallTable.end()) 1005 return false; 1006 } else if (CV->isZero()) { 1007 Search = ICmpWithZeroTable.find(CI->getPredicate()); 1008 if (Search == ICmpWithZeroTable.end()) 1009 return false; 1010 } else if (CV->isOne()) { 1011 Search = ICmpWithOneTable.find(CI->getPredicate()); 1012 if (Search == ICmpWithOneTable.end()) 1013 return false; 1014 } else if (CV->isMinusOne()) { 1015 Search = ICmpWithMinusOneTable.find(CI->getPredicate()); 1016 if (Search == ICmpWithMinusOneTable.end()) 1017 return false; 1018 } else { 1019 return false; 1020 } 1021 1022 setEdgeProbability(BB, Search->second); 1023 return true; 1024 } 1025 1026 bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) { 1027 const BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); 1028 if (!BI || !BI->isConditional()) 1029 return false; 1030 1031 Value *Cond = BI->getCondition(); 1032 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond); 1033 if (!FCmp) 1034 return false; 1035 1036 ProbabilityList ProbList; 1037 if (FCmp->isEquality()) { 1038 ProbList = !FCmp->isTrueWhenEqual() ? 1039 // f1 == f2 -> Unlikely 1040 ProbabilityList({FPTakenProb, FPUntakenProb}) : 1041 // f1 != f2 -> Likely 1042 ProbabilityList({FPUntakenProb, FPTakenProb}); 1043 } else { 1044 auto Search = FCmpTable.find(FCmp->getPredicate()); 1045 if (Search == FCmpTable.end()) 1046 return false; 1047 ProbList = Search->second; 1048 } 1049 1050 setEdgeProbability(BB, ProbList); 1051 return true; 1052 } 1053 1054 void BranchProbabilityInfo::releaseMemory() { 1055 Probs.clear(); 1056 Handles.clear(); 1057 } 1058 1059 bool BranchProbabilityInfo::invalidate(Function &, const PreservedAnalyses &PA, 1060 FunctionAnalysisManager::Invalidator &) { 1061 // Check whether the analysis, all analyses on functions, or the function's 1062 // CFG have been preserved. 1063 auto PAC = PA.getChecker<BranchProbabilityAnalysis>(); 1064 return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() || 1065 PAC.preservedSet<CFGAnalyses>()); 1066 } 1067 1068 void BranchProbabilityInfo::print(raw_ostream &OS) const { 1069 OS << "---- Branch Probabilities ----\n"; 1070 // We print the probabilities from the last function the analysis ran over, 1071 // or the function it is currently running over. 1072 assert(LastF && "Cannot print prior to running over a function"); 1073 for (const auto &BI : *LastF) { 1074 for (const BasicBlock *Succ : successors(&BI)) 1075 printEdgeProbability(OS << " ", &BI, Succ); 1076 } 1077 } 1078 1079 bool BranchProbabilityInfo:: 1080 isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { 1081 // Hot probability is at least 4/5 = 80% 1082 // FIXME: Compare against a static "hot" BranchProbability. 1083 return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); 1084 } 1085 1086 /// Get the raw edge probability for the edge. If can't find it, return a 1087 /// default probability 1/N where N is the number of successors. Here an edge is 1088 /// specified using PredBlock and an 1089 /// index to the successors. 1090 BranchProbability 1091 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, 1092 unsigned IndexInSuccessors) const { 1093 auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); 1094 assert((Probs.end() == Probs.find(std::make_pair(Src, 0))) == 1095 (Probs.end() == I) && 1096 "Probability for I-th successor must always be defined along with the " 1097 "probability for the first successor"); 1098 1099 if (I != Probs.end()) 1100 return I->second; 1101 1102 return {1, static_cast<uint32_t>(succ_size(Src))}; 1103 } 1104 1105 BranchProbability 1106 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, 1107 const_succ_iterator Dst) const { 1108 return getEdgeProbability(Src, Dst.getSuccessorIndex()); 1109 } 1110 1111 /// Get the raw edge probability calculated for the block pair. This returns the 1112 /// sum of all raw edge probabilities from Src to Dst. 1113 BranchProbability 1114 BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, 1115 const BasicBlock *Dst) const { 1116 if (!Probs.count(std::make_pair(Src, 0))) 1117 return BranchProbability(llvm::count(successors(Src), Dst), succ_size(Src)); 1118 1119 auto Prob = BranchProbability::getZero(); 1120 for (const_succ_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) 1121 if (*I == Dst) 1122 Prob += Probs.find(std::make_pair(Src, I.getSuccessorIndex()))->second; 1123 1124 return Prob; 1125 } 1126 1127 /// Set the edge probability for all edges at once. 1128 void BranchProbabilityInfo::setEdgeProbability( 1129 const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs) { 1130 assert(Src->getTerminator()->getNumSuccessors() == Probs.size()); 1131 eraseBlock(Src); // Erase stale data if any. 1132 if (Probs.size() == 0) 1133 return; // Nothing to set. 1134 1135 Handles.insert(BasicBlockCallbackVH(Src, this)); 1136 uint64_t TotalNumerator = 0; 1137 for (unsigned SuccIdx = 0; SuccIdx < Probs.size(); ++SuccIdx) { 1138 this->Probs[std::make_pair(Src, SuccIdx)] = Probs[SuccIdx]; 1139 LLVM_DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << SuccIdx 1140 << " successor probability to " << Probs[SuccIdx] 1141 << "\n"); 1142 TotalNumerator += Probs[SuccIdx].getNumerator(); 1143 } 1144 1145 // Because of rounding errors the total probability cannot be checked to be 1146 // 1.0 exactly. That is TotalNumerator == BranchProbability::getDenominator. 1147 // Instead, every single probability in Probs must be as accurate as possible. 1148 // This results in error 1/denominator at most, thus the total absolute error 1149 // should be within Probs.size / BranchProbability::getDenominator. 1150 assert(TotalNumerator <= BranchProbability::getDenominator() + Probs.size()); 1151 assert(TotalNumerator >= BranchProbability::getDenominator() - Probs.size()); 1152 (void)TotalNumerator; 1153 } 1154 1155 void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src, 1156 BasicBlock *Dst) { 1157 eraseBlock(Dst); // Erase stale data if any. 1158 unsigned NumSuccessors = Src->getTerminator()->getNumSuccessors(); 1159 assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors()); 1160 if (NumSuccessors == 0) 1161 return; // Nothing to set. 1162 if (!this->Probs.contains(std::make_pair(Src, 0))) 1163 return; // No probability is set for edges from Src. Keep the same for Dst. 1164 1165 Handles.insert(BasicBlockCallbackVH(Dst, this)); 1166 for (unsigned SuccIdx = 0; SuccIdx < NumSuccessors; ++SuccIdx) { 1167 auto Prob = this->Probs[std::make_pair(Src, SuccIdx)]; 1168 this->Probs[std::make_pair(Dst, SuccIdx)] = Prob; 1169 LLVM_DEBUG(dbgs() << "set edge " << Dst->getName() << " -> " << SuccIdx 1170 << " successor probability to " << Prob << "\n"); 1171 } 1172 } 1173 1174 void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) { 1175 assert(Src->getTerminator()->getNumSuccessors() == 2); 1176 auto It0 = Probs.find(std::make_pair(Src, 0)); 1177 if (It0 == Probs.end()) 1178 return; // No probability is set for edges from Src 1179 auto It1 = Probs.find(std::make_pair(Src, 1)); 1180 assert(It1 != Probs.end()); 1181 std::swap(It0->second, It1->second); 1182 } 1183 1184 raw_ostream & 1185 BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, 1186 const BasicBlock *Src, 1187 const BasicBlock *Dst) const { 1188 const BranchProbability Prob = getEdgeProbability(Src, Dst); 1189 OS << "edge "; 1190 Src->printAsOperand(OS, false, Src->getModule()); 1191 OS << " -> "; 1192 Dst->printAsOperand(OS, false, Dst->getModule()); 1193 OS << " probability is " << Prob 1194 << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); 1195 1196 return OS; 1197 } 1198 1199 void BranchProbabilityInfo::eraseBlock(const BasicBlock *BB) { 1200 LLVM_DEBUG(dbgs() << "eraseBlock " << BB->getName() << "\n"); 1201 1202 // Note that we cannot use successors of BB because the terminator of BB may 1203 // have changed when eraseBlock is called as a BasicBlockCallbackVH callback. 1204 // Instead we remove prob data for the block by iterating successors by their 1205 // indices from 0 till the last which exists. There could not be prob data for 1206 // a pair (BB, N) if there is no data for (BB, N-1) because the data is always 1207 // set for all successors from 0 to M at once by the method 1208 // setEdgeProbability(). 1209 Handles.erase(BasicBlockCallbackVH(BB, this)); 1210 for (unsigned I = 0;; ++I) { 1211 auto MapI = Probs.find(std::make_pair(BB, I)); 1212 if (MapI == Probs.end()) { 1213 assert(Probs.count(std::make_pair(BB, I + 1)) == 0 && 1214 "Must be no more successors"); 1215 return; 1216 } 1217 Probs.erase(MapI); 1218 } 1219 } 1220 1221 void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LoopI, 1222 const TargetLibraryInfo *TLI, 1223 DominatorTree *DT, 1224 PostDominatorTree *PDT) { 1225 LLVM_DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() 1226 << " ----\n\n"); 1227 LastF = &F; // Store the last function we ran on for printing. 1228 LI = &LoopI; 1229 1230 SccI = std::make_unique<SccInfo>(F); 1231 1232 assert(EstimatedBlockWeight.empty()); 1233 assert(EstimatedLoopWeight.empty()); 1234 1235 std::unique_ptr<DominatorTree> DTPtr; 1236 std::unique_ptr<PostDominatorTree> PDTPtr; 1237 1238 if (!DT) { 1239 DTPtr = std::make_unique<DominatorTree>(const_cast<Function &>(F)); 1240 DT = DTPtr.get(); 1241 } 1242 1243 if (!PDT) { 1244 PDTPtr = std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); 1245 PDT = PDTPtr.get(); 1246 } 1247 1248 estimateBlockWeights(F, DT, PDT); 1249 1250 // Walk the basic blocks in post-order so that we can build up state about 1251 // the successors of a block iteratively. 1252 for (const auto *BB : post_order(&F.getEntryBlock())) { 1253 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() 1254 << "\n"); 1255 // If there is no at least two successors, no sense to set probability. 1256 if (BB->getTerminator()->getNumSuccessors() < 2) 1257 continue; 1258 if (calcMetadataWeights(BB)) 1259 continue; 1260 if (calcEstimatedHeuristics(BB)) 1261 continue; 1262 if (calcPointerHeuristics(BB)) 1263 continue; 1264 if (calcZeroHeuristics(BB, TLI)) 1265 continue; 1266 if (calcFloatingPointHeuristics(BB)) 1267 continue; 1268 } 1269 1270 EstimatedLoopWeight.clear(); 1271 EstimatedBlockWeight.clear(); 1272 SccI.reset(); 1273 1274 if (PrintBranchProb && (PrintBranchProbFuncName.empty() || 1275 F.getName() == PrintBranchProbFuncName)) { 1276 print(dbgs()); 1277 } 1278 } 1279 1280 void BranchProbabilityInfoWrapperPass::getAnalysisUsage( 1281 AnalysisUsage &AU) const { 1282 // We require DT so it's available when LI is available. The LI updating code 1283 // asserts that DT is also present so if we don't make sure that we have DT 1284 // here, that assert will trigger. 1285 AU.addRequired<DominatorTreeWrapperPass>(); 1286 AU.addRequired<LoopInfoWrapperPass>(); 1287 AU.addRequired<TargetLibraryInfoWrapperPass>(); 1288 AU.addRequired<DominatorTreeWrapperPass>(); 1289 AU.addRequired<PostDominatorTreeWrapperPass>(); 1290 AU.setPreservesAll(); 1291 } 1292 1293 bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { 1294 const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 1295 const TargetLibraryInfo &TLI = 1296 getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 1297 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1298 PostDominatorTree &PDT = 1299 getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); 1300 BPI.calculate(F, LI, &TLI, &DT, &PDT); 1301 return false; 1302 } 1303 1304 void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } 1305 1306 void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, 1307 const Module *) const { 1308 BPI.print(OS); 1309 } 1310 1311 AnalysisKey BranchProbabilityAnalysis::Key; 1312 BranchProbabilityInfo 1313 BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) { 1314 auto &LI = AM.getResult<LoopAnalysis>(F); 1315 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); 1316 auto &DT = AM.getResult<DominatorTreeAnalysis>(F); 1317 auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); 1318 BranchProbabilityInfo BPI; 1319 BPI.calculate(F, LI, &TLI, &DT, &PDT); 1320 return BPI; 1321 } 1322 1323 PreservedAnalyses 1324 BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) { 1325 OS << "Printing analysis 'Branch Probability Analysis' for function '" 1326 << F.getName() << "':\n"; 1327 AM.getResult<BranchProbabilityAnalysis>(F).print(OS); 1328 return PreservedAnalyses::all(); 1329 } 1330