1 //===- ProfileSummaryInfo.cpp - Global profile summary information --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that provides access to the global profile summary 10 // information. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/ProfileSummaryInfo.h" 15 #include "llvm/Analysis/BlockFrequencyInfo.h" 16 #include "llvm/IR/BasicBlock.h" 17 #include "llvm/IR/Instructions.h" 18 #include "llvm/IR/Metadata.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/InitializePasses.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/CommandLine.h" 24 using namespace llvm; 25 26 // Knobs for profile summary based thresholds. 27 extern cl::opt<int> ProfileSummaryCutoffHot; 28 extern cl::opt<int> ProfileSummaryCutoffCold; 29 extern cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold; 30 extern cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold; 31 extern cl::opt<int> ProfileSummaryHotCount; 32 extern cl::opt<int> ProfileSummaryColdCount; 33 34 static cl::opt<bool> PartialProfile( 35 "partial-profile", cl::Hidden, cl::init(false), 36 cl::desc("Specify the current profile is used as a partial profile.")); 37 38 cl::opt<bool> ScalePartialSampleProfileWorkingSetSize( 39 "scale-partial-sample-profile-working-set-size", cl::Hidden, cl::init(true), 40 cl::desc( 41 "If true, scale the working set size of the partial sample profile " 42 "by the partial profile ratio to reflect the size of the program " 43 "being compiled.")); 44 45 static cl::opt<double> PartialSampleProfileWorkingSetSizeScaleFactor( 46 "partial-sample-profile-working-set-size-scale-factor", cl::Hidden, 47 cl::init(0.008), 48 cl::desc("The scale factor used to scale the working set size of the " 49 "partial sample profile along with the partial profile ratio. " 50 "This includes the factor of the profile counter per block " 51 "and the factor to scale the working set size to use the same " 52 "shared thresholds as PGO.")); 53 54 // The profile summary metadata may be attached either by the frontend or by 55 // any backend passes (IR level instrumentation, for example). This method 56 // checks if the Summary is null and if so checks if the summary metadata is now 57 // available in the module and parses it to get the Summary object. 58 void ProfileSummaryInfo::refresh() { 59 if (hasProfileSummary()) 60 return; 61 // First try to get context sensitive ProfileSummary. 62 auto *SummaryMD = M->getProfileSummary(/* IsCS */ true); 63 if (SummaryMD) 64 Summary.reset(ProfileSummary::getFromMD(SummaryMD)); 65 66 if (!hasProfileSummary()) { 67 // This will actually return PSK_Instr or PSK_Sample summary. 68 SummaryMD = M->getProfileSummary(/* IsCS */ false); 69 if (SummaryMD) 70 Summary.reset(ProfileSummary::getFromMD(SummaryMD)); 71 } 72 if (!hasProfileSummary()) 73 return; 74 computeThresholds(); 75 } 76 77 Optional<uint64_t> ProfileSummaryInfo::getProfileCount( 78 const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const { 79 assert((isa<CallInst>(Call) || isa<InvokeInst>(Call)) && 80 "We can only get profile count for call/invoke instruction."); 81 if (hasSampleProfile()) { 82 // In sample PGO mode, check if there is a profile metadata on the 83 // instruction. If it is present, determine hotness solely based on that, 84 // since the sampled entry count may not be accurate. If there is no 85 // annotated on the instruction, return None. 86 uint64_t TotalCount; 87 if (Call.extractProfTotalWeight(TotalCount)) 88 return TotalCount; 89 return None; 90 } 91 if (BFI) 92 return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic); 93 return None; 94 } 95 96 /// Returns true if the function's entry is hot. If it returns false, it 97 /// either means it is not hot or it is unknown whether it is hot or not (for 98 /// example, no profile data is available). 99 bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const { 100 if (!F || !hasProfileSummary()) 101 return false; 102 auto FunctionCount = F->getEntryCount(); 103 // FIXME: The heuristic used below for determining hotness is based on 104 // preliminary SPEC tuning for inliner. This will eventually be a 105 // convenience method that calls isHotCount. 106 return FunctionCount && isHotCount(FunctionCount->getCount()); 107 } 108 109 /// Returns true if the function contains hot code. This can include a hot 110 /// function entry count, hot basic block, or (in the case of Sample PGO) 111 /// hot total call edge count. 112 /// If it returns false, it either means it is not hot or it is unknown 113 /// (for example, no profile data is available). 114 bool ProfileSummaryInfo::isFunctionHotInCallGraph( 115 const Function *F, BlockFrequencyInfo &BFI) const { 116 if (!F || !hasProfileSummary()) 117 return false; 118 if (auto FunctionCount = F->getEntryCount()) 119 if (isHotCount(FunctionCount->getCount())) 120 return true; 121 122 if (hasSampleProfile()) { 123 uint64_t TotalCallCount = 0; 124 for (const auto &BB : *F) 125 for (const auto &I : BB) 126 if (isa<CallInst>(I) || isa<InvokeInst>(I)) 127 if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) 128 TotalCallCount += CallCount.getValue(); 129 if (isHotCount(TotalCallCount)) 130 return true; 131 } 132 for (const auto &BB : *F) 133 if (isHotBlock(&BB, &BFI)) 134 return true; 135 return false; 136 } 137 138 /// Returns true if the function only contains cold code. This means that 139 /// the function entry and blocks are all cold, and (in the case of Sample PGO) 140 /// the total call edge count is cold. 141 /// If it returns false, it either means it is not cold or it is unknown 142 /// (for example, no profile data is available). 143 bool ProfileSummaryInfo::isFunctionColdInCallGraph( 144 const Function *F, BlockFrequencyInfo &BFI) const { 145 if (!F || !hasProfileSummary()) 146 return false; 147 if (auto FunctionCount = F->getEntryCount()) 148 if (!isColdCount(FunctionCount->getCount())) 149 return false; 150 151 if (hasSampleProfile()) { 152 uint64_t TotalCallCount = 0; 153 for (const auto &BB : *F) 154 for (const auto &I : BB) 155 if (isa<CallInst>(I) || isa<InvokeInst>(I)) 156 if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) 157 TotalCallCount += CallCount.getValue(); 158 if (!isColdCount(TotalCallCount)) 159 return false; 160 } 161 for (const auto &BB : *F) 162 if (!isColdBlock(&BB, &BFI)) 163 return false; 164 return true; 165 } 166 167 bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const { 168 assert(hasPartialSampleProfile() && "Expect partial sample profile"); 169 return !F.getEntryCount().hasValue(); 170 } 171 172 template <bool isHot> 173 bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile( 174 int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { 175 if (!F || !hasProfileSummary()) 176 return false; 177 if (auto FunctionCount = F->getEntryCount()) { 178 if (isHot && 179 isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount())) 180 return true; 181 if (!isHot && 182 !isColdCountNthPercentile(PercentileCutoff, FunctionCount->getCount())) 183 return false; 184 } 185 if (hasSampleProfile()) { 186 uint64_t TotalCallCount = 0; 187 for (const auto &BB : *F) 188 for (const auto &I : BB) 189 if (isa<CallInst>(I) || isa<InvokeInst>(I)) 190 if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr)) 191 TotalCallCount += CallCount.getValue(); 192 if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount)) 193 return true; 194 if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount)) 195 return false; 196 } 197 for (const auto &BB : *F) { 198 if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI)) 199 return true; 200 if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &BFI)) 201 return false; 202 } 203 return !isHot; 204 } 205 206 // Like isFunctionHotInCallGraph but for a given cutoff. 207 bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile( 208 int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { 209 return isFunctionHotOrColdInCallGraphNthPercentile<true>( 210 PercentileCutoff, F, BFI); 211 } 212 213 bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile( 214 int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const { 215 return isFunctionHotOrColdInCallGraphNthPercentile<false>( 216 PercentileCutoff, F, BFI); 217 } 218 219 /// Returns true if the function's entry is a cold. If it returns false, it 220 /// either means it is not cold or it is unknown whether it is cold or not (for 221 /// example, no profile data is available). 222 bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const { 223 if (!F) 224 return false; 225 if (F->hasFnAttribute(Attribute::Cold)) 226 return true; 227 if (!hasProfileSummary()) 228 return false; 229 auto FunctionCount = F->getEntryCount(); 230 // FIXME: The heuristic used below for determining coldness is based on 231 // preliminary SPEC tuning for inliner. This will eventually be a 232 // convenience method that calls isHotCount. 233 return FunctionCount && isColdCount(FunctionCount->getCount()); 234 } 235 236 /// Compute the hot and cold thresholds. 237 void ProfileSummaryInfo::computeThresholds() { 238 auto &DetailedSummary = Summary->getDetailedSummary(); 239 auto &HotEntry = ProfileSummaryBuilder::getEntryForPercentile( 240 DetailedSummary, ProfileSummaryCutoffHot); 241 HotCountThreshold = 242 ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary); 243 ColdCountThreshold = 244 ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary); 245 assert(ColdCountThreshold <= HotCountThreshold && 246 "Cold count threshold cannot exceed hot count threshold!"); 247 if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) { 248 HasHugeWorkingSetSize = 249 HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; 250 HasLargeWorkingSetSize = 251 HotEntry.NumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; 252 } else { 253 // Scale the working set size of the partial sample profile to reflect the 254 // size of the program being compiled. 255 double PartialProfileRatio = Summary->getPartialProfileRatio(); 256 uint64_t ScaledHotEntryNumCounts = 257 static_cast<uint64_t>(HotEntry.NumCounts * PartialProfileRatio * 258 PartialSampleProfileWorkingSetSizeScaleFactor); 259 HasHugeWorkingSetSize = 260 ScaledHotEntryNumCounts > ProfileSummaryHugeWorkingSetSizeThreshold; 261 HasLargeWorkingSetSize = 262 ScaledHotEntryNumCounts > ProfileSummaryLargeWorkingSetSizeThreshold; 263 } 264 } 265 266 Optional<uint64_t> 267 ProfileSummaryInfo::computeThreshold(int PercentileCutoff) const { 268 if (!hasProfileSummary()) 269 return None; 270 auto iter = ThresholdCache.find(PercentileCutoff); 271 if (iter != ThresholdCache.end()) { 272 return iter->second; 273 } 274 auto &DetailedSummary = Summary->getDetailedSummary(); 275 auto &Entry = ProfileSummaryBuilder::getEntryForPercentile(DetailedSummary, 276 PercentileCutoff); 277 uint64_t CountThreshold = Entry.MinCount; 278 ThresholdCache[PercentileCutoff] = CountThreshold; 279 return CountThreshold; 280 } 281 282 bool ProfileSummaryInfo::hasHugeWorkingSetSize() const { 283 return HasHugeWorkingSetSize && HasHugeWorkingSetSize.getValue(); 284 } 285 286 bool ProfileSummaryInfo::hasLargeWorkingSetSize() const { 287 return HasLargeWorkingSetSize && HasLargeWorkingSetSize.getValue(); 288 } 289 290 bool ProfileSummaryInfo::isHotCount(uint64_t C) const { 291 return HotCountThreshold && C >= HotCountThreshold.getValue(); 292 } 293 294 bool ProfileSummaryInfo::isColdCount(uint64_t C) const { 295 return ColdCountThreshold && C <= ColdCountThreshold.getValue(); 296 } 297 298 template <bool isHot> 299 bool ProfileSummaryInfo::isHotOrColdCountNthPercentile(int PercentileCutoff, 300 uint64_t C) const { 301 auto CountThreshold = computeThreshold(PercentileCutoff); 302 if (isHot) 303 return CountThreshold && C >= CountThreshold.getValue(); 304 else 305 return CountThreshold && C <= CountThreshold.getValue(); 306 } 307 308 bool ProfileSummaryInfo::isHotCountNthPercentile(int PercentileCutoff, 309 uint64_t C) const { 310 return isHotOrColdCountNthPercentile<true>(PercentileCutoff, C); 311 } 312 313 bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff, 314 uint64_t C) const { 315 return isHotOrColdCountNthPercentile<false>(PercentileCutoff, C); 316 } 317 318 uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const { 319 return HotCountThreshold.getValueOr(UINT64_MAX); 320 } 321 322 uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const { 323 return ColdCountThreshold.getValueOr(0); 324 } 325 326 bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB, 327 BlockFrequencyInfo *BFI) const { 328 auto Count = BFI->getBlockProfileCount(BB); 329 return Count && isHotCount(*Count); 330 } 331 332 bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB, 333 BlockFrequencyInfo *BFI) const { 334 auto Count = BFI->getBlockProfileCount(BB); 335 return Count && isColdCount(*Count); 336 } 337 338 template <bool isHot> 339 bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile( 340 int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { 341 auto Count = BFI->getBlockProfileCount(BB); 342 if (isHot) 343 return Count && isHotCountNthPercentile(PercentileCutoff, *Count); 344 else 345 return Count && isColdCountNthPercentile(PercentileCutoff, *Count); 346 } 347 348 bool ProfileSummaryInfo::isHotBlockNthPercentile( 349 int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { 350 return isHotOrColdBlockNthPercentile<true>(PercentileCutoff, BB, BFI); 351 } 352 353 bool ProfileSummaryInfo::isColdBlockNthPercentile( 354 int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const { 355 return isHotOrColdBlockNthPercentile<false>(PercentileCutoff, BB, BFI); 356 } 357 358 bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB, 359 BlockFrequencyInfo *BFI) const { 360 auto C = getProfileCount(CB, BFI); 361 return C && isHotCount(*C); 362 } 363 364 bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB, 365 BlockFrequencyInfo *BFI) const { 366 auto C = getProfileCount(CB, BFI); 367 if (C) 368 return isColdCount(*C); 369 370 // In SamplePGO, if the caller has been sampled, and there is no profile 371 // annotated on the callsite, we consider the callsite as cold. 372 return hasSampleProfile() && CB.getCaller()->hasProfileData(); 373 } 374 375 bool ProfileSummaryInfo::hasPartialSampleProfile() const { 376 return hasProfileSummary() && 377 Summary->getKind() == ProfileSummary::PSK_Sample && 378 (PartialProfile || Summary->isPartialProfile()); 379 } 380 381 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info", 382 "Profile summary info", false, true) 383 384 ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass() 385 : ImmutablePass(ID) { 386 initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); 387 } 388 389 bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) { 390 PSI.reset(new ProfileSummaryInfo(M)); 391 return false; 392 } 393 394 bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) { 395 PSI.reset(); 396 return false; 397 } 398 399 AnalysisKey ProfileSummaryAnalysis::Key; 400 ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M, 401 ModuleAnalysisManager &) { 402 return ProfileSummaryInfo(M); 403 } 404 405 PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M, 406 ModuleAnalysisManager &AM) { 407 ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M); 408 409 OS << "Functions in " << M.getName() << " with hot/cold annotations: \n"; 410 for (auto &F : M) { 411 OS << F.getName(); 412 if (PSI.isFunctionEntryHot(&F)) 413 OS << " :hot entry "; 414 else if (PSI.isFunctionEntryCold(&F)) 415 OS << " :cold entry "; 416 OS << "\n"; 417 } 418 return PreservedAnalyses::all(); 419 } 420 421 char ProfileSummaryInfoWrapperPass::ID = 0; 422