1 //=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for computing profile summary data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/IR/ProfileSummary.h" 14 #include "llvm/ProfileData/InstrProf.h" 15 #include "llvm/ProfileData/ProfileCommon.h" 16 #include "llvm/ProfileData/SampleProf.h" 17 #include "llvm/Support/CommandLine.h" 18 19 using namespace llvm; 20 21 cl::opt<bool> UseContextLessSummary( 22 "profile-summary-contextless", cl::Hidden, 23 cl::desc("Merge context profiles before calculating thresholds.")); 24 25 // The following two parameters determine the threshold for a count to be 26 // considered hot/cold. These two parameters are percentile values (multiplied 27 // by 10000). If the counts are sorted in descending order, the minimum count to 28 // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count. 29 // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the 30 // threshold for determining cold count (everything <= this threshold is 31 // considered cold). 32 cl::opt<int> ProfileSummaryCutoffHot( 33 "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), 34 cl::desc("A count is hot if it exceeds the minimum count to" 35 " reach this percentile of total counts.")); 36 37 cl::opt<int> ProfileSummaryCutoffCold( 38 "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), 39 cl::desc("A count is cold if it is below the minimum count" 40 " to reach this percentile of total counts.")); 41 42 cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold( 43 "profile-summary-huge-working-set-size-threshold", cl::Hidden, 44 cl::init(15000), 45 cl::desc("The code working set size is considered huge if the number of" 46 " blocks required to reach the -profile-summary-cutoff-hot" 47 " percentile exceeds this count.")); 48 49 cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold( 50 "profile-summary-large-working-set-size-threshold", cl::Hidden, 51 cl::init(12500), 52 cl::desc("The code working set size is considered large if the number of" 53 " blocks required to reach the -profile-summary-cutoff-hot" 54 " percentile exceeds this count.")); 55 56 // The next two options override the counts derived from summary computation and 57 // are useful for debugging purposes. 58 cl::opt<uint64_t> ProfileSummaryHotCount( 59 "profile-summary-hot-count", cl::ReallyHidden, 60 cl::desc("A fixed hot count that overrides the count derived from" 61 " profile-summary-cutoff-hot")); 62 63 cl::opt<uint64_t> ProfileSummaryColdCount( 64 "profile-summary-cold-count", cl::ReallyHidden, 65 cl::desc("A fixed cold count that overrides the count derived from" 66 " profile-summary-cutoff-cold")); 67 68 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale 69 // (which is 1000000) is a desired percentile of total counts. 70 static const uint32_t DefaultCutoffsData[] = { 71 10000, /* 1% */ 72 100000, /* 10% */ 73 200000, 300000, 400000, 500000, 600000, 700000, 800000, 74 900000, 950000, 990000, 999000, 999900, 999990, 999999}; 75 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = 76 DefaultCutoffsData; 77 78 const ProfileSummaryEntry & 79 ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, 80 uint64_t Percentile) { 81 auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { 82 return Entry.Cutoff < Percentile; 83 }); 84 // The required percentile has to be <= one of the percentiles in the 85 // detailed summary. 86 if (It == DS.end()) 87 report_fatal_error("Desired percentile exceeds the maximum cutoff"); 88 return *It; 89 } 90 91 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { 92 // The first counter is not necessarily an entry count for IR 93 // instrumentation profiles. 94 // Eventually MaxFunctionCount will become obsolete and this can be 95 // removed. 96 addEntryCount(R.Counts[0]); 97 for (size_t I = 1, E = R.Counts.size(); I < E; ++I) 98 addInternalCount(R.Counts[I]); 99 } 100 101 // To compute the detailed summary, we consider each line containing samples as 102 // equivalent to a block with a count in the instrumented profile. 103 void SampleProfileSummaryBuilder::addRecord( 104 const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { 105 if (!isCallsiteSample) { 106 NumFunctions++; 107 if (FS.getHeadSamples() > MaxFunctionCount) 108 MaxFunctionCount = FS.getHeadSamples(); 109 } else if (FS.getContext().hasAttribute( 110 sampleprof::ContextDuplicatedIntoBase)) { 111 // Do not recount callee samples if they are already merged into their base 112 // profiles. This can happen to CS nested profile. 113 return; 114 } 115 116 for (const auto &I : FS.getBodySamples()) { 117 uint64_t Count = I.second.getSamples(); 118 addCount(Count); 119 } 120 for (const auto &I : FS.getCallsiteSamples()) 121 for (const auto &CS : I.second) 122 addRecord(CS.second, true); 123 } 124 125 // The argument to this method is a vector of cutoff percentages and the return 126 // value is a vector of (Cutoff, MinCount, NumCounts) triplets. 127 void ProfileSummaryBuilder::computeDetailedSummary() { 128 if (DetailedSummaryCutoffs.empty()) 129 return; 130 llvm::sort(DetailedSummaryCutoffs); 131 auto Iter = CountFrequencies.begin(); 132 const auto End = CountFrequencies.end(); 133 134 uint32_t CountsSeen = 0; 135 uint64_t CurrSum = 0, Count = 0; 136 137 for (const uint32_t Cutoff : DetailedSummaryCutoffs) { 138 assert(Cutoff <= 999999); 139 APInt Temp(128, TotalCount); 140 APInt N(128, Cutoff); 141 APInt D(128, ProfileSummary::Scale); 142 Temp *= N; 143 Temp = Temp.sdiv(D); 144 uint64_t DesiredCount = Temp.getZExtValue(); 145 assert(DesiredCount <= TotalCount); 146 while (CurrSum < DesiredCount && Iter != End) { 147 Count = Iter->first; 148 uint32_t Freq = Iter->second; 149 CurrSum += (Count * Freq); 150 CountsSeen += Freq; 151 Iter++; 152 } 153 assert(CurrSum >= DesiredCount); 154 ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; 155 DetailedSummary.push_back(PSE); 156 } 157 } 158 159 uint64_t 160 ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { 161 auto &HotEntry = 162 ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); 163 uint64_t HotCountThreshold = HotEntry.MinCount; 164 if (ProfileSummaryHotCount.getNumOccurrences() > 0) 165 HotCountThreshold = ProfileSummaryHotCount; 166 return HotCountThreshold; 167 } 168 169 uint64_t 170 ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { 171 auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( 172 DS, ProfileSummaryCutoffCold); 173 uint64_t ColdCountThreshold = ColdEntry.MinCount; 174 if (ProfileSummaryColdCount.getNumOccurrences() > 0) 175 ColdCountThreshold = ProfileSummaryColdCount; 176 return ColdCountThreshold; 177 } 178 179 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { 180 computeDetailedSummary(); 181 return std::make_unique<ProfileSummary>( 182 ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, 183 MaxFunctionCount, NumCounts, NumFunctions); 184 } 185 186 std::unique_ptr<ProfileSummary> 187 SampleProfileSummaryBuilder::computeSummaryForProfiles( 188 const SampleProfileMap &Profiles) { 189 assert(NumFunctions == 0 && 190 "This can only be called on an empty summary builder"); 191 sampleprof::SampleProfileMap ContextLessProfiles; 192 const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; 193 // For CSSPGO, context-sensitive profile effectively split a function profile 194 // into many copies each representing the CFG profile of a particular calling 195 // context. That makes the count distribution looks more flat as we now have 196 // more function profiles each with lower counts, which in turn leads to lower 197 // hot thresholds. To compensate for that, by default we merge context 198 // profiles before computing profile summary. 199 if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && 200 !UseContextLessSummary.getNumOccurrences())) { 201 for (const auto &I : Profiles) { 202 ContextLessProfiles[I.second.getName()].merge(I.second); 203 } 204 ProfilesToUse = &ContextLessProfiles; 205 } 206 207 for (const auto &I : *ProfilesToUse) { 208 const sampleprof::FunctionSamples &Profile = I.second; 209 addRecord(Profile); 210 } 211 212 return getSummary(); 213 } 214 215 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { 216 computeDetailedSummary(); 217 return std::make_unique<ProfileSummary>( 218 ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, 219 MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); 220 } 221 222 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { 223 NumFunctions++; 224 225 // Skip invalid count. 226 if (Count == (uint64_t)-1) 227 return; 228 229 addCount(Count); 230 if (Count > MaxFunctionCount) 231 MaxFunctionCount = Count; 232 } 233 234 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { 235 // Skip invalid count. 236 if (Count == (uint64_t)-1) 237 return; 238 239 addCount(Count); 240 if (Count > MaxInternalBlockCount) 241 MaxInternalBlockCount = Count; 242 } 243