//=-- ProfilesummaryBuilder.cpp - Profile summary computation ---------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for computing profile summary data.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

namespace llvm {
// When set, context-sensitive sample profiles are merged before the summary is
// computed. SampleProfileSummaryBuilder::computeSummaryForProfiles also merges
// by default for CS profiles when this flag is not given on the command line.
cl::opt<bool> UseContextLessSummary(
    "profile-summary-contextless", cl::Hidden,
    cl::desc("Merge context profiles before calculating thresholds."));

// The following two parameters determine the threshold for a count to be
// considered hot/cold. These two parameters are percentile values (multiplied
// by 10000), e.g. the default hot cutoff of 990000 stands for 99%. If the
// counts are sorted in descending order, the minimum count to reach
// ProfileSummaryCutoffHot gives the threshold to determine a hot count.
// Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
// threshold for determining cold count (everything <= this threshold is
// considered cold).
cl::opt<int> ProfileSummaryCutoffHot(
    "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
    cl::desc("A count is hot if it exceeds the minimum count to"
             " reach this percentile of total counts."));

cl::opt<int> ProfileSummaryCutoffCold(
    "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
    cl::desc("A count is cold if it is below the minimum count"
             " to reach this percentile of total counts."));

// The next two options classify the code working set size by the number of
// blocks needed to reach the hot cutoff percentile. The "huge" threshold
// (default 15000) is higher than the "large" one (default 12500).
cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
    "profile-summary-huge-working-set-size-threshold", cl::Hidden,
    cl::init(15000),
    cl::desc("The code working set size is considered huge if the number of"
             " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));

cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
    "profile-summary-large-working-set-size-threshold", cl::Hidden,
    cl::init(12500),
    cl::desc("The code working set size is considered large if the number of"
             " blocks required to reach the -profile-summary-cutoff-hot"
             " percentile exceeds this count."));

// The next two options override the counts derived from summary computation and
// are useful for debugging purposes. They only take effect when explicitly
// given on the command line (see getHotCountThreshold/getColdCountThreshold).
cl::opt<uint64_t> ProfileSummaryHotCount(
    "profile-summary-hot-count", cl::ReallyHidden,
    cl::desc("A fixed hot count that overrides the count derived from"
             " profile-summary-cutoff-hot"));

cl::opt<uint64_t> ProfileSummaryColdCount(
    "profile-summary-cold-count", cl::ReallyHidden,
    cl::desc("A fixed cold count that overrides the count derived from"
             " profile-summary-cutoff-cold"));
} // namespace llvm

// A set of cutoff values. Each value, when divided by ProfileSummary::Scale
// (which is 1000000) is a desired percentile of total counts.
72 static const uint32_t DefaultCutoffsData[] = { 73 10000, /* 1% */ 74 100000, /* 10% */ 75 200000, 300000, 400000, 500000, 600000, 700000, 800000, 76 900000, 950000, 990000, 999000, 999900, 999990, 999999}; 77 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs = 78 DefaultCutoffsData; 79 80 // An entry for the 0th percentile to correctly calculate hot/cold count 81 // thresholds when -profile-summary-cutoff-hot/cold is 0. If the hot cutoff is 82 // 0, no sample counts are treated as hot. If the cold cutoff is 0, all sample 83 // counts are treated as cold. Assumes there is no UINT64_MAX sample counts. 84 static const ProfileSummaryEntry ZeroCutoffEntry = {0, UINT64_MAX, 0}; 85 86 const ProfileSummaryEntry & 87 ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS, 88 uint64_t Percentile) { 89 if (Percentile == 0) 90 return ZeroCutoffEntry; 91 92 auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) { 93 return Entry.Cutoff < Percentile; 94 }); 95 // The required percentile has to be <= one of the percentiles in the 96 // detailed summary. 97 if (It == DS.end()) 98 report_fatal_error("Desired percentile exceeds the maximum cutoff"); 99 return *It; 100 } 101 102 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) { 103 // The first counter is not necessarily an entry count for IR 104 // instrumentation profiles. 105 // Eventually MaxFunctionCount will become obsolete and this can be 106 // removed. 107 108 if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo) 109 return; 110 111 addEntryCount(R.Counts[0]); 112 for (size_t I = 1, E = R.Counts.size(); I < E; ++I) 113 addInternalCount(R.Counts[I]); 114 } 115 116 // To compute the detailed summary, we consider each line containing samples as 117 // equivalent to a block with a count in the instrumented profile. 
118 void SampleProfileSummaryBuilder::addRecord( 119 const sampleprof::FunctionSamples &FS, bool isCallsiteSample) { 120 if (!isCallsiteSample) { 121 NumFunctions++; 122 if (FS.getHeadSamples() > MaxFunctionCount) 123 MaxFunctionCount = FS.getHeadSamples(); 124 } else if (FS.getContext().hasAttribute( 125 sampleprof::ContextDuplicatedIntoBase)) { 126 // Do not recount callee samples if they are already merged into their base 127 // profiles. This can happen to CS nested profile. 128 return; 129 } 130 131 for (const auto &I : FS.getBodySamples()) { 132 uint64_t Count = I.second.getSamples(); 133 addCount(Count); 134 } 135 for (const auto &I : FS.getCallsiteSamples()) 136 for (const auto &CS : I.second) 137 addRecord(CS.second, true); 138 } 139 140 // The argument to this method is a vector of cutoff percentages and the return 141 // value is a vector of (Cutoff, MinCount, NumCounts) triplets. 142 void ProfileSummaryBuilder::computeDetailedSummary() { 143 if (DetailedSummaryCutoffs.empty()) 144 return; 145 llvm::sort(DetailedSummaryCutoffs); 146 auto Iter = CountFrequencies.begin(); 147 const auto End = CountFrequencies.end(); 148 149 uint32_t CountsSeen = 0; 150 uint64_t CurrSum = 0, Count = 0; 151 152 for (const uint32_t Cutoff : DetailedSummaryCutoffs) { 153 assert(Cutoff <= 999999); 154 APInt Temp(128, TotalCount); 155 APInt N(128, Cutoff); 156 APInt D(128, ProfileSummary::Scale); 157 Temp *= N; 158 Temp = Temp.sdiv(D); 159 uint64_t DesiredCount = Temp.getZExtValue(); 160 assert(DesiredCount <= TotalCount); 161 while (CurrSum < DesiredCount && Iter != End) { 162 Count = Iter->first; 163 uint32_t Freq = Iter->second; 164 CurrSum += (Count * Freq); 165 CountsSeen += Freq; 166 Iter++; 167 } 168 assert(CurrSum >= DesiredCount); 169 ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen}; 170 DetailedSummary.push_back(PSE); 171 } 172 } 173 174 uint64_t 175 ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) { 176 auto &HotEntry = 177 
ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot); 178 uint64_t HotCountThreshold = HotEntry.MinCount; 179 if (ProfileSummaryHotCount.getNumOccurrences() > 0) 180 HotCountThreshold = ProfileSummaryHotCount; 181 return HotCountThreshold; 182 } 183 184 uint64_t 185 ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) { 186 auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile( 187 DS, ProfileSummaryCutoffCold); 188 uint64_t ColdCountThreshold = ColdEntry.MinCount; 189 if (ProfileSummaryColdCount.getNumOccurrences() > 0) 190 ColdCountThreshold = ProfileSummaryColdCount; 191 return ColdCountThreshold; 192 } 193 194 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() { 195 computeDetailedSummary(); 196 return std::make_unique<ProfileSummary>( 197 ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0, 198 MaxFunctionCount, NumCounts, NumFunctions); 199 } 200 201 std::unique_ptr<ProfileSummary> 202 SampleProfileSummaryBuilder::computeSummaryForProfiles( 203 const SampleProfileMap &Profiles) { 204 assert(NumFunctions == 0 && 205 "This can only be called on an empty summary builder"); 206 sampleprof::SampleProfileMap ContextLessProfiles; 207 const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles; 208 // For CSSPGO, context-sensitive profile effectively split a function profile 209 // into many copies each representing the CFG profile of a particular calling 210 // context. That makes the count distribution looks more flat as we now have 211 // more function profiles each with lower counts, which in turn leads to lower 212 // hot thresholds. To compensate for that, by default we merge context 213 // profiles before computing profile summary. 
214 if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS && 215 !UseContextLessSummary.getNumOccurrences())) { 216 ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true); 217 ProfilesToUse = &ContextLessProfiles; 218 } 219 220 for (const auto &I : *ProfilesToUse) { 221 const sampleprof::FunctionSamples &Profile = I.second; 222 addRecord(Profile); 223 } 224 225 return getSummary(); 226 } 227 228 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() { 229 computeDetailedSummary(); 230 return std::make_unique<ProfileSummary>( 231 ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount, 232 MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions); 233 } 234 235 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) { 236 assert(Count <= getInstrMaxCountValue() && 237 "Count value should be less than the max count value."); 238 NumFunctions++; 239 addCount(Count); 240 if (Count > MaxFunctionCount) 241 MaxFunctionCount = Count; 242 } 243 244 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) { 245 assert(Count <= getInstrMaxCountValue() && 246 "Count value should be less than the max count value."); 247 addCount(Count); 248 if (Count > MaxInternalBlockCount) 249 MaxInternalBlockCount = Count; 250 } 251