xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
//=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for computing profile summary data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/IR/ProfileSummary.h"
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ProfileData/ProfileCommon.h"
16 #include "llvm/ProfileData/SampleProf.h"
17 #include "llvm/Support/CommandLine.h"
18 
19 using namespace llvm;
20 
21 namespace llvm {
22 cl::opt<bool> UseContextLessSummary(
23     "profile-summary-contextless", cl::Hidden,
24     cl::desc("Merge context profiles before calculating thresholds."));
25 
26 // The following two parameters determine the threshold for a count to be
27 // considered hot/cold. These two parameters are percentile values (multiplied
28 // by 10000). If the counts are sorted in descending order, the minimum count to
29 // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
30 // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
31 // threshold for determining cold count (everything <= this threshold is
32 // considered cold).
33 cl::opt<int> ProfileSummaryCutoffHot(
34     "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
35     cl::desc("A count is hot if it exceeds the minimum count to"
36              " reach this percentile of total counts."));
37 
38 cl::opt<int> ProfileSummaryCutoffCold(
39     "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
40     cl::desc("A count is cold if it is below the minimum count"
41              " to reach this percentile of total counts."));
42 
43 cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
44     "profile-summary-huge-working-set-size-threshold", cl::Hidden,
45     cl::init(15000),
46     cl::desc("The code working set size is considered huge if the number of"
47              " blocks required to reach the -profile-summary-cutoff-hot"
48              " percentile exceeds this count."));
49 
50 cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
51     "profile-summary-large-working-set-size-threshold", cl::Hidden,
52     cl::init(12500),
53     cl::desc("The code working set size is considered large if the number of"
54              " blocks required to reach the -profile-summary-cutoff-hot"
55              " percentile exceeds this count."));
56 
57 // The next two options override the counts derived from summary computation and
58 // are useful for debugging purposes.
59 cl::opt<uint64_t> ProfileSummaryHotCount(
60     "profile-summary-hot-count", cl::ReallyHidden,
61     cl::desc("A fixed hot count that overrides the count derived from"
62              " profile-summary-cutoff-hot"));
63 
64 cl::opt<uint64_t> ProfileSummaryColdCount(
65     "profile-summary-cold-count", cl::ReallyHidden,
66     cl::desc("A fixed cold count that overrides the count derived from"
67              " profile-summary-cutoff-cold"));
68 } // namespace llvm
69 
70 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
71 // (which is 1000000) is a desired percentile of total counts.
72 static const uint32_t DefaultCutoffsData[] = {
73     10000,  /*  1% */
74     100000, /* 10% */
75     200000, 300000, 400000, 500000, 600000, 700000, 800000,
76     900000, 950000, 990000, 999000, 999900, 999990, 999999};
77 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
78     DefaultCutoffsData;
79 
80 // An entry for the 0th percentile to correctly calculate hot/cold count
81 // thresholds when -profile-summary-cutoff-hot/cold is 0.  If the hot cutoff is
82 // 0, no sample counts are treated as hot.  If the cold cutoff is 0, all sample
83 // counts are treated as cold.  Assumes there is no UINT64_MAX sample counts.
84 static const ProfileSummaryEntry ZeroCutoffEntry = {0, UINT64_MAX, 0};
85 
86 const ProfileSummaryEntry &
getEntryForPercentile(const SummaryEntryVector & DS,uint64_t Percentile)87 ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
88                                              uint64_t Percentile) {
89   if (Percentile == 0)
90     return ZeroCutoffEntry;
91 
92   auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
93     return Entry.Cutoff < Percentile;
94   });
95   // The required percentile has to be <= one of the percentiles in the
96   // detailed summary.
97   if (It == DS.end())
98     report_fatal_error("Desired percentile exceeds the maximum cutoff");
99   return *It;
100 }
101 
addRecord(const InstrProfRecord & R)102 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
103   // The first counter is not necessarily an entry count for IR
104   // instrumentation profiles.
105   // Eventually MaxFunctionCount will become obsolete and this can be
106   // removed.
107 
108   if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo)
109     return;
110 
111   addEntryCount(R.Counts[0]);
112   for (size_t I = 1, E = R.Counts.size(); I < E; ++I)
113     addInternalCount(R.Counts[I]);
114 }
115 
116 // To compute the detailed summary, we consider each line containing samples as
117 // equivalent to a block with a count in the instrumented profile.
addRecord(const sampleprof::FunctionSamples & FS,bool isCallsiteSample)118 void SampleProfileSummaryBuilder::addRecord(
119     const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
120   if (!isCallsiteSample) {
121     NumFunctions++;
122     if (FS.getHeadSamples() > MaxFunctionCount)
123       MaxFunctionCount = FS.getHeadSamples();
124   } else if (FS.getContext().hasAttribute(
125                  sampleprof::ContextDuplicatedIntoBase)) {
126     // Do not recount callee samples if they are already merged into their base
127     // profiles. This can happen to CS nested profile.
128     return;
129   }
130 
131   for (const auto &I : FS.getBodySamples()) {
132     uint64_t Count = I.second.getSamples();
133       addCount(Count);
134   }
135   for (const auto &I : FS.getCallsiteSamples())
136     for (const auto &CS : I.second)
137       addRecord(CS.second, true);
138 }
139 
140 // The argument to this method is a vector of cutoff percentages and the return
141 // value is a vector of (Cutoff, MinCount, NumCounts) triplets.
computeDetailedSummary()142 void ProfileSummaryBuilder::computeDetailedSummary() {
143   if (DetailedSummaryCutoffs.empty())
144     return;
145   llvm::sort(DetailedSummaryCutoffs);
146   auto Iter = CountFrequencies.begin();
147   const auto End = CountFrequencies.end();
148 
149   uint32_t CountsSeen = 0;
150   uint64_t CurrSum = 0, Count = 0;
151 
152   for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
153     assert(Cutoff <= 999999);
154     APInt Temp(128, TotalCount);
155     APInt N(128, Cutoff);
156     APInt D(128, ProfileSummary::Scale);
157     Temp *= N;
158     Temp = Temp.sdiv(D);
159     uint64_t DesiredCount = Temp.getZExtValue();
160     assert(DesiredCount <= TotalCount);
161     while (CurrSum < DesiredCount && Iter != End) {
162       Count = Iter->first;
163       uint32_t Freq = Iter->second;
164       CurrSum += (Count * Freq);
165       CountsSeen += Freq;
166       Iter++;
167     }
168     assert(CurrSum >= DesiredCount);
169     ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen};
170     DetailedSummary.push_back(PSE);
171   }
172 }
173 
174 uint64_t
getHotCountThreshold(const SummaryEntryVector & DS)175 ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
176   auto &HotEntry =
177       ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
178   uint64_t HotCountThreshold = HotEntry.MinCount;
179   if (ProfileSummaryHotCount.getNumOccurrences() > 0)
180     HotCountThreshold = ProfileSummaryHotCount;
181   return HotCountThreshold;
182 }
183 
184 uint64_t
getColdCountThreshold(const SummaryEntryVector & DS)185 ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
186   auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
187       DS, ProfileSummaryCutoffCold);
188   uint64_t ColdCountThreshold = ColdEntry.MinCount;
189   if (ProfileSummaryColdCount.getNumOccurrences() > 0)
190     ColdCountThreshold = ProfileSummaryColdCount;
191   return ColdCountThreshold;
192 }
193 
getSummary()194 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
195   computeDetailedSummary();
196   return std::make_unique<ProfileSummary>(
197       ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0,
198       MaxFunctionCount, NumCounts, NumFunctions);
199 }
200 
201 std::unique_ptr<ProfileSummary>
computeSummaryForProfiles(const SampleProfileMap & Profiles)202 SampleProfileSummaryBuilder::computeSummaryForProfiles(
203     const SampleProfileMap &Profiles) {
204   assert(NumFunctions == 0 &&
205          "This can only be called on an empty summary builder");
206   sampleprof::SampleProfileMap ContextLessProfiles;
207   const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
208   // For CSSPGO, context-sensitive profile effectively split a function profile
209   // into many copies each representing the CFG profile of a particular calling
210   // context. That makes the count distribution looks more flat as we now have
211   // more function profiles each with lower counts, which in turn leads to lower
212   // hot thresholds. To compensate for that, by default we merge context
213   // profiles before computing profile summary.
214   if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
215                                 !UseContextLessSummary.getNumOccurrences())) {
216     ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true);
217     ProfilesToUse = &ContextLessProfiles;
218   }
219 
220   for (const auto &I : *ProfilesToUse) {
221     const sampleprof::FunctionSamples &Profile = I.second;
222     addRecord(Profile);
223   }
224 
225   return getSummary();
226 }
227 
getSummary()228 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
229   computeDetailedSummary();
230   return std::make_unique<ProfileSummary>(
231       ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount,
232       MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions);
233 }
234 
addEntryCount(uint64_t Count)235 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
236   assert(Count <= getInstrMaxCountValue() &&
237          "Count value should be less than the max count value.");
238   NumFunctions++;
239   addCount(Count);
240   if (Count > MaxFunctionCount)
241     MaxFunctionCount = Count;
242 }
243 
addInternalCount(uint64_t Count)244 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
245   assert(Count <= getInstrMaxCountValue() &&
246          "Count value should be less than the max count value.");
247   addCount(Count);
248   if (Count > MaxInternalBlockCount)
249     MaxInternalBlockCount = Count;
250 }
251