xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for computing profile summary data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/IR/Attributes.h"
14 #include "llvm/IR/Function.h"
15 #include "llvm/IR/Metadata.h"
16 #include "llvm/IR/Type.h"
17 #include "llvm/ProfileData/InstrProf.h"
18 #include "llvm/ProfileData/ProfileCommon.h"
19 #include "llvm/ProfileData/SampleProf.h"
20 #include "llvm/Support/Casting.h"
21 #include "llvm/Support/CommandLine.h"
22 
23 using namespace llvm;
24 
25 cl::opt<bool> UseContextLessSummary(
26     "profile-summary-contextless", cl::Hidden, cl::init(false), cl::ZeroOrMore,
27     cl::desc("Merge context profiles before calculating thresholds."));
28 
29 // The following two parameters determine the threshold for a count to be
30 // considered hot/cold. These two parameters are percentile values (multiplied
31 // by 10000). If the counts are sorted in descending order, the minimum count to
32 // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
33 // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
34 // threshold for determining cold count (everything <= this threshold is
35 // considered cold).
36 cl::opt<int> ProfileSummaryCutoffHot(
37     "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000), cl::ZeroOrMore,
38     cl::desc("A count is hot if it exceeds the minimum count to"
39              " reach this percentile of total counts."));
40 
41 cl::opt<int> ProfileSummaryCutoffCold(
42     "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999), cl::ZeroOrMore,
43     cl::desc("A count is cold if it is below the minimum count"
44              " to reach this percentile of total counts."));
45 
46 cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
47     "profile-summary-huge-working-set-size-threshold", cl::Hidden,
48     cl::init(15000), cl::ZeroOrMore,
49     cl::desc("The code working set size is considered huge if the number of"
50              " blocks required to reach the -profile-summary-cutoff-hot"
51              " percentile exceeds this count."));
52 
53 cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
54     "profile-summary-large-working-set-size-threshold", cl::Hidden,
55     cl::init(12500), cl::ZeroOrMore,
56     cl::desc("The code working set size is considered large if the number of"
57              " blocks required to reach the -profile-summary-cutoff-hot"
58              " percentile exceeds this count."));
59 
60 // The next two options override the counts derived from summary computation and
61 // are useful for debugging purposes.
62 cl::opt<int> ProfileSummaryHotCount(
63     "profile-summary-hot-count", cl::ReallyHidden, cl::ZeroOrMore,
64     cl::desc("A fixed hot count that overrides the count derived from"
65              " profile-summary-cutoff-hot"));
66 
67 cl::opt<int> ProfileSummaryColdCount(
68     "profile-summary-cold-count", cl::ReallyHidden, cl::ZeroOrMore,
69     cl::desc("A fixed cold count that overrides the count derived from"
70              " profile-summary-cutoff-cold"));
71 
72 // A set of cutoff values. Each value, when divided by ProfileSummary::Scale
73 // (which is 1000000) is a desired percentile of total counts.
74 static const uint32_t DefaultCutoffsData[] = {
75     10000,  /*  1% */
76     100000, /* 10% */
77     200000, 300000, 400000, 500000, 600000, 700000, 800000,
78     900000, 950000, 990000, 999000, 999900, 999990, 999999};
79 const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
80     DefaultCutoffsData;
81 
82 const ProfileSummaryEntry &
83 ProfileSummaryBuilder::getEntryForPercentile(SummaryEntryVector &DS,
84                                              uint64_t Percentile) {
85   auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
86     return Entry.Cutoff < Percentile;
87   });
88   // The required percentile has to be <= one of the percentiles in the
89   // detailed summary.
90   if (It == DS.end())
91     report_fatal_error("Desired percentile exceeds the maximum cutoff");
92   return *It;
93 }
94 
95 void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
96   // The first counter is not necessarily an entry count for IR
97   // instrumentation profiles.
98   // Eventually MaxFunctionCount will become obsolete and this can be
99   // removed.
100   addEntryCount(R.Counts[0]);
101   for (size_t I = 1, E = R.Counts.size(); I < E; ++I)
102     addInternalCount(R.Counts[I]);
103 }
104 
105 // To compute the detailed summary, we consider each line containing samples as
106 // equivalent to a block with a count in the instrumented profile.
107 void SampleProfileSummaryBuilder::addRecord(
108     const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
109   if (!isCallsiteSample) {
110     NumFunctions++;
111     if (FS.getHeadSamples() > MaxFunctionCount)
112       MaxFunctionCount = FS.getHeadSamples();
113   }
114   for (const auto &I : FS.getBodySamples()) {
115     uint64_t Count = I.second.getSamples();
116       addCount(Count);
117   }
118   for (const auto &I : FS.getCallsiteSamples())
119     for (const auto &CS : I.second)
120       addRecord(CS.second, true);
121 }
122 
123 // The argument to this method is a vector of cutoff percentages and the return
124 // value is a vector of (Cutoff, MinCount, NumCounts) triplets.
125 void ProfileSummaryBuilder::computeDetailedSummary() {
126   if (DetailedSummaryCutoffs.empty())
127     return;
128   llvm::sort(DetailedSummaryCutoffs);
129   auto Iter = CountFrequencies.begin();
130   const auto End = CountFrequencies.end();
131 
132   uint32_t CountsSeen = 0;
133   uint64_t CurrSum = 0, Count = 0;
134 
135   for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
136     assert(Cutoff <= 999999);
137     APInt Temp(128, TotalCount);
138     APInt N(128, Cutoff);
139     APInt D(128, ProfileSummary::Scale);
140     Temp *= N;
141     Temp = Temp.sdiv(D);
142     uint64_t DesiredCount = Temp.getZExtValue();
143     assert(DesiredCount <= TotalCount);
144     while (CurrSum < DesiredCount && Iter != End) {
145       Count = Iter->first;
146       uint32_t Freq = Iter->second;
147       CurrSum += (Count * Freq);
148       CountsSeen += Freq;
149       Iter++;
150     }
151     assert(CurrSum >= DesiredCount);
152     ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen};
153     DetailedSummary.push_back(PSE);
154   }
155 }
156 
157 uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
158   auto &HotEntry =
159       ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
160   uint64_t HotCountThreshold = HotEntry.MinCount;
161   if (ProfileSummaryHotCount.getNumOccurrences() > 0)
162     HotCountThreshold = ProfileSummaryHotCount;
163   return HotCountThreshold;
164 }
165 
166 uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) {
167   auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
168       DS, ProfileSummaryCutoffCold);
169   uint64_t ColdCountThreshold = ColdEntry.MinCount;
170   if (ProfileSummaryColdCount.getNumOccurrences() > 0)
171     ColdCountThreshold = ProfileSummaryColdCount;
172   return ColdCountThreshold;
173 }
174 
175 std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
176   computeDetailedSummary();
177   return std::make_unique<ProfileSummary>(
178       ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0,
179       MaxFunctionCount, NumCounts, NumFunctions);
180 }
181 
182 std::unique_ptr<ProfileSummary>
183 SampleProfileSummaryBuilder::computeSummaryForProfiles(
184     const StringMap<sampleprof::FunctionSamples> &Profiles) {
185   assert(NumFunctions == 0 &&
186          "This can only be called on an empty summary builder");
187   StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
188   const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
189   // For CSSPGO, context-sensitive profile effectively split a function profile
190   // into many copies each representing the CFG profile of a particular calling
191   // context. That makes the count distribution looks more flat as we now have
192   // more function profiles each with lower counts, which in turn leads to lower
193   // hot thresholds. To compensate for that, by defauly we merge context
194   // profiles before coumputing profile summary.
195   if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
196                                 !UseContextLessSummary.getNumOccurrences())) {
197     for (const auto &I : Profiles) {
198       ContextLessProfiles[I.second.getName()].merge(I.second);
199     }
200     ProfilesToUse = &ContextLessProfiles;
201   }
202 
203   for (const auto &I : *ProfilesToUse) {
204     const sampleprof::FunctionSamples &Profile = I.second;
205     addRecord(Profile);
206   }
207 
208   return getSummary();
209 }
210 
211 std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
212   computeDetailedSummary();
213   return std::make_unique<ProfileSummary>(
214       ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount,
215       MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions);
216 }
217 
218 void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
219   NumFunctions++;
220 
221   // Skip invalid count.
222   if (Count == (uint64_t)-1)
223     return;
224 
225   addCount(Count);
226   if (Count > MaxFunctionCount)
227     MaxFunctionCount = Count;
228 }
229 
230 void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
231   // Skip invalid count.
232   if (Count == (uint64_t)-1)
233     return;
234 
235   addCount(Count);
236   if (Count > MaxInternalBlockCount)
237     MaxInternalBlockCount = Count;
238 }
239