xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-profdata/llvm-profdata.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- llvm-profdata.cpp - LLVM profile data tool -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // llvm-profdata merges .profdata files.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/SmallSet.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Debuginfod/HTTPClient.h"
17 #include "llvm/IR/LLVMContext.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/ProfileData/DataAccessProf.h"
20 #include "llvm/ProfileData/InstrProfCorrelator.h"
21 #include "llvm/ProfileData/InstrProfReader.h"
22 #include "llvm/ProfileData/InstrProfWriter.h"
23 #include "llvm/ProfileData/MemProf.h"
24 #include "llvm/ProfileData/MemProfReader.h"
25 #include "llvm/ProfileData/MemProfSummaryBuilder.h"
26 #include "llvm/ProfileData/MemProfYAML.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProfReader.h"
29 #include "llvm/ProfileData/SampleProfWriter.h"
30 #include "llvm/Support/BalancedPartitioning.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Discriminator.h"
33 #include "llvm/Support/Errc.h"
34 #include "llvm/Support/FileSystem.h"
35 #include "llvm/Support/Format.h"
36 #include "llvm/Support/FormattedStream.h"
37 #include "llvm/Support/LLVMDriver.h"
38 #include "llvm/Support/MD5.h"
39 #include "llvm/Support/MemoryBuffer.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Regex.h"
42 #include "llvm/Support/ThreadPool.h"
43 #include "llvm/Support/Threading.h"
44 #include "llvm/Support/VirtualFileSystem.h"
45 #include "llvm/Support/WithColor.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include <algorithm>
48 #include <cmath>
49 #include <optional>
50 #include <queue>
51 
52 using namespace llvm;
53 using ProfCorrelatorKind = InstrProfCorrelator::ProfCorrelatorKind;
54 
// https://llvm.org/docs/CommandGuide/llvm-profdata.html documents each
// subcommand.
//
// Each cl::SubCommand below pairs a subcommand name with its description
// string. The options defined later in this file attach themselves to one or
// more of these subcommands through cl::sub().
cl::SubCommand ShowSubcommand(
    "show",
    "Takes a profile data file and displays the profiles. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-show");
cl::SubCommand OrderSubcommand(
    "order",
    "Reads temporal profiling traces from a profile and outputs a function "
    "order that reduces the number of page faults for those traces. See "
    "detailed documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-order");
cl::SubCommand OverlapSubcommand(
    "overlap",
    "Computes and displays the overlap between two profiles. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-overlap");
cl::SubCommand MergeSubcommand(
    "merge",
    "Takes several profiles and merge them together. See detailed "
    "documentation in "
    "https://llvm.org/docs/CommandGuide/llvm-profdata.html#profdata-merge");
78 
namespace {
// Profile kinds selectable on the command line; value type of the ProfileKind
// and ShowProfileKind options.
enum ProfileKinds { instr, sample, memory };
// Failure policies; value type of the --failure-mode option, where they are
// spelled "warn", "any" and "all" respectively.
enum FailureMode { warnOnly, failIfAnyAreInvalid, failIfAllAreInvalid };

// Output encodings; value type of the OutputFormat option.
enum ProfileFormat {
  PF_None = 0,
  PF_Text,
  PF_Compact_Binary, // Deprecated
  PF_Ext_Binary,
  PF_GCC,
  PF_Binary
};

// Output formats accepted by --show-format.
enum class ShowFormat { Text, Json, Yaml };
} // namespace
94 
// Common options.
// --output is registered with all four subcommands; its initial value is "-".
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                    cl::init("-"), cl::desc("Output file"),
                                    cl::sub(ShowSubcommand),
                                    cl::sub(OrderSubcommand),
                                    cl::sub(OverlapSubcommand),
                                    cl::sub(MergeSubcommand));
// NOTE: cl::alias must not have cl::sub(), since aliased option's cl::sub()
// will be used. llvm::cl::alias::done() method asserts this condition.
static cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
                                 cl::aliasopt(OutputFilename));

// Options common to at least two commands.
// Profile kind for merge and overlap; only instr and sample are offered here.
static cl::opt<ProfileKinds> ProfileKind(
    cl::desc("Profile kind:"), cl::sub(MergeSubcommand),
    cl::sub(OverlapSubcommand), cl::init(instr),
    cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
               clEnumVal(sample, "Sample profile")));
// Positional profile file argument for show and order.
static cl::opt<std::string> Filename(cl::Positional,
                                     cl::desc("<profdata-file>"),
                                     cl::sub(ShowSubcommand),
                                     cl::sub(OrderSubcommand));
static cl::opt<unsigned> MaxDbgCorrelationWarnings(
    "max-debug-info-correlation-warnings",
    cl::desc("The maximum number of warnings to emit when correlating "
             "profile from debug info (0 = no limit)"),
    cl::sub(MergeSubcommand), cl::sub(ShowSubcommand), cl::init(5));
static cl::opt<std::string> ProfiledBinary(
    "profiled-binary", cl::init(""),
    cl::desc("Path to binary from which the profile was collected."),
    cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
static cl::opt<std::string> DebugInfoFilename(
    "debug-info", cl::init(""),
    cl::desc(
        "For show, read and extract profile metadata from debug info and show "
        "the functions it found. For merge, use the provided debug info to "
        "correlate the raw profile."),
    cl::sub(ShowSubcommand), cl::sub(MergeSubcommand));
static cl::opt<std::string>
    BinaryFilename("binary-file", cl::init(""),
                   cl::desc("For merge, use the provided unstripped binary to "
                            "correlate the raw profile."),
                   cl::sub(MergeSubcommand));
// NOTE(review): this option and --correlate below carry no cl::sub(), unlike
// their neighbours -- confirm that is intentional.
static cl::list<std::string> DebugFileDirectory(
    "debug-file-directory",
    cl::desc("Directories to search for object files by build ID"));
static cl::opt<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden,
                                cl::sub(MergeSubcommand),
                                cl::desc("Enable debuginfod"));
static cl::opt<ProfCorrelatorKind> BIDFetcherProfileCorrelate(
    "correlate",
    cl::desc("Use debug-info or binary correlation to correlate profiles with "
             "build id fetcher"),
    cl::init(InstrProfCorrelator::NONE),
    cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
                          "No profile correlation"),
               clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
                          "Use debug info to correlate"),
               clEnumValN(InstrProfCorrelator::BINARY, "binary",
                          "Use binary to correlate")));
// --function is registered with show, overlap and merge.
static cl::opt<std::string> FuncNameFilter(
    "function",
    cl::desc("Only functions matching the filter are shown in the output. For "
             "overlapping CSSPGO, this takes a function name with calling "
             "context."),
    cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
    cl::sub(MergeSubcommand));
162 
// TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
// factor out the common cl::sub in cl::opt constructor for subcommand-specific
// options.

// Options specific to merge subcommand.
// Inputs can be given positionally, as --weighted-input=<weight>,<filename>,
// or through a list file named by --input-files.
static cl::list<std::string> InputFilenames(cl::Positional,
                                            cl::sub(MergeSubcommand),
                                            cl::desc("<filename...>"));
static cl::list<std::string>
    WeightedInputFilenames("weighted-input", cl::sub(MergeSubcommand),
                           cl::desc("<weight>,<filename>"));
// Output encoding; initial value is PF_Ext_Binary.
static cl::opt<ProfileFormat> OutputFormat(
    cl::desc("Format of output profile"), cl::sub(MergeSubcommand),
    cl::init(PF_Ext_Binary),
    cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding"),
               clEnumValN(PF_Ext_Binary, "extbinary",
                          "Extensible binary encoding "
                          "(default)"),
               clEnumValN(PF_Text, "text", "Text encoding"),
               clEnumValN(PF_GCC, "gcc",
                          "GCC encoding (only meaningful for -sample)")));
static cl::opt<std::string>
    InputFilenamesFile("input-files", cl::init(""), cl::sub(MergeSubcommand),
                       cl::desc("Path to file containing newline-separated "
                                "[<weight>,]<filename> entries"));
static cl::alias InputFilenamesFileA("f", cl::desc("Alias for --input-files"),
                                     cl::aliasopt(InputFilenamesFile));
static cl::opt<bool> DumpInputFileList(
    "dump-input-file-list", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Dump the list of input files and their weights, then exit"));
static cl::opt<std::string> RemappingFile("remapping-file",
                                          cl::value_desc("file"),
                                          cl::sub(MergeSubcommand),
                                          cl::desc("Symbol remapping file"));
static cl::alias RemappingFileA("r", cl::desc("Alias for --remapping-file"),
                                cl::aliasopt(RemappingFile));
// Hidden (cl::Hidden) tuning switches for the merge output follow.
static cl::opt<bool>
    UseMD5("use-md5", cl::init(false), cl::Hidden,
           cl::desc("Choose to use MD5 to represent string in name table (only "
                    "meaningful for -extbinary)"),
           cl::sub(MergeSubcommand));
static cl::opt<bool> CompressAllSections(
    "compress-all-sections", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Compress all sections when writing the profile (only "
             "meaningful for -extbinary)"));
static cl::opt<bool> SampleMergeColdContext(
    "sample-merge-cold-context", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc(
        "Merge context sample profiles whose count is below cold threshold"));
static cl::opt<bool> SampleTrimColdContext(
    "sample-trim-cold-context", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc(
        "Trim context sample profiles whose count is below cold threshold"));
static cl::opt<uint32_t> SampleColdContextFrameDepth(
    "sample-frame-depth-for-cold-context", cl::init(1),
    cl::sub(MergeSubcommand),
    cl::desc("Keep the last K frames while merging cold profile. 1 means the "
             "context-less base profile"));
225 static cl::opt<size_t> OutputSizeLimit(
226     "output-size-limit", cl::init(0), cl::Hidden, cl::sub(MergeSubcommand),
227     cl::desc("Trim cold functions until profile size is below specified "
228              "limit in bytes. This uses a heursitic and functions may be "
229              "excessively trimmed"));
static cl::opt<bool> GenPartialProfile(
    "gen-partial-profile", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
static cl::opt<bool> SplitLayout(
    "split-layout", cl::init(false), cl::Hidden, cl::sub(MergeSubcommand),
    cl::desc("Split the profile to two sections with one containing sample "
             "profiles with inlined functions and the other without (only "
             "meaningful for -extbinary)"));
// Options for supplementing an instr profile with a sample profile.
static cl::opt<std::string> SupplInstrWithSample(
    "supplement-instr-with-sample", cl::init(""), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Supplement an instr profile with sample profile, to correct "
             "the profile unrepresentativeness issue. The sample "
             "profile is the input of the flag. Output will be in instr "
             "format (The flag only works with -instr)"));
static cl::opt<float> ZeroCounterThreshold(
    "zero-counter-threshold", cl::init(0.7), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("For the function which is cold in instr profile but hot in "
             "sample profile, if the ratio of the number of zero counters "
             "divided by the total number of counters is above the "
             "threshold, the profile of the function will be regarded as "
             "being harmful for performance and will be dropped."));
static cl::opt<unsigned> SupplMinSizeThreshold(
    "suppl-min-size-threshold", cl::init(10), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("If the size of a function is smaller than the threshold, "
             "assume it can be inlined by PGO early inliner and it won't "
             "be adjusted based on sample profile."));
static cl::opt<unsigned> InstrProfColdThreshold(
    "instr-prof-cold-threshold", cl::init(0), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("User specified cold threshold for instr profile which will "
             "override the cold threshold got from profile summary. "));
// WARNING: This reservoir size value is propagated to any input indexed
// profiles for simplicity. Changing this value between invocations could
// result in sample bias.
static cl::opt<uint64_t> TemporalProfTraceReservoirSize(
    "temporal-profile-trace-reservoir-size", cl::init(100),
    cl::sub(MergeSubcommand),
    cl::desc("The maximum number of stored temporal profile traces (default: "
             "100)"));
static cl::opt<uint64_t> TemporalProfMaxTraceLength(
    "temporal-profile-max-trace-length", cl::init(10000),
    cl::sub(MergeSubcommand),
    cl::desc("The maximum length of a single temporal profile trace "
             "(default: 10000)"));
static cl::opt<std::string> FuncNameNegativeFilter(
    "no-function", cl::init(""), cl::sub(MergeSubcommand),
    cl::desc("Exclude functions matching the filter from the output."));

// Policy for invalid input profiles; initial value is failIfAnyAreInvalid.
static cl::opt<FailureMode>
    FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
             cl::desc("Failure mode:"), cl::sub(MergeSubcommand),
             cl::values(clEnumValN(warnOnly, "warn",
                                   "Do not fail and just print warnings."),
                        clEnumValN(failIfAnyAreInvalid, "any",
                                   "Fail if any profile is invalid."),
                        clEnumValN(failIfAllAreInvalid, "all",
                                   "Fail only if all profiles are invalid.")));

static cl::opt<bool> OutputSparse(
    "sparse", cl::init(false), cl::sub(MergeSubcommand),
    cl::desc("Generate a sparse profile (only meaningful for -instr)"));
static cl::opt<unsigned> NumThreads(
    "num-threads", cl::init(0), cl::sub(MergeSubcommand),
    cl::desc("Number of merge threads to use (default: autodetect)"));
static cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
                             cl::aliasopt(NumThreads));

static cl::opt<std::string> ProfileSymbolListFile(
    "prof-sym-list", cl::init(""), cl::sub(MergeSubcommand),
    cl::desc("Path to file containing the list of function symbols "
             "used to populate profile symbol list"));

// Optional layout conversion of the generated sample profile; initial value
// is SPL_None.
static cl::opt<SampleProfileLayout> ProfileLayout(
    "convert-sample-profile-layout",
    cl::desc("Convert the generated profile to a profile with a new layout"),
    cl::sub(MergeSubcommand), cl::init(SPL_None),
    cl::values(
        clEnumValN(SPL_Nest, "nest",
                   "Nested profile, the input should be CS flat profile"),
        clEnumValN(SPL_Flat, "flat",
                   "Profile with nested inlinee flatten out")));

static cl::opt<bool> DropProfileSymbolList(
    "drop-profile-symbol-list", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
             "(only meaningful for -sample)"));

static cl::opt<bool> KeepVTableSymbols(
    "keep-vtable-symbols", cl::init(false), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("If true, keep the vtable symbols in indexed profiles"));

// Temporary support for writing the previous version of the format, to enable
// some forward compatibility.
// TODO: Consider enabling this with future version changes as well, to ease
// deployment of newer versions of llvm-profdata.
static cl::opt<bool> DoWritePrevVersion(
    "write-prev-version", cl::init(false), cl::Hidden,
    cl::desc("Write the previous version of indexed format, to enable "
             "some forward compatibility."));

// MemProf serialization controls. These values are passed to the
// InstrProfWriter constructed by WriterContext.
static cl::opt<memprof::IndexedVersion> MemProfVersionRequested(
    "memprof-version", cl::Hidden, cl::sub(MergeSubcommand),
    cl::desc("Specify the version of the memprof format to use"),
    cl::init(memprof::Version3),
    cl::values(clEnumValN(memprof::Version2, "2", "version 2"),
               clEnumValN(memprof::Version3, "3", "version 3"),
               clEnumValN(memprof::Version4, "4", "version 4")));

static cl::opt<bool> MemProfFullSchema(
    "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
    cl::desc("Use the full schema for serialization"), cl::init(false));

static cl::opt<bool>
    MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false),
                                 cl::Hidden, cl::sub(MergeSubcommand),
                                 cl::desc("Generate random hotness values"));
static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
    "memprof-random-hotness-seed", cl::init(0), cl::Hidden,
    cl::sub(MergeSubcommand),
    cl::desc("Random hotness seed to use (0 to generate new seed)"));
356 
// Options specific to overlap subcommand.
// Both profile paths are required positional arguments.
static cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
                                         cl::desc("<base profile file>"),
                                         cl::sub(OverlapSubcommand));
static cl::opt<std::string> TestFilename(cl::Positional, cl::Required,
                                         cl::desc("<test profile file>"),
                                         cl::sub(OverlapSubcommand));

static cl::opt<unsigned long long> SimilarityCutoff(
    "similarity-cutoff", cl::init(0),
    cl::desc("For sample profiles, list function names (with calling context "
             "for csspgo) for overlapped functions "
             "with similarities below the cutoff (percentage times 10000)."),
    cl::sub(OverlapSubcommand));

static cl::opt<bool> IsCS(
    "cs", cl::init(false),
    cl::desc("For context sensitive PGO counts. Does not work with CSSPGO."),
    cl::sub(OverlapSubcommand));

// Initialised with -1 in an unsigned long long option, i.e. the maximum
// representable value.
static cl::opt<unsigned long long> OverlapValueCutoff(
    "value-cutoff", cl::init(-1),
    cl::desc(
        "Function level overlap information for every function (with calling "
        "context for csspgo) in test "
        "profile with max count value greater than the parameter value"),
    cl::sub(OverlapSubcommand));
384 
// Options specific to show subcommand.
static cl::opt<bool>
    ShowCounts("counts", cl::init(false),
               cl::desc("Show counter values for shown functions"),
               cl::sub(ShowSubcommand));
// Output format selector; initial value is ShowFormat::Text.
static cl::opt<ShowFormat>
    SFormat("show-format", cl::init(ShowFormat::Text),
            cl::desc("Emit output in the selected format if supported"),
            cl::sub(ShowSubcommand),
            cl::values(clEnumValN(ShowFormat::Text, "text",
                                  "emit normal text output (default)"),
                       clEnumValN(ShowFormat::Json, "json", "emit JSON"),
                       clEnumValN(ShowFormat::Yaml, "yaml", "emit YAML")));
// TODO: Consider replacing this with `--show-format=text-encoding`.
static cl::opt<bool>
    TextFormat("text", cl::init(false),
               cl::desc("Show instr profile data in text dump format"),
               cl::sub(ShowSubcommand));
// Deprecated spelling of --show-format=json (see the description string).
static cl::opt<bool>
    JsonFormat("json",
               cl::desc("Show sample profile data in the JSON format "
                        "(deprecated, please use --show-format=json)"),
               cl::sub(ShowSubcommand));
static cl::opt<bool> ShowIndirectCallTargets(
    "ic-targets", cl::init(false),
    cl::desc("Show indirect call site target values for shown functions"),
    cl::sub(ShowSubcommand));
static cl::opt<bool>
    ShowVTables("show-vtables", cl::init(false),
                cl::desc("Show vtable names for shown functions"),
                cl::sub(ShowSubcommand));
static cl::opt<bool> ShowMemOPSizes(
    "memop-sizes", cl::init(false),
    cl::desc("Show the profiled sizes of the memory intrinsic calls "
             "for shown functions"),
    cl::sub(ShowSubcommand));
static cl::opt<bool>
    ShowDetailedSummary("detailed-summary", cl::init(false),
                        cl::desc("Show detailed profile summary"),
                        cl::sub(ShowSubcommand));
// Comma-separated list (cl::CommaSeparated) of cutoff values.
static cl::list<uint32_t> DetailedSummaryCutoffs(
    cl::CommaSeparated, "detailed-summary-cutoffs",
    cl::desc(
        "Cutoff percentages (times 10000) for generating detailed summary"),
    cl::value_desc("800000,901000,999999"), cl::sub(ShowSubcommand));
static cl::opt<bool>
    ShowHotFuncList("hot-func-list", cl::init(false),
                    cl::desc("Show profile summary of a list of hot functions"),
                    cl::sub(ShowSubcommand));
static cl::opt<bool>
    ShowAllFunctions("all-functions", cl::init(false),
                     cl::desc("Details for each and every function"),
                     cl::sub(ShowSubcommand));
static cl::opt<bool> ShowCS("showcs", cl::init(false),
                            cl::desc("Show context sensitive counts"),
                            cl::sub(ShowSubcommand));
// Profile kind for show; unlike ProfileKind this also offers `memory`.
static cl::opt<ProfileKinds> ShowProfileKind(
    cl::desc("Profile kind supported by show:"), cl::sub(ShowSubcommand),
    cl::init(instr),
    cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
               clEnumVal(sample, "Sample profile"),
               clEnumVal(memory, "MemProf memory access profile")));
static cl::opt<uint32_t> TopNFunctions(
    "topn", cl::init(0),
    cl::desc("Show the list of functions with the largest internal counts"),
    cl::sub(ShowSubcommand));
// Shares the flag name "value-cutoff" with OverlapValueCutoff; the two are
// registered under different subcommands.
static cl::opt<uint32_t> ShowValueCutoff(
    "value-cutoff", cl::init(0),
    cl::desc("Set the count value cutoff. Functions with the maximum count "
             "less than this value will not be printed out. (Default is 0)"),
    cl::sub(ShowSubcommand));
static cl::opt<bool> OnlyListBelow(
    "list-below-cutoff", cl::init(false),
    cl::desc("Only output names of functions whose max count values are "
             "below the cutoff value"),
    cl::sub(ShowSubcommand));
static cl::opt<bool> ShowProfileSymbolList(
    "show-prof-sym-list", cl::init(false),
    cl::desc("Show profile symbol list if it exists in the profile. "),
    cl::sub(ShowSubcommand));
static cl::opt<bool> ShowSectionInfoOnly(
    "show-sec-info-only", cl::init(false),
    cl::desc("Show the information of each section in the sample profile. "
             "The flag is only usable when the sample profile is in "
             "extbinary format"),
    cl::sub(ShowSubcommand));
static cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
                                   cl::desc("Show binary ids in the profile. "),
                                   cl::sub(ShowSubcommand));
static cl::opt<bool> ShowTemporalProfTraces(
    "temporal-profile-traces",
    cl::desc("Show temporal profile traces in the profile."),
    cl::sub(ShowSubcommand));

static cl::opt<bool>
    ShowCovered("covered", cl::init(false),
                cl::desc("Show only the functions that have been executed."),
                cl::sub(ShowSubcommand));

static cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
                                        cl::desc("Show profile version. "),
                                        cl::sub(ShowSubcommand));
487 
// Options specific to order subcommand.
// Number of trailing traces held out for evaluation; the initial value is 0.
static cl::opt<unsigned>
    NumTestTraces("num-test-traces", cl::init(0),
                  cl::desc("Keep aside the last <num-test-traces> traces in "
                           "the profile when computing the function order and "
                           "instead use them to evaluate that order"),
                  cl::sub(OrderSubcommand));
495 
// We use this string to indicate that there are
// multiple static functions that map to the same name.
const std::string DuplicateNameStr = "----";
499 
warn(Twine Message,StringRef Whence="",StringRef Hint="")500 static void warn(Twine Message, StringRef Whence = "", StringRef Hint = "") {
501   WithColor::warning();
502   if (!Whence.empty())
503     errs() << Whence << ": ";
504   errs() << Message << "\n";
505   if (!Hint.empty())
506     WithColor::note() << Hint << "\n";
507 }
508 
warn(Error E,StringRef Whence="")509 static void warn(Error E, StringRef Whence = "") {
510   if (E.isA<InstrProfError>()) {
511     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
512       warn(IPE.message(), Whence);
513     });
514   }
515 }
516 
exitWithError(Twine Message,StringRef Whence="",StringRef Hint="")517 static void exitWithError(Twine Message, StringRef Whence = "",
518                           StringRef Hint = "") {
519   WithColor::error();
520   if (!Whence.empty())
521     errs() << Whence << ": ";
522   errs() << Message << "\n";
523   if (!Hint.empty())
524     WithColor::note() << Hint << "\n";
525   ::exit(1);
526 }
527 
exitWithError(Error E,StringRef Whence="")528 static void exitWithError(Error E, StringRef Whence = "") {
529   if (E.isA<InstrProfError>()) {
530     handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
531       instrprof_error instrError = IPE.get();
532       StringRef Hint = "";
533       if (instrError == instrprof_error::unrecognized_format) {
534         // Hint in case user missed specifying the profile type.
535         Hint = "Perhaps you forgot to use the --sample or --memory option?";
536       }
537       exitWithError(IPE.message(), Whence, Hint);
538     });
539     return;
540   }
541 
542   exitWithError(toString(std::move(E)), Whence);
543 }
544 
exitWithErrorCode(std::error_code EC,StringRef Whence="")545 static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
546   exitWithError(EC.message(), Whence);
547 }
548 
warnOrExitGivenError(FailureMode FailMode,std::error_code EC,StringRef Whence="")549 static void warnOrExitGivenError(FailureMode FailMode, std::error_code EC,
550                                  StringRef Whence = "") {
551   if (FailMode == failIfAnyAreInvalid)
552     exitWithErrorCode(EC, Whence);
553   else
554     warn(EC.message(), Whence);
555 }
556 
handleMergeWriterError(Error E,StringRef WhenceFile="",StringRef WhenceFunction="",bool ShowHint=true)557 static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
558                                    StringRef WhenceFunction = "",
559                                    bool ShowHint = true) {
560   if (!WhenceFile.empty())
561     errs() << WhenceFile << ": ";
562   if (!WhenceFunction.empty())
563     errs() << WhenceFunction << ": ";
564 
565   auto IPE = instrprof_error::success;
566   E = handleErrors(std::move(E),
567                    [&IPE](std::unique_ptr<InstrProfError> E) -> Error {
568                      IPE = E->get();
569                      return Error(std::move(E));
570                    });
571   errs() << toString(std::move(E)) << "\n";
572 
573   if (ShowHint) {
574     StringRef Hint = "";
575     if (IPE != instrprof_error::success) {
576       switch (IPE) {
577       case instrprof_error::hash_mismatch:
578       case instrprof_error::count_mismatch:
579       case instrprof_error::value_site_count_mismatch:
580         Hint = "Make sure that all profile data to be merged is generated "
581                "from the same binary.";
582         break;
583       default:
584         break;
585       }
586     }
587 
588     if (!Hint.empty())
589       errs() << Hint << "\n";
590   }
591 }
592 
593 namespace {
594 /// A remapper from original symbol names to new symbol names based on a file
595 /// containing a list of mappings from old name to new name.
596 class SymbolRemapper {
597   std::unique_ptr<MemoryBuffer> File;
598   DenseMap<StringRef, StringRef> RemappingTable;
599 
600 public:
601   /// Build a SymbolRemapper from a file containing a list of old/new symbols.
create(StringRef InputFile)602   static std::unique_ptr<SymbolRemapper> create(StringRef InputFile) {
603     auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
604     if (!BufOrError)
605       exitWithErrorCode(BufOrError.getError(), InputFile);
606 
607     auto Remapper = std::make_unique<SymbolRemapper>();
608     Remapper->File = std::move(BufOrError.get());
609 
610     for (line_iterator LineIt(*Remapper->File, /*SkipBlanks=*/true, '#');
611          !LineIt.is_at_eof(); ++LineIt) {
612       std::pair<StringRef, StringRef> Parts = LineIt->split(' ');
613       if (Parts.first.empty() || Parts.second.empty() ||
614           Parts.second.count(' ')) {
615         exitWithError("unexpected line in remapping file",
616                       (InputFile + ":" + Twine(LineIt.line_number())).str(),
617                       "expected 'old_symbol new_symbol'");
618       }
619       Remapper->RemappingTable.insert(Parts);
620     }
621     return Remapper;
622   }
623 
624   /// Attempt to map the given old symbol into a new symbol.
625   ///
626   /// \return The new symbol, or \p Name if no such symbol was found.
operator ()(StringRef Name)627   StringRef operator()(StringRef Name) {
628     StringRef New = RemappingTable.lookup(Name);
629     return New.empty() ? Name : New;
630   }
631 
operator ()(FunctionId Name)632   FunctionId operator()(FunctionId Name) {
633     // MD5 name cannot be remapped.
634     if (!Name.isStringRef())
635       return Name;
636     StringRef New = RemappingTable.lookup(Name.stringRef());
637     return New.empty() ? Name : FunctionId(New);
638   }
639 };
640 }
641 
642 struct WeightedFile {
643   std::string Filename;
644   uint64_t Weight;
645 };
646 typedef SmallVector<WeightedFile, 5> WeightedFileVector;
647 
/// Keep track of merged data and reported errors.
struct WriterContext {
  // NOTE(review): presumably guards this context's Writer/Errors -- confirm
  // against the users of this struct.
  std::mutex Lock;
  // Profile writer configured by the constructor below.
  InstrProfWriter Writer;
  // Collected errors, each paired with a string; overlapInput() stores the
  // offending file name there.
  std::vector<std::pair<Error, std::string>> Errors;
  // References to state owned by the caller that constructs the context.
  std::mutex &ErrLock;
  SmallSet<instrprof_error, 4> &WriterErrorCodes;

  /// Construct the writer from \p IsSparse, \p ReservoirSize and
  /// \p MaxTraceLength plus the command-line options DoWritePrevVersion,
  /// MemProfVersionRequested, MemProfFullSchema, MemprofGenerateRandomHotness
  /// and MemprofGenerateRandomHotnessSeed, and bind the two caller-owned
  /// references.
  WriterContext(bool IsSparse, std::mutex &ErrLock,
                SmallSet<instrprof_error, 4> &WriterErrorCodes,
                uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
      : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
               MemProfVersionRequested, MemProfFullSchema,
               MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
        ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
};
664 
665 /// Computer the overlap b/w profile BaseFilename and TestFileName,
666 /// and store the program level result to Overlap.
overlapInput(const std::string & BaseFilename,const std::string & TestFilename,WriterContext * WC,OverlapStats & Overlap,const OverlapFuncFilters & FuncFilter,raw_fd_ostream & OS,bool IsCS)667 static void overlapInput(const std::string &BaseFilename,
668                          const std::string &TestFilename, WriterContext *WC,
669                          OverlapStats &Overlap,
670                          const OverlapFuncFilters &FuncFilter,
671                          raw_fd_ostream &OS, bool IsCS) {
672   auto FS = vfs::getRealFileSystem();
673   auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
674   if (Error E = ReaderOrErr.takeError()) {
675     // Skip the empty profiles by returning sliently.
676     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
677     if (ErrorCode != instrprof_error::empty_raw_profile)
678       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
679                               TestFilename);
680     return;
681   }
682 
683   auto Reader = std::move(ReaderOrErr.get());
684   for (auto &I : *Reader) {
685     OverlapStats FuncOverlap(OverlapStats::FunctionLevel);
686     FuncOverlap.setFuncInfo(I.Name, I.Hash);
687 
688     WC->Writer.overlapRecord(std::move(I), Overlap, FuncOverlap, FuncFilter);
689     FuncOverlap.dump(OS);
690   }
691 }
692 
693 /// Load an input into a writer context.
694 static void
loadInput(const WeightedFile & Input,SymbolRemapper * Remapper,const InstrProfCorrelator * Correlator,const StringRef ProfiledBinary,WriterContext * WC,const object::BuildIDFetcher * BIDFetcher=nullptr,const ProfCorrelatorKind * BIDFetcherCorrelatorKind=nullptr)695 loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
696           const InstrProfCorrelator *Correlator, const StringRef ProfiledBinary,
697           WriterContext *WC, const object::BuildIDFetcher *BIDFetcher = nullptr,
698           const ProfCorrelatorKind *BIDFetcherCorrelatorKind = nullptr) {
699   std::unique_lock<std::mutex> CtxGuard{WC->Lock};
700 
701   // Copy the filename, because llvm::ThreadPool copied the input "const
702   // WeightedFile &" by value, making a reference to the filename within it
703   // invalid outside of this packaged task.
704   std::string Filename = Input.Filename;
705 
706   using ::llvm::memprof::RawMemProfReader;
707   if (RawMemProfReader::hasFormat(Input.Filename)) {
708     auto ReaderOrErr = RawMemProfReader::create(Input.Filename, ProfiledBinary);
709     if (!ReaderOrErr) {
710       exitWithError(ReaderOrErr.takeError(), Input.Filename);
711     }
712     std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOrErr.get());
713     // Check if the profile types can be merged, e.g. clang frontend profiles
714     // should not be merged with memprof profiles.
715     if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
716       consumeError(std::move(E));
717       WC->Errors.emplace_back(
718           make_error<StringError>(
719               "Cannot merge MemProf profile with Clang generated profile.",
720               std::error_code()),
721           Filename);
722       return;
723     }
724 
725     auto MemProfError = [&](Error E) {
726       auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
727       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
728                               Filename);
729     };
730 
731     WC->Writer.addMemProfData(Reader->takeMemProfData(), MemProfError);
732     return;
733   }
734 
735   using ::llvm::memprof::YAMLMemProfReader;
736   if (YAMLMemProfReader::hasFormat(Input.Filename)) {
737     auto ReaderOrErr = YAMLMemProfReader::create(Input.Filename);
738     if (!ReaderOrErr)
739       exitWithError(ReaderOrErr.takeError(), Input.Filename);
740     std::unique_ptr<YAMLMemProfReader> Reader = std::move(ReaderOrErr.get());
741     // Check if the profile types can be merged, e.g. clang frontend profiles
742     // should not be merged with memprof profiles.
743     if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
744       consumeError(std::move(E));
745       WC->Errors.emplace_back(
746           make_error<StringError>(
747               "Cannot merge MemProf profile with incompatible profile.",
748               std::error_code()),
749           Filename);
750       return;
751     }
752 
753     auto MemProfError = [&](Error E) {
754       auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
755       WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
756                               Filename);
757     };
758 
759     auto MemProfData = Reader->takeMemProfData();
760 
761     auto DataAccessProfData = Reader->takeDataAccessProfData();
762 
763     // Check for the empty input in case the YAML file is invalid.
764     if (MemProfData.Records.empty()) {
765       WC->Errors.emplace_back(
766           make_error<StringError>("The profile is empty.", std::error_code()),
767           Filename);
768     }
769 
770     WC->Writer.addMemProfData(std::move(MemProfData), MemProfError);
771     WC->Writer.addDataAccessProfData(std::move(DataAccessProfData));
772     return;
773   }
774 
775   auto FS = vfs::getRealFileSystem();
776   // TODO: This only saves the first non-fatal error from InstrProfReader, and
777   // then added to WriterContext::Errors. However, this is not extensible, if
778   // we have more non-fatal errors from InstrProfReader in the future. How
779   // should this interact with different -failure-mode?
780   std::optional<std::pair<Error, std::string>> ReaderWarning;
781   auto Warn = [&](Error E) {
782     if (ReaderWarning) {
783       consumeError(std::move(E));
784       return;
785     }
786     // Only show the first time an error occurs in this file.
787     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
788     ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
789   };
790 
791   const ProfCorrelatorKind CorrelatorKind = BIDFetcherCorrelatorKind
792                                                 ? *BIDFetcherCorrelatorKind
793                                                 : ProfCorrelatorKind::NONE;
794   auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
795                                              BIDFetcher, CorrelatorKind, Warn);
796   if (Error E = ReaderOrErr.takeError()) {
797     // Skip the empty profiles by returning silently.
798     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
799     if (ErrCode != instrprof_error::empty_raw_profile)
800       WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
801                               Filename);
802     return;
803   }
804 
805   auto Reader = std::move(ReaderOrErr.get());
806   if (Error E = WC->Writer.mergeProfileKind(Reader->getProfileKind())) {
807     consumeError(std::move(E));
808     WC->Errors.emplace_back(
809         make_error<StringError>(
810             "Merge IR generated profile with Clang generated profile.",
811             std::error_code()),
812         Filename);
813     return;
814   }
815 
816   for (auto &I : *Reader) {
817     if (Remapper)
818       I.Name = (*Remapper)(I.Name);
819     const StringRef FuncName = I.Name;
820     bool Reported = false;
821     WC->Writer.addRecord(std::move(I), Input.Weight, [&](Error E) {
822       if (Reported) {
823         consumeError(std::move(E));
824         return;
825       }
826       Reported = true;
827       // Only show hint the first time an error occurs.
828       auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
829       std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
830       bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
831       handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
832                              Input.Filename, FuncName, firstTime);
833     });
834   }
835 
836   if (KeepVTableSymbols) {
837     const InstrProfSymtab &symtab = Reader->getSymtab();
838     const auto &VTableNames = symtab.getVTableNames();
839 
840     for (const auto &kv : VTableNames)
841       WC->Writer.addVTableName(kv.getKey());
842   }
843 
844   if (Reader->hasTemporalProfile()) {
845     auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
846     if (!Traces.empty())
847       WC->Writer.addTemporalProfileTraces(
848           Traces, Reader->getTemporalProfTraceStreamSize());
849   }
850   if (Reader->hasError()) {
851     if (Error E = Reader->getError()) {
852       WC->Errors.emplace_back(std::move(E), Filename);
853       return;
854     }
855   }
856 
857   std::vector<llvm::object::BuildID> BinaryIds;
858   if (Error E = Reader->readBinaryIds(BinaryIds)) {
859     WC->Errors.emplace_back(std::move(E), Filename);
860     return;
861   }
862   WC->Writer.addBinaryIds(BinaryIds);
863 
864   if (ReaderWarning) {
865     WC->Errors.emplace_back(std::move(ReaderWarning->first),
866                             ReaderWarning->second);
867   }
868 }
869 
870 /// Merge the \p Src writer context into \p Dst.
mergeWriterContexts(WriterContext * Dst,WriterContext * Src)871 static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
872   for (auto &ErrorPair : Src->Errors)
873     Dst->Errors.push_back(std::move(ErrorPair));
874   Src->Errors.clear();
875 
876   if (Error E = Dst->Writer.mergeProfileKind(Src->Writer.getProfileKind()))
877     exitWithError(std::move(E));
878 
879   Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
880     auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
881     std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
882     bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
883     if (firstTime)
884       warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
885   });
886 }
887 
888 static StringRef
getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type & Val)889 getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
890   return Val.first();
891 }
892 
893 static std::string
getFuncName(const SampleProfileMap::value_type & Val)894 getFuncName(const SampleProfileMap::value_type &Val) {
895   return Val.second.getContext().toString();
896 }
897 
898 template <typename T>
filterFunctions(T & ProfileMap)899 static void filterFunctions(T &ProfileMap) {
900   bool hasFilter = !FuncNameFilter.empty();
901   bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
902   if (!hasFilter && !hasNegativeFilter)
903     return;
904 
905   // If filter starts with '?' it is MSVC mangled name, not a regex.
906   llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
907   if (hasFilter && FuncNameFilter[0] == '?' &&
908       ProbablyMSVCMangledName.match(FuncNameFilter))
909     FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
910   if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
911       ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
912     FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
913 
914   size_t Count = ProfileMap.size();
915   llvm::Regex Pattern(FuncNameFilter);
916   llvm::Regex NegativePattern(FuncNameNegativeFilter);
917   std::string Error;
918   if (hasFilter && !Pattern.isValid(Error))
919     exitWithError(Error);
920   if (hasNegativeFilter && !NegativePattern.isValid(Error))
921     exitWithError(Error);
922 
923   // Handle MD5 profile, so it is still able to match using the original name.
924   std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
925   std::string NegativeMD5Name =
926       std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
927 
928   for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
929     auto Tmp = I++;
930     const auto &FuncName = getFuncName(*Tmp);
931     // Negative filter has higher precedence than positive filter.
932     if ((hasNegativeFilter &&
933          (NegativePattern.match(FuncName) ||
934           (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
935         (hasFilter && !(Pattern.match(FuncName) ||
936                         (FunctionSamples::UseMD5 && MD5Name == FuncName))))
937       ProfileMap.erase(Tmp);
938   }
939 
940   llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
941                << "in the original profile are filtered.\n";
942 }
943 
writeInstrProfile(StringRef OutputFilename,ProfileFormat OutputFormat,InstrProfWriter & Writer)944 static void writeInstrProfile(StringRef OutputFilename,
945                               ProfileFormat OutputFormat,
946                               InstrProfWriter &Writer) {
947   std::error_code EC;
948   raw_fd_ostream Output(OutputFilename.data(), EC,
949                         OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
950                                                 : sys::fs::OF_None);
951   if (EC)
952     exitWithErrorCode(EC, OutputFilename);
953 
954   if (OutputFormat == PF_Text) {
955     if (Error E = Writer.writeText(Output))
956       warn(std::move(E));
957   } else {
958     if (Output.is_displayed())
959       exitWithError("cannot write a non-text format profile to the terminal");
960     if (Error E = Writer.write(Output))
961       warn(std::move(E));
962   }
963 }
964 
/// Merge the instrumentation (and MemProf) profiles in \p Inputs and write the
/// result to the file-level OutputFilename in OutputFormat.
///
/// Steps: validate the output format and the correlation options, set up the
/// optional correlator and build-id fetcher, load the inputs into one
/// WriterContext per thread, merge those contexts into Contexts[0], report
/// the deferred errors, apply the function-name filters and write the output.
///
/// \param Inputs Weighted input files.
/// \param Remapper Optional symbol remapper handed to loadInput.
/// \param MaxDbgCorrelationWarnings Forwarded to
///        InstrProfCorrelator::correlateProfileData.
/// \param ProfiledBinary Handed to loadInput.
static void mergeInstrProfile(const WeightedFileVector &Inputs,
                              SymbolRemapper *Remapper,
                              int MaxDbgCorrelationWarnings,
                              const StringRef ProfiledBinary) {
  const uint64_t TraceReservoirSize = TemporalProfTraceReservoirSize.getValue();
  const uint64_t MaxTraceLength = TemporalProfMaxTraceLength.getValue();
  // Only binary, extensible binary and text output are accepted here.
  if (OutputFormat == PF_Compact_Binary)
    exitWithError("Compact Binary is deprecated");
  if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
      OutputFormat != PF_Text)
    exitWithError("unknown format is specified");

  // TODO: Maybe we should support correlation with mixture of different
  // correlation modes(w/wo debug-info/object correlation).
  if (DebugInfoFilename.empty()) {
    if (!BinaryFilename.empty() && (DebugInfod || !DebugFileDirectory.empty()))
      exitWithError("Expected only one of -binary-file, -debuginfod or "
                    "-debug-file-directory");
  } else if (!BinaryFilename.empty() || DebugInfod ||
             !DebugFileDirectory.empty()) {
    exitWithError("Expected only one of -debug-info, -binary-file, -debuginfod "
                  "or -debug-file-directory");
  }
  // Pick the file to correlate against, if any: debug info takes precedence
  // over the binary (the checks above reject giving both).
  std::string CorrelateFilename;
  ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
  if (!DebugInfoFilename.empty()) {
    CorrelateFilename = DebugInfoFilename;
    CorrelateKind = ProfCorrelatorKind::DEBUG_INFO;
  } else if (!BinaryFilename.empty()) {
    CorrelateFilename = BinaryFilename;
    CorrelateKind = ProfCorrelatorKind::BINARY;
  }

  std::unique_ptr<InstrProfCorrelator> Correlator;
  if (CorrelateKind != InstrProfCorrelator::NONE) {
    if (auto Err = InstrProfCorrelator::get(CorrelateFilename, CorrelateKind)
                       .moveInto(Correlator))
      exitWithError(std::move(Err), CorrelateFilename);
    if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
      exitWithError(std::move(Err), CorrelateFilename);
  }

  // Build-id based correlation: --debuginfod and --debug-file-directory each
  // require --correlate, and --correlate requires one of them.
  ProfCorrelatorKind BIDFetcherCorrelateKind = ProfCorrelatorKind::NONE;
  std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
  if (DebugInfod) {
    llvm::HTTPClient::initialize();
    BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
    if (!BIDFetcherProfileCorrelate)
      exitWithError("Expected --correlate when --debuginfod is provided");
    BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
  } else if (!DebugFileDirectory.empty()) {
    BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
    if (!BIDFetcherProfileCorrelate)
      exitWithError("Expected --correlate when --debug-file-directory "
                    "is provided");
    BIDFetcherCorrelateKind = BIDFetcherProfileCorrelate;
  } else if (BIDFetcherProfileCorrelate) {
    exitWithError("Expected --debuginfod or --debug-file-directory when "
                  "--correlate is provided");
  }

  // Shared by all writer contexts so that each writer error code is only
  // flagged as "first time" once across threads.
  std::mutex ErrorLock;
  SmallSet<instrprof_error, 4> WriterErrorCodes;

  // If NumThreads is not specified, auto-detect a good default: at most one
  // thread per two inputs, capped by the available hardware threads.
  if (NumThreads == 0)
    NumThreads = std::min(hardware_concurrency().compute_thread_count(),
                          unsigned((Inputs.size() + 1) / 2));

  // Initialize the writer contexts.
  SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
  for (unsigned I = 0; I < NumThreads; ++I)
    Contexts.emplace_back(std::make_unique<WriterContext>(
        OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
        MaxTraceLength));

  if (NumThreads == 1) {
    for (const auto &Input : Inputs)
      loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
                Contexts[0].get(), BIDFetcher.get(), &BIDFetcherCorrelateKind);
  } else {
    DefaultThreadPool Pool(hardware_concurrency(NumThreads));

    // Load the inputs in parallel (N/NumThreads serial steps). Inputs are
    // assigned to contexts round-robin; loadInput locks the context it uses.
    unsigned Ctx = 0;
    for (const auto &Input : Inputs) {
      Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
                 Contexts[Ctx].get(), BIDFetcher.get(),
                 &BIDFetcherCorrelateKind);
      Ctx = (Ctx + 1) % NumThreads;
    }
    Pool.wait();

    // Merge the writer contexts together (~ lg(NumThreads) serial steps).
    // Each round folds context I + Mid into context I for I in [0, Mid); when
    // the number of live contexts is odd, the leftover last one is folded
    // into context 0. The final result ends up in Contexts[0].
    unsigned Mid = Contexts.size() / 2;
    unsigned End = Contexts.size();
    assert(Mid > 0 && "Expected more than one context");
    do {
      for (unsigned I = 0; I < Mid; ++I)
        Pool.async(mergeWriterContexts, Contexts[I].get(),
                   Contexts[I + Mid].get());
      Pool.wait();
      if (End & 1) {
        Pool.async(mergeWriterContexts, Contexts[0].get(),
                   Contexts[End - 1].get());
        Pool.wait();
      }
      End = Mid;
      Mid /= 2;
    } while (Mid > 0);
  }

  // Handle deferred errors encountered during merging. If the number of errors
  // is equal to the number of inputs the merge failed.
  // Every entry of WriterContext::Errors counts here, including an entry
  // loadInput added for a reader warning on an input that was still loaded.
  unsigned NumErrors = 0;
  for (std::unique_ptr<WriterContext> &WC : Contexts) {
    for (auto &ErrorPair : WC->Errors) {
      ++NumErrors;
      warn(toString(std::move(ErrorPair.first)), ErrorPair.second);
    }
  }
  if ((NumErrors == Inputs.size() && FailMode == failIfAllAreInvalid) ||
      (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
    exitWithError("no profile can be merged");

  filterFunctions(Contexts[0]->Writer.getProfileData());

  writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
}
1094 
1095 /// The profile entry for a function in instrumentation profile.
1096 struct InstrProfileEntry {
1097   uint64_t MaxCount = 0;
1098   uint64_t NumEdgeCounters = 0;
1099   float ZeroCounterRatio = 0.0;
1100   InstrProfRecord *ProfRecord;
1101   InstrProfileEntry(InstrProfRecord *Record);
1102   InstrProfileEntry() = default;
1103 };
1104 
InstrProfileEntry(InstrProfRecord * Record)1105 InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
1106   ProfRecord = Record;
1107   uint64_t CntNum = Record->Counts.size();
1108   uint64_t ZeroCntNum = 0;
1109   for (size_t I = 0; I < CntNum; ++I) {
1110     MaxCount = std::max(MaxCount, Record->Counts[I]);
1111     ZeroCntNum += !Record->Counts[I];
1112   }
1113   ZeroCounterRatio = (float)ZeroCntNum / CntNum;
1114   NumEdgeCounters = CntNum;
1115 }
1116 
1117 /// Either set all the counters in the instr profile entry \p IFE to
1118 /// -1 / -2 /in order to drop the profile or scale up the
1119 /// counters in \p IFP to be above hot / cold threshold. We use
1120 /// the ratio of zero counters in the profile of a function to
1121 /// decide the profile is helpful or harmful for performance,
1122 /// and to choose whether to scale up or drop it.
updateInstrProfileEntry(InstrProfileEntry & IFE,bool SetToHot,uint64_t HotInstrThreshold,uint64_t ColdInstrThreshold,float ZeroCounterThreshold)1123 static void updateInstrProfileEntry(InstrProfileEntry &IFE, bool SetToHot,
1124                                     uint64_t HotInstrThreshold,
1125                                     uint64_t ColdInstrThreshold,
1126                                     float ZeroCounterThreshold) {
1127   InstrProfRecord *ProfRecord = IFE.ProfRecord;
1128   if (!IFE.MaxCount || IFE.ZeroCounterRatio > ZeroCounterThreshold) {
1129     // If all or most of the counters of the function are zero, the
1130     // profile is unaccountable and should be dropped. Reset all the
1131     // counters to be -1 / -2 and PGO profile-use will drop the profile.
1132     // All counters being -1 also implies that the function is hot so
1133     // PGO profile-use will also set the entry count metadata to be
1134     // above hot threshold.
1135     // All counters being -2 implies that the function is warm so
1136     // PGO profile-use will also set the entry count metadata to be
1137     // above cold threshold.
1138     auto Kind =
1139         (SetToHot ? InstrProfRecord::PseudoHot : InstrProfRecord::PseudoWarm);
1140     ProfRecord->setPseudoCount(Kind);
1141     return;
1142   }
1143 
1144   // Scale up the MaxCount to be multiple times above hot / cold threshold.
1145   const unsigned MultiplyFactor = 3;
1146   uint64_t Threshold = (SetToHot ? HotInstrThreshold : ColdInstrThreshold);
1147   uint64_t Numerator = Threshold * MultiplyFactor;
1148 
1149   // Make sure Threshold for warm counters is below the HotInstrThreshold.
1150   if (!SetToHot && Threshold >= HotInstrThreshold) {
1151     Threshold = (HotInstrThreshold + ColdInstrThreshold) / 2;
1152   }
1153 
1154   uint64_t Denominator = IFE.MaxCount;
1155   if (Numerator <= Denominator)
1156     return;
1157   ProfRecord->scale(Numerator, Denominator, [&](instrprof_error E) {
1158     warn(toString(make_error<InstrProfError>(E)));
1159   });
1160 }
1161 
/// Indices into ProfileSummaryBuilder::DefaultCutoffs that select the
/// percentile cutoffs used for the cold and hot thresholds in
/// adjustInstrProfile.
const uint64_t ColdPercentileIdx = 15;
const uint64_t HotPercentileIdx = 11;
1164 
1165 using sampleprof::FSDiscriminatorPass;
1166 
1167 // Internal options to set FSDiscriminatorPass. Used in merge and show
1168 // commands.
1169 static cl::opt<FSDiscriminatorPass> FSDiscriminatorPassOption(
1170     "fs-discriminator-pass", cl::init(PassLast), cl::Hidden,
1171     cl::desc("Zero out the discriminator bits for the FS discrimiantor "
1172              "pass beyond this value. The enum values are defined in "
1173              "Support/Discriminator.h"),
1174     cl::values(clEnumVal(Base, "Use base discriminators only"),
1175                clEnumVal(Pass1, "Use base and pass 1 discriminators"),
1176                clEnumVal(Pass2, "Use base and pass 1-2 discriminators"),
1177                clEnumVal(Pass3, "Use base and pass 1-3 discriminators"),
1178                clEnumVal(PassLast, "Use all discriminator bits (default)")));
1179 
getDiscriminatorMask()1180 static unsigned getDiscriminatorMask() {
1181   return getN1Bits(getFSPassBitEnd(FSDiscriminatorPassOption.getValue()));
1182 }
1183 
1184 /// Adjust the instr profile in \p WC based on the sample profile in
1185 /// \p Reader.
1186 static void
adjustInstrProfile(std::unique_ptr<WriterContext> & WC,std::unique_ptr<sampleprof::SampleProfileReader> & Reader,unsigned SupplMinSizeThreshold,float ZeroCounterThreshold,unsigned InstrProfColdThreshold)1187 adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
1188                    std::unique_ptr<sampleprof::SampleProfileReader> &Reader,
1189                    unsigned SupplMinSizeThreshold, float ZeroCounterThreshold,
1190                    unsigned InstrProfColdThreshold) {
1191   // Function to its entry in instr profile.
1192   StringMap<InstrProfileEntry> InstrProfileMap;
1193   StringMap<StringRef> StaticFuncMap;
1194   InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
1195 
1196   auto checkSampleProfileHasFUnique = [&Reader]() {
1197     for (const auto &PD : Reader->getProfiles()) {
1198       auto &FContext = PD.second.getContext();
1199       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
1200           std::string::npos) {
1201         return true;
1202       }
1203     }
1204     return false;
1205   };
1206 
1207   bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
1208 
1209   auto buildStaticFuncMap = [&StaticFuncMap,
1210                              SampleProfileHasFUnique](const StringRef Name) {
1211     std::string FilePrefixes[] = {".cpp", "cc", ".c", ".hpp", ".h"};
1212     size_t PrefixPos = StringRef::npos;
1213     for (auto &FilePrefix : FilePrefixes) {
1214       std::string NamePrefix = FilePrefix + GlobalIdentifierDelimiter;
1215       PrefixPos = Name.find_insensitive(NamePrefix);
1216       if (PrefixPos == StringRef::npos)
1217         continue;
1218       PrefixPos += NamePrefix.size();
1219       break;
1220     }
1221 
1222     if (PrefixPos == StringRef::npos) {
1223       return;
1224     }
1225 
1226     StringRef NewName = Name.drop_front(PrefixPos);
1227     StringRef FName = Name.substr(0, PrefixPos - 1);
1228     if (NewName.size() == 0) {
1229       return;
1230     }
1231 
1232     // This name should have a static linkage.
1233     size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
1234     bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
1235 
1236     // If sample profile and instrumented profile do not agree on symbol
1237     // uniqification.
1238     if (SampleProfileHasFUnique != ProfileHasFUnique) {
1239       // If instrumented profile uses -funique-internal-linkage-symbols,
1240       // we need to trim the name.
1241       if (ProfileHasFUnique) {
1242         NewName = NewName.substr(0, PostfixPos);
1243       } else {
1244         // If sample profile uses -funique-internal-linkage-symbols,
1245         // we build the map.
1246         std::string NStr =
1247             NewName.str() + getUniqueInternalLinkagePostfix(FName);
1248         NewName = StringRef(NStr);
1249         StaticFuncMap[NewName] = Name;
1250         return;
1251       }
1252     }
1253 
1254     auto [It, Inserted] = StaticFuncMap.try_emplace(NewName, Name);
1255     if (!Inserted)
1256       It->second = DuplicateNameStr;
1257   };
1258 
1259   // We need to flatten the SampleFDO profile as the InstrFDO
1260   // profile does not have inlined callsite profiles.
1261   // One caveat is the pre-inlined function -- their samples
1262   // should be collapsed into the caller function.
1263   // Here we do a DFS traversal to get the flatten profile
1264   // info: the sum of entrycount and the max of maxcount.
1265   // Here is the algorithm:
1266   //   recursive (FS, root_name) {
1267   //      name = FS->getName();
1268   //      get samples for FS;
1269   //      if (InstrProf.find(name) {
1270   //        root_name = name;
1271   //      } else {
1272   //        if (name is in static_func map) {
1273   //          root_name = static_name;
1274   //        }
1275   //      }
1276   //      update the Map entry for root_name;
1277   //      for (subfs: FS) {
1278   //        recursive(subfs, root_name);
1279   //      }
1280   //   }
1281   //
1282   // Here is an example.
1283   //
1284   // SampleProfile:
1285   // foo:12345:1000
1286   // 1: 1000
1287   // 2.1: 1000
1288   // 15: 5000
1289   // 4: bar:1000
1290   //  1: 1000
1291   //  2: goo:3000
1292   //   1: 3000
1293   // 8: bar:40000
1294   //  1: 10000
1295   //  2: goo:30000
1296   //   1: 30000
1297   //
1298   // InstrProfile has two entries:
1299   //  foo
1300   //  bar.cc;bar
1301   //
1302   // After BuildMaxSampleMap, we should have the following in FlattenSampleMap:
1303   // {"foo", {1000, 5000}}
1304   // {"bar.cc;bar", {11000, 30000}}
1305   //
1306   // foo's has an entry count of 1000, and max body count of 5000.
1307   // bar.cc;bar has an entry count of 11000 (sum two callsites of 1000 and
1308   // 10000), and max count of 30000 (from the callsite in line 8).
1309   //
1310   // Note that goo's count will remain in bar.cc;bar() as it does not have an
1311   // entry in InstrProfile.
1312   llvm::StringMap<std::pair<uint64_t, uint64_t>> FlattenSampleMap;
1313   auto BuildMaxSampleMap = [&FlattenSampleMap, &StaticFuncMap,
1314                             &InstrProfileMap](const FunctionSamples &FS,
1315                                               const StringRef &RootName) {
1316     auto BuildMaxSampleMapImpl = [&](const FunctionSamples &FS,
1317                                      const StringRef &RootName,
1318                                      auto &BuildImpl) -> void {
1319       std::string NameStr = FS.getFunction().str();
1320       const StringRef Name = NameStr;
1321       const StringRef *NewRootName = &RootName;
1322       uint64_t EntrySample = FS.getHeadSamplesEstimate();
1323       uint64_t MaxBodySample = FS.getMaxCountInside(/* SkipCallSite*/ true);
1324 
1325       auto It = InstrProfileMap.find(Name);
1326       if (It != InstrProfileMap.end()) {
1327         NewRootName = &Name;
1328       } else {
1329         auto NewName = StaticFuncMap.find(Name);
1330         if (NewName != StaticFuncMap.end()) {
1331           It = InstrProfileMap.find(NewName->second);
1332           if (NewName->second != DuplicateNameStr) {
1333             NewRootName = &NewName->second;
1334           }
1335         } else {
1336           // Here the EntrySample is of an inlined function, so we should not
1337           // update the EntrySample in the map.
1338           EntrySample = 0;
1339         }
1340       }
1341       EntrySample += FlattenSampleMap[*NewRootName].first;
1342       MaxBodySample =
1343           std::max(FlattenSampleMap[*NewRootName].second, MaxBodySample);
1344       FlattenSampleMap[*NewRootName] =
1345           std::make_pair(EntrySample, MaxBodySample);
1346 
1347       for (const auto &C : FS.getCallsiteSamples())
1348         for (const auto &F : C.second)
1349           BuildImpl(F.second, *NewRootName, BuildImpl);
1350     };
1351     BuildMaxSampleMapImpl(FS, RootName, BuildMaxSampleMapImpl);
1352   };
1353 
1354   for (auto &PD : WC->Writer.getProfileData()) {
1355     // Populate IPBuilder.
1356     for (const auto &PDV : PD.getValue()) {
1357       InstrProfRecord Record = PDV.second;
1358       IPBuilder.addRecord(Record);
1359     }
1360 
1361     // If a function has multiple entries in instr profile, skip it.
1362     if (PD.getValue().size() != 1)
1363       continue;
1364 
1365     // Initialize InstrProfileMap.
1366     InstrProfRecord *R = &PD.getValue().begin()->second;
1367     StringRef FullName = PD.getKey();
1368     InstrProfileMap[FullName] = InstrProfileEntry(R);
1369     buildStaticFuncMap(FullName);
1370   }
1371 
1372   for (auto &PD : Reader->getProfiles()) {
1373     sampleprof::FunctionSamples &FS = PD.second;
1374     std::string Name = FS.getFunction().str();
1375     BuildMaxSampleMap(FS, Name);
1376   }
1377 
1378   ProfileSummary InstrPS = *IPBuilder.getSummary();
1379   ProfileSummary SamplePS = Reader->getSummary();
1380 
1381   // Compute cold thresholds for instr profile and sample profile.
1382   uint64_t HotSampleThreshold =
1383       ProfileSummaryBuilder::getEntryForPercentile(
1384           SamplePS.getDetailedSummary(),
1385           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1386           .MinCount;
1387   uint64_t ColdSampleThreshold =
1388       ProfileSummaryBuilder::getEntryForPercentile(
1389           SamplePS.getDetailedSummary(),
1390           ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1391           .MinCount;
1392   uint64_t HotInstrThreshold =
1393       ProfileSummaryBuilder::getEntryForPercentile(
1394           InstrPS.getDetailedSummary(),
1395           ProfileSummaryBuilder::DefaultCutoffs[HotPercentileIdx])
1396           .MinCount;
1397   uint64_t ColdInstrThreshold =
1398       InstrProfColdThreshold
1399           ? InstrProfColdThreshold
1400           : ProfileSummaryBuilder::getEntryForPercentile(
1401                 InstrPS.getDetailedSummary(),
1402                 ProfileSummaryBuilder::DefaultCutoffs[ColdPercentileIdx])
1403                 .MinCount;
1404 
1405   // Find hot/warm functions in sample profile which is cold in instr profile
1406   // and adjust the profiles of those functions in the instr profile.
1407   for (const auto &E : FlattenSampleMap) {
1408     uint64_t SampleMaxCount = std::max(E.second.first, E.second.second);
1409     if (SampleMaxCount < ColdSampleThreshold)
1410       continue;
1411     StringRef Name = E.first();
1412     auto It = InstrProfileMap.find(Name);
1413     if (It == InstrProfileMap.end()) {
1414       auto NewName = StaticFuncMap.find(Name);
1415       if (NewName != StaticFuncMap.end()) {
1416         It = InstrProfileMap.find(NewName->second);
1417         if (NewName->second == DuplicateNameStr) {
1418           WithColor::warning()
1419               << "Static function " << Name
1420               << " has multiple promoted names, cannot adjust profile.\n";
1421         }
1422       }
1423     }
1424     if (It == InstrProfileMap.end() ||
1425         It->second.MaxCount > ColdInstrThreshold ||
1426         It->second.NumEdgeCounters < SupplMinSizeThreshold)
1427       continue;
1428     bool SetToHot = SampleMaxCount >= HotSampleThreshold;
1429     updateInstrProfileEntry(It->second, SetToHot, HotInstrThreshold,
1430                             ColdInstrThreshold, ZeroCounterThreshold);
1431   }
1432 }
1433 
1434 /// The main function to supplement instr profile with sample profile.
1435 /// \Inputs contains the instr profile. \p SampleFilename specifies the
1436 /// sample profile. \p OutputFilename specifies the output profile name.
1437 /// \p OutputFormat specifies the output profile format. \p OutputSparse
1438 /// specifies whether to generate sparse profile. \p SupplMinSizeThreshold
1439 /// specifies the minimal size for the functions whose profile will be
1440 /// adjusted. \p ZeroCounterThreshold is the threshold to check whether
1441 /// a function contains too many zero counters and whether its profile
1442 /// should be dropped. \p InstrProfColdThreshold is the user specified
1443 /// cold threshold which will override the cold threshold got from the
1444 /// instr profile summary.
supplementInstrProfile(const WeightedFileVector & Inputs,StringRef SampleFilename,bool OutputSparse,unsigned SupplMinSizeThreshold,float ZeroCounterThreshold,unsigned InstrProfColdThreshold)1445 static void supplementInstrProfile(const WeightedFileVector &Inputs,
1446                                    StringRef SampleFilename, bool OutputSparse,
1447                                    unsigned SupplMinSizeThreshold,
1448                                    float ZeroCounterThreshold,
1449                                    unsigned InstrProfColdThreshold) {
1450   if (OutputFilename == "-")
1451     exitWithError("cannot write indexed profdata format to stdout");
1452   if (Inputs.size() != 1)
1453     exitWithError("expect one input to be an instr profile");
1454   if (Inputs[0].Weight != 1)
1455     exitWithError("expect instr profile doesn't have weight");
1456 
1457   StringRef InstrFilename = Inputs[0].Filename;
1458 
1459   // Read sample profile.
1460   LLVMContext Context;
1461   auto FS = vfs::getRealFileSystem();
1462   auto ReaderOrErr = sampleprof::SampleProfileReader::create(
1463       SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
1464   if (std::error_code EC = ReaderOrErr.getError())
1465     exitWithErrorCode(EC, SampleFilename);
1466   auto Reader = std::move(ReaderOrErr.get());
1467   if (std::error_code EC = Reader->read())
1468     exitWithErrorCode(EC, SampleFilename);
1469 
1470   // Read instr profile.
1471   std::mutex ErrorLock;
1472   SmallSet<instrprof_error, 4> WriterErrorCodes;
1473   auto WC = std::make_unique<WriterContext>(OutputSparse, ErrorLock,
1474                                             WriterErrorCodes);
1475   loadInput(Inputs[0], nullptr, nullptr, /*ProfiledBinary=*/"", WC.get());
1476   if (WC->Errors.size() > 0)
1477     exitWithError(std::move(WC->Errors[0].first), InstrFilename);
1478 
1479   adjustInstrProfile(WC, Reader, SupplMinSizeThreshold, ZeroCounterThreshold,
1480                      InstrProfColdThreshold);
1481   writeInstrProfile(OutputFilename, OutputFormat, WC->Writer);
1482 }
1483 
1484 /// Make a copy of the given function samples with all symbol names remapped
1485 /// by the provided symbol remapper.
1486 static sampleprof::FunctionSamples
remapSamples(const sampleprof::FunctionSamples & Samples,SymbolRemapper & Remapper,sampleprof_error & Error)1487 remapSamples(const sampleprof::FunctionSamples &Samples,
1488              SymbolRemapper &Remapper, sampleprof_error &Error) {
1489   sampleprof::FunctionSamples Result;
1490   Result.setFunction(Remapper(Samples.getFunction()));
1491   Result.addTotalSamples(Samples.getTotalSamples());
1492   Result.addHeadSamples(Samples.getHeadSamples());
1493   for (const auto &BodySample : Samples.getBodySamples()) {
1494     uint32_t MaskedDiscriminator =
1495         BodySample.first.Discriminator & getDiscriminatorMask();
1496     Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator,
1497                           BodySample.second.getSamples());
1498     for (const auto &Target : BodySample.second.getCallTargets()) {
1499       Result.addCalledTargetSamples(BodySample.first.LineOffset,
1500                                     MaskedDiscriminator,
1501                                     Remapper(Target.first), Target.second);
1502     }
1503   }
1504   for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
1505     sampleprof::FunctionSamplesMap &Target =
1506         Result.functionSamplesAt(CallsiteSamples.first);
1507     for (const auto &Callsite : CallsiteSamples.second) {
1508       sampleprof::FunctionSamples Remapped =
1509           remapSamples(Callsite.second, Remapper, Error);
1510       mergeSampleProfErrors(Error,
1511                             Target[Remapped.getFunction()].merge(Remapped));
1512     }
1513   }
1514   return Result;
1515 }
1516 
1517 static sampleprof::SampleProfileFormat FormatMap[] = {
1518     sampleprof::SPF_None,
1519     sampleprof::SPF_Text,
1520     sampleprof::SPF_None,
1521     sampleprof::SPF_Ext_Binary,
1522     sampleprof::SPF_GCC,
1523     sampleprof::SPF_Binary};
1524 
1525 static std::unique_ptr<MemoryBuffer>
getInputFileBuf(const StringRef & InputFile)1526 getInputFileBuf(const StringRef &InputFile) {
1527   if (InputFile == "")
1528     return {};
1529 
1530   auto BufOrError = MemoryBuffer::getFileOrSTDIN(InputFile);
1531   if (!BufOrError)
1532     exitWithErrorCode(BufOrError.getError(), InputFile);
1533 
1534   return std::move(*BufOrError);
1535 }
1536 
populateProfileSymbolList(MemoryBuffer * Buffer,sampleprof::ProfileSymbolList & PSL)1537 static void populateProfileSymbolList(MemoryBuffer *Buffer,
1538                                       sampleprof::ProfileSymbolList &PSL) {
1539   if (!Buffer)
1540     return;
1541 
1542   SmallVector<StringRef, 32> SymbolVec;
1543   StringRef Data = Buffer->getBuffer();
1544   Data.split(SymbolVec, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1545 
1546   for (StringRef SymbolStr : SymbolVec)
1547     PSL.add(SymbolStr.trim());
1548 }
1549 
handleExtBinaryWriter(sampleprof::SampleProfileWriter & Writer,ProfileFormat OutputFormat,MemoryBuffer * Buffer,sampleprof::ProfileSymbolList & WriterList,bool CompressAllSections,bool UseMD5,bool GenPartialProfile)1550 static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
1551                                   ProfileFormat OutputFormat,
1552                                   MemoryBuffer *Buffer,
1553                                   sampleprof::ProfileSymbolList &WriterList,
1554                                   bool CompressAllSections, bool UseMD5,
1555                                   bool GenPartialProfile) {
1556   if (SplitLayout) {
1557     if (OutputFormat == PF_Binary)
1558       warn("-split-layout is ignored. Specify -extbinary to enable it");
1559     else
1560       Writer.setUseCtxSplitLayout();
1561   }
1562 
1563   populateProfileSymbolList(Buffer, WriterList);
1564   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
1565     warn("Profile Symbol list is not empty but the output format is not "
1566          "ExtBinary format. The list will be lost in the output. ");
1567 
1568   Writer.setProfileSymbolList(&WriterList);
1569 
1570   if (CompressAllSections) {
1571     if (OutputFormat != PF_Ext_Binary)
1572       warn("-compress-all-section is ignored. Specify -extbinary to enable it");
1573     else
1574       Writer.setToCompressAllSections();
1575   }
1576   if (UseMD5) {
1577     if (OutputFormat != PF_Ext_Binary)
1578       warn("-use-md5 is ignored. Specify -extbinary to enable it");
1579     else
1580       Writer.setUseMD5();
1581   }
1582   if (GenPartialProfile) {
1583     if (OutputFormat != PF_Ext_Binary)
1584       warn("-gen-partial-profile is ignored. Specify -extbinary to enable it");
1585     else
1586       Writer.setPartialProfile();
1587   }
1588 }
1589 
mergeSampleProfile(const WeightedFileVector & Inputs,SymbolRemapper * Remapper,StringRef ProfileSymbolListFile,size_t OutputSizeLimit)1590 static void mergeSampleProfile(const WeightedFileVector &Inputs,
1591                                SymbolRemapper *Remapper,
1592                                StringRef ProfileSymbolListFile,
1593                                size_t OutputSizeLimit) {
1594   using namespace sampleprof;
1595   SampleProfileMap ProfileMap;
1596   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
1597   LLVMContext Context;
1598   sampleprof::ProfileSymbolList WriterList;
1599   std::optional<bool> ProfileIsProbeBased;
1600   std::optional<bool> ProfileIsCS;
1601   for (const auto &Input : Inputs) {
1602     auto FS = vfs::getRealFileSystem();
1603     auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
1604                                                    FSDiscriminatorPassOption);
1605     if (std::error_code EC = ReaderOrErr.getError()) {
1606       warnOrExitGivenError(FailMode, EC, Input.Filename);
1607       continue;
1608     }
1609 
1610     // We need to keep the readers around until after all the files are
1611     // read so that we do not lose the function names stored in each
1612     // reader's memory. The function names are needed to write out the
1613     // merged profile map.
1614     Readers.push_back(std::move(ReaderOrErr.get()));
1615     const auto Reader = Readers.back().get();
1616     if (std::error_code EC = Reader->read()) {
1617       warnOrExitGivenError(FailMode, EC, Input.Filename);
1618       Readers.pop_back();
1619       continue;
1620     }
1621 
1622     SampleProfileMap &Profiles = Reader->getProfiles();
1623     if (ProfileIsProbeBased &&
1624         ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
1625       exitWithError(
1626           "cannot merge probe-based profile with non-probe-based profile");
1627     ProfileIsProbeBased = FunctionSamples::ProfileIsProbeBased;
1628     if (ProfileIsCS && ProfileIsCS != FunctionSamples::ProfileIsCS)
1629       exitWithError("cannot merge CS profile with non-CS profile");
1630     ProfileIsCS = FunctionSamples::ProfileIsCS;
1631     for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
1632          I != E; ++I) {
1633       sampleprof_error Result = sampleprof_error::success;
1634       FunctionSamples Remapped =
1635           Remapper ? remapSamples(I->second, *Remapper, Result)
1636                    : FunctionSamples();
1637       FunctionSamples &Samples = Remapper ? Remapped : I->second;
1638       SampleContext FContext = Samples.getContext();
1639       mergeSampleProfErrors(Result,
1640                             ProfileMap[FContext].merge(Samples, Input.Weight));
1641       if (Result != sampleprof_error::success) {
1642         std::error_code EC = make_error_code(Result);
1643         handleMergeWriterError(errorCodeToError(EC), Input.Filename,
1644                                FContext.toString());
1645       }
1646     }
1647 
1648     if (!DropProfileSymbolList) {
1649       std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
1650           Reader->getProfileSymbolList();
1651       if (ReaderList)
1652         WriterList.merge(*ReaderList);
1653     }
1654   }
1655 
1656   if (ProfileIsCS && (SampleMergeColdContext || SampleTrimColdContext)) {
1657     // Use threshold calculated from profile summary unless specified.
1658     SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1659     auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
1660     uint64_t SampleProfColdThreshold =
1661         ProfileSummaryBuilder::getColdCountThreshold(
1662             (Summary->getDetailedSummary()));
1663 
1664     // Trim and merge cold context profile using cold threshold above;
1665     SampleContextTrimmer(ProfileMap)
1666         .trimAndMergeColdContextProfiles(
1667             SampleProfColdThreshold, SampleTrimColdContext,
1668             SampleMergeColdContext, SampleColdContextFrameDepth, false);
1669   }
1670 
1671   if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
1672     ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
1673     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1674   } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
1675     ProfileConverter CSConverter(ProfileMap);
1676     CSConverter.convertCSProfiles();
1677     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
1678   }
1679 
1680   filterFunctions(ProfileMap);
1681 
1682   auto WriterOrErr =
1683       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
1684   if (std::error_code EC = WriterOrErr.getError())
1685     exitWithErrorCode(EC, OutputFilename);
1686 
1687   auto Writer = std::move(WriterOrErr.get());
1688   // WriterList will have StringRef refering to string in Buffer.
1689   // Make sure Buffer lives as long as WriterList.
1690   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
1691   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
1692                         CompressAllSections, UseMD5, GenPartialProfile);
1693 
1694   // If OutputSizeLimit is 0 (default), it is the same as write().
1695   if (std::error_code EC =
1696           Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
1697     exitWithErrorCode(EC);
1698 }
1699 
parseWeightedFile(const StringRef & WeightedFilename)1700 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
1701   StringRef WeightStr, FileName;
1702   std::tie(WeightStr, FileName) = WeightedFilename.split(',');
1703 
1704   uint64_t Weight;
1705   if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
1706     exitWithError("input weight must be a positive integer");
1707 
1708   llvm::SmallString<128> ResolvedFileName;
1709   llvm::sys::fs::expand_tilde(FileName, ResolvedFileName);
1710 
1711   return {std::string(ResolvedFileName), Weight};
1712 }
1713 
addWeightedInput(WeightedFileVector & WNI,const WeightedFile & WF)1714 static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
1715   StringRef Filename = WF.Filename;
1716   uint64_t Weight = WF.Weight;
1717 
1718   // If it's STDIN just pass it on.
1719   if (Filename == "-") {
1720     WNI.push_back({std::string(Filename), Weight});
1721     return;
1722   }
1723 
1724   llvm::sys::fs::file_status Status;
1725   llvm::sys::fs::status(Filename, Status);
1726   if (!llvm::sys::fs::exists(Status))
1727     exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
1728                       Filename);
1729   // If it's a source file, collect it.
1730   if (llvm::sys::fs::is_regular_file(Status)) {
1731     WNI.push_back({std::string(Filename), Weight});
1732     return;
1733   }
1734 
1735   if (llvm::sys::fs::is_directory(Status)) {
1736     std::error_code EC;
1737     for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
1738          F != E && !EC; F.increment(EC)) {
1739       if (llvm::sys::fs::is_regular_file(F->path())) {
1740         addWeightedInput(WNI, {F->path(), Weight});
1741       }
1742     }
1743     if (EC)
1744       exitWithErrorCode(EC, Filename);
1745   }
1746 }
1747 
parseInputFilenamesFile(MemoryBuffer * Buffer,WeightedFileVector & WFV)1748 static void parseInputFilenamesFile(MemoryBuffer *Buffer,
1749                                     WeightedFileVector &WFV) {
1750   if (!Buffer)
1751     return;
1752 
1753   SmallVector<StringRef, 8> Entries;
1754   StringRef Data = Buffer->getBuffer();
1755   Data.split(Entries, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
1756   for (const StringRef &FileWeightEntry : Entries) {
1757     StringRef SanitizedEntry = FileWeightEntry.trim(" \t\v\f\r");
1758     // Skip comments.
1759     if (SanitizedEntry.starts_with("#"))
1760       continue;
1761     // If there's no comma, it's an unweighted profile.
1762     else if (!SanitizedEntry.contains(','))
1763       addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
1764     else
1765       addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
1766   }
1767 }
1768 
merge_main(StringRef ProgName)1769 static int merge_main(StringRef ProgName) {
1770   WeightedFileVector WeightedInputs;
1771   for (StringRef Filename : InputFilenames)
1772     addWeightedInput(WeightedInputs, {std::string(Filename), 1});
1773   for (StringRef WeightedFilename : WeightedInputFilenames)
1774     addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
1775 
1776   // Make sure that the file buffer stays alive for the duration of the
1777   // weighted input vector's lifetime.
1778   auto Buffer = getInputFileBuf(InputFilenamesFile);
1779   parseInputFilenamesFile(Buffer.get(), WeightedInputs);
1780 
1781   if (WeightedInputs.empty())
1782     exitWithError("no input files specified. See " + ProgName + " merge -help");
1783 
1784   if (DumpInputFileList) {
1785     for (auto &WF : WeightedInputs)
1786       outs() << WF.Weight << "," << WF.Filename << "\n";
1787     return 0;
1788   }
1789 
1790   std::unique_ptr<SymbolRemapper> Remapper;
1791   if (!RemappingFile.empty())
1792     Remapper = SymbolRemapper::create(RemappingFile);
1793 
1794   if (!SupplInstrWithSample.empty()) {
1795     if (ProfileKind != instr)
1796       exitWithError(
1797           "-supplement-instr-with-sample can only work with -instr. ");
1798 
1799     supplementInstrProfile(WeightedInputs, SupplInstrWithSample, OutputSparse,
1800                            SupplMinSizeThreshold, ZeroCounterThreshold,
1801                            InstrProfColdThreshold);
1802     return 0;
1803   }
1804 
1805   if (ProfileKind == instr)
1806     mergeInstrProfile(WeightedInputs, Remapper.get(), MaxDbgCorrelationWarnings,
1807                       ProfiledBinary);
1808   else
1809     mergeSampleProfile(WeightedInputs, Remapper.get(), ProfileSymbolListFile,
1810                        OutputSizeLimit);
1811   return 0;
1812 }
1813 
1814 /// Computer the overlap b/w profile BaseFilename and profile TestFilename.
overlapInstrProfile(const std::string & BaseFilename,const std::string & TestFilename,const OverlapFuncFilters & FuncFilter,raw_fd_ostream & OS,bool IsCS)1815 static void overlapInstrProfile(const std::string &BaseFilename,
1816                                 const std::string &TestFilename,
1817                                 const OverlapFuncFilters &FuncFilter,
1818                                 raw_fd_ostream &OS, bool IsCS) {
1819   std::mutex ErrorLock;
1820   SmallSet<instrprof_error, 4> WriterErrorCodes;
1821   WriterContext Context(false, ErrorLock, WriterErrorCodes);
1822   WeightedFile WeightedInput{BaseFilename, 1};
1823   OverlapStats Overlap;
1824   Error E = Overlap.accumulateCounts(BaseFilename, TestFilename, IsCS);
1825   if (E)
1826     exitWithError(std::move(E), "error in getting profile count sums");
1827   if (Overlap.Base.CountSum < 1.0f) {
1828     OS << "Sum of edge counts for profile " << BaseFilename << " is 0.\n";
1829     exit(0);
1830   }
1831   if (Overlap.Test.CountSum < 1.0f) {
1832     OS << "Sum of edge counts for profile " << TestFilename << " is 0.\n";
1833     exit(0);
1834   }
1835   loadInput(WeightedInput, nullptr, nullptr, /*ProfiledBinary=*/"", &Context);
1836   overlapInput(BaseFilename, TestFilename, &Context, Overlap, FuncFilter, OS,
1837                IsCS);
1838   Overlap.dump(OS);
1839 }
1840 
1841 namespace {
1842 struct SampleOverlapStats {
1843   SampleContext BaseName;
1844   SampleContext TestName;
1845   // Number of overlap units
1846   uint64_t OverlapCount = 0;
1847   // Total samples of overlap units
1848   uint64_t OverlapSample = 0;
1849   // Number of and total samples of units that only present in base or test
1850   // profile
1851   uint64_t BaseUniqueCount = 0;
1852   uint64_t BaseUniqueSample = 0;
1853   uint64_t TestUniqueCount = 0;
1854   uint64_t TestUniqueSample = 0;
1855   // Number of units and total samples in base or test profile
1856   uint64_t BaseCount = 0;
1857   uint64_t BaseSample = 0;
1858   uint64_t TestCount = 0;
1859   uint64_t TestSample = 0;
1860   // Number of and total samples of units that present in at least one profile
1861   uint64_t UnionCount = 0;
1862   uint64_t UnionSample = 0;
1863   // Weighted similarity
1864   double Similarity = 0.0;
1865   // For SampleOverlapStats instances representing functions, weights of the
1866   // function in base and test profiles
1867   double BaseWeight = 0.0;
1868   double TestWeight = 0.0;
1869 
1870   SampleOverlapStats() = default;
1871 };
1872 } // end anonymous namespace
1873 
namespace {
/// Per-function sample totals: the sum of samples, the largest single sample
/// and the number of hot blocks. All three default to zero.
struct FuncSampleStats {
  uint64_t SampleSum{0};
  uint64_t MaxSample{0};
  uint64_t HotBlockCount{0};

  FuncSampleStats() = default;

  /// Initialize every field from the argument of the same name.
  FuncSampleStats(uint64_t SampleSum, uint64_t MaxSample,
                  uint64_t HotBlockCount)
      : SampleSum{SampleSum}, MaxSample{MaxSample},
        HotBlockCount{HotBlockCount} {}
};
} // end anonymous namespace
1886 
namespace {
/// Outcome of comparing the current elements of two sorted maps.
enum MatchStatus { MS_Match, MS_FirstUnique, MS_SecondUnique, MS_None };

/// Cursor for walking two sorted maps in lock step. T is expected to be a map
/// iterator type: updateOneStep() compares the `first` member of the elements
/// the two iterators point at.
template <class T> class MatchStep {
public:
  MatchStep() = delete;

  /// Start at \p FirstIter / \p SecondIter with no comparison made yet.
  MatchStep(T FirstIter, T FirstEnd, T SecondIter, T SecondEnd)
      : FirstIter(FirstIter), FirstEnd(FirstEnd), SecondIter(SecondIter),
        SecondEnd(SecondEnd) {}

  /// True once the first container has been consumed.
  bool isFirstFinished() const { return FirstIter == FirstEnd; }

  /// True once the second container has been consumed.
  bool isSecondFinished() const { return SecondIter == SecondEnd; }

  /// True once both containers have been consumed.
  bool areBothFinished() const {
    return isFirstFinished() && isSecondFinished();
  }

  /// Advance according to the status of the previous step, then recompute the
  /// status from the elements now under the two iterators. When the previous
  /// status is MS_None no comparison has happened yet, so the iterators stay
  /// where they are and only the status is computed.
  void updateOneStep();

  T getFirstIter() const { return FirstIter; }

  T getSecondIter() const { return SecondIter; }

  MatchStatus getMatchStatus() const { return Status; }

private:
  // Position in, and end of, the first container.
  T FirstIter;
  T FirstEnd;
  // Position in, and end of, the second container.
  T SecondIter;
  T SecondEnd;
  // Result of the comparison made at the current step.
  MatchStatus Status = MS_None;
};
} // end anonymous namespace
1932 
updateOneStep()1933 template <class T> void MatchStep<T>::updateOneStep() {
1934   switch (Status) {
1935   case MS_Match:
1936     ++FirstIter;
1937     ++SecondIter;
1938     break;
1939   case MS_FirstUnique:
1940     ++FirstIter;
1941     break;
1942   case MS_SecondUnique:
1943     ++SecondIter;
1944     break;
1945   case MS_None:
1946     break;
1947   }
1948 
1949   // Update Status according to iterators at the current step.
1950   if (areBothFinished())
1951     return;
1952   if (FirstIter != FirstEnd &&
1953       (SecondIter == SecondEnd || FirstIter->first < SecondIter->first))
1954     Status = MS_FirstUnique;
1955   else if (SecondIter != SecondEnd &&
1956            (FirstIter == FirstEnd || SecondIter->first < FirstIter->first))
1957     Status = MS_SecondUnique;
1958   else
1959     Status = MS_Match;
1960 }
1961 
1962 // Return the sum of line/block samples, the max line/block sample, and the
1963 // number of line/block samples above the given threshold in a function
1964 // including its inlinees.
getFuncSampleStats(const sampleprof::FunctionSamples & Func,FuncSampleStats & FuncStats,uint64_t HotThreshold)1965 static void getFuncSampleStats(const sampleprof::FunctionSamples &Func,
1966                                FuncSampleStats &FuncStats,
1967                                uint64_t HotThreshold) {
1968   for (const auto &L : Func.getBodySamples()) {
1969     uint64_t Sample = L.second.getSamples();
1970     FuncStats.SampleSum += Sample;
1971     FuncStats.MaxSample = std::max(FuncStats.MaxSample, Sample);
1972     if (Sample >= HotThreshold)
1973       ++FuncStats.HotBlockCount;
1974   }
1975 
1976   for (const auto &C : Func.getCallsiteSamples()) {
1977     for (const auto &F : C.second)
1978       getFuncSampleStats(F.second, FuncStats, HotThreshold);
1979   }
1980 }
1981 
1982 /// Predicate that determines if a function is hot with a given threshold. We
1983 /// keep it separate from its callsites for possible extension in the future.
isFunctionHot(const FuncSampleStats & FuncStats,uint64_t HotThreshold)1984 static bool isFunctionHot(const FuncSampleStats &FuncStats,
1985                           uint64_t HotThreshold) {
1986   // We intentionally compare the maximum sample count in a function with the
1987   // HotThreshold to get an approximate determination on hot functions.
1988   return (FuncStats.MaxSample >= HotThreshold);
1989 }
1990 
1991 namespace {
1992 class SampleOverlapAggregator {
1993 public:
SampleOverlapAggregator(const std::string & BaseFilename,const std::string & TestFilename,double LowSimilarityThreshold,double Epsilon,const OverlapFuncFilters & FuncFilter)1994   SampleOverlapAggregator(const std::string &BaseFilename,
1995                           const std::string &TestFilename,
1996                           double LowSimilarityThreshold, double Epsilon,
1997                           const OverlapFuncFilters &FuncFilter)
1998       : BaseFilename(BaseFilename), TestFilename(TestFilename),
1999         LowSimilarityThreshold(LowSimilarityThreshold), Epsilon(Epsilon),
2000         FuncFilter(FuncFilter) {}
2001 
2002   /// Detect 0-sample input profile and report to output stream. This interface
2003   /// should be called after loadProfiles().
2004   bool detectZeroSampleProfile(raw_fd_ostream &OS) const;
2005 
2006   /// Write out function-level similarity statistics for functions specified by
2007   /// options --function, --value-cutoff, and --similarity-cutoff.
2008   void dumpFuncSimilarity(raw_fd_ostream &OS) const;
2009 
2010   /// Write out program-level similarity and overlap statistics.
2011   void dumpProgramSummary(raw_fd_ostream &OS) const;
2012 
2013   /// Write out hot-function and hot-block statistics for base_profile,
2014   /// test_profile, and their overlap. For both cases, the overlap HO is
2015   /// calculated as follows:
2016   ///    Given the number of functions (or blocks) that are hot in both profiles
2017   ///    HCommon and the number of functions (or blocks) that are hot in at
2018   ///    least one profile HUnion, HO = HCommon / HUnion.
2019   void dumpHotFuncAndBlockOverlap(raw_fd_ostream &OS) const;
2020 
2021   /// This function tries matching functions in base and test profiles. For each
2022   /// pair of matched functions, it aggregates the function-level
2023   /// similarity into a profile-level similarity. It also dump function-level
2024   /// similarity information of functions specified by --function,
2025   /// --value-cutoff, and --similarity-cutoff options. The program-level
2026   /// similarity PS is computed as follows:
2027   ///     Given function-level similarity FS(A) for all function A, the
2028   ///     weight of function A in base profile WB(A), and the weight of function
2029   ///     A in test profile WT(A), compute PS(base_profile, test_profile) =
2030   ///     sum_A(FS(A) * avg(WB(A), WT(A))) ranging in [0.0f to 1.0f] with 0.0
2031   ///     meaning no-overlap.
2032   void computeSampleProfileOverlap(raw_fd_ostream &OS);
2033 
2034   /// Initialize ProfOverlap with the sum of samples in base and test
2035   /// profiles. This function also computes and keeps the sum of samples and
2036   /// max sample counts of each function in BaseStats and TestStats for later
2037   /// use to avoid re-computations.
2038   void initializeSampleProfileOverlap();
2039 
2040   /// Load profiles specified by BaseFilename and TestFilename.
2041   std::error_code loadProfiles();
2042 
2043   using FuncSampleStatsMap =
2044       std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
2045 
2046 private:
2047   SampleOverlapStats ProfOverlap;
2048   SampleOverlapStats HotFuncOverlap;
2049   SampleOverlapStats HotBlockOverlap;
2050   std::string BaseFilename;
2051   std::string TestFilename;
2052   std::unique_ptr<sampleprof::SampleProfileReader> BaseReader;
2053   std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
2054   // BaseStats and TestStats hold FuncSampleStats for each function, with
2055   // function name as the key.
2056   FuncSampleStatsMap BaseStats;
2057   FuncSampleStatsMap TestStats;
2058   // Low similarity threshold in floating point number
2059   double LowSimilarityThreshold;
2060   // Block samples above BaseHotThreshold or TestHotThreshold are considered hot
2061   // for tracking hot blocks.
2062   uint64_t BaseHotThreshold;
2063   uint64_t TestHotThreshold;
2064   // A small threshold used to round the results of floating point accumulations
2065   // to resolve imprecision.
2066   const double Epsilon;
2067   std::multimap<double, SampleOverlapStats, std::greater<double>>
2068       FuncSimilarityDump;
2069   // FuncFilter carries specifications in options --value-cutoff and
2070   // --function.
2071   OverlapFuncFilters FuncFilter;
2072   // Column offsets for printing the function-level details table.
2073   static const unsigned int TestWeightCol = 15;
2074   static const unsigned int SimilarityCol = 30;
2075   static const unsigned int OverlapCol = 43;
2076   static const unsigned int BaseUniqueCol = 53;
2077   static const unsigned int TestUniqueCol = 67;
2078   static const unsigned int BaseSampleCol = 81;
2079   static const unsigned int TestSampleCol = 96;
2080   static const unsigned int FuncNameCol = 111;
2081 
2082   /// Return a similarity of two line/block sample counters in the same
2083   /// function in base and test profiles. The line/block-similarity BS(i) is
2084   /// computed as follows:
2085   ///    For an offsets i, given the sample count at i in base profile BB(i),
2086   ///    the sample count at i in test profile BT(i), the sum of sample counts
2087   ///    in this function in base profile SB, and the sum of sample counts in
2088   ///    this function in test profile ST, compute BS(i) = 1.0 - fabs(BB(i)/SB -
2089   ///    BT(i)/ST), ranging in [0.0f to 1.0f] with 0.0 meaning no-overlap.
2090   double computeBlockSimilarity(uint64_t BaseSample, uint64_t TestSample,
2091                                 const SampleOverlapStats &FuncOverlap) const;
2092 
2093   void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
2094                              uint64_t HotBlockCount);
2095 
2096   void getHotFunctions(const FuncSampleStatsMap &ProfStats,
2097                        FuncSampleStatsMap &HotFunc,
2098                        uint64_t HotThreshold) const;
2099 
2100   void computeHotFuncOverlap();
2101 
2102   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2103   /// Difference for two sample units in a matched function according to the
2104   /// given match status.
2105   void updateOverlapStatsForFunction(uint64_t BaseSample, uint64_t TestSample,
2106                                      uint64_t HotBlockCount,
2107                                      SampleOverlapStats &FuncOverlap,
2108                                      double &Difference, MatchStatus Status);
2109 
2110   /// This function updates statistics in FuncOverlap, HotBlockOverlap, and
2111   /// Difference for unmatched callees that only present in one profile in a
2112   /// matched caller function.
2113   void updateForUnmatchedCallee(const sampleprof::FunctionSamples &Func,
2114                                 SampleOverlapStats &FuncOverlap,
2115                                 double &Difference, MatchStatus Status);
2116 
2117   /// This function updates sample overlap statistics of an overlap function in
2118   /// base and test profile. It also calculates a function-internal similarity
2119   /// FIS as follows:
2120   ///    For offsets i that have samples in at least one profile in this
2121   ///    function A, given BS(i) returned by computeBlockSimilarity(), compute
2122   ///    FIS(A) = (2.0 - sum_i(1.0 - BS(i))) / 2, ranging in [0.0f to 1.0f] with
2123   ///    0.0 meaning no overlap.
2124   double computeSampleFunctionInternalOverlap(
2125       const sampleprof::FunctionSamples &BaseFunc,
2126       const sampleprof::FunctionSamples &TestFunc,
2127       SampleOverlapStats &FuncOverlap);
2128 
2129   /// Function-level similarity (FS) is a weighted value over function internal
2130   /// similarity (FIS). This function computes a function's FS from its FIS by
2131   /// applying the weight.
2132   double weightForFuncSimilarity(double FuncSimilarity, uint64_t BaseFuncSample,
2133                                  uint64_t TestFuncSample) const;
2134 
2135   /// The function-level similarity FS(A) for a function A is computed as
2136   /// follows:
2137   ///     Compute a function-internal similarity FIS(A) by
2138   ///     computeSampleFunctionInternalOverlap(). Then, with the weight of
2139   ///     function A in base profile WB(A), and the weight of function A in test
2140   ///     profile WT(A), compute FS(A) = FIS(A) * (1.0 - fabs(WB(A) - WT(A)))
2141   ///     ranging in [0.0f to 1.0f] with 0.0 meaning no overlap.
2142   double
2143   computeSampleFunctionOverlap(const sampleprof::FunctionSamples *BaseFunc,
2144                                const sampleprof::FunctionSamples *TestFunc,
2145                                SampleOverlapStats *FuncOverlap,
2146                                uint64_t BaseFuncSample,
2147                                uint64_t TestFuncSample);
2148 
2149   /// Profile-level similarity (PS) is a weighted aggregate over function-level
2150   /// similarities (FS). This method weights the FS value by the function
2151   /// weights in the base and test profiles for the aggregation.
2152   double weightByImportance(double FuncSimilarity, uint64_t BaseFuncSample,
2153                             uint64_t TestFuncSample) const;
2154 };
2155 } // end anonymous namespace
2156 
detectZeroSampleProfile(raw_fd_ostream & OS) const2157 bool SampleOverlapAggregator::detectZeroSampleProfile(
2158     raw_fd_ostream &OS) const {
2159   bool HaveZeroSample = false;
2160   if (ProfOverlap.BaseSample == 0) {
2161     OS << "Sum of sample counts for profile " << BaseFilename << " is 0.\n";
2162     HaveZeroSample = true;
2163   }
2164   if (ProfOverlap.TestSample == 0) {
2165     OS << "Sum of sample counts for profile " << TestFilename << " is 0.\n";
2166     HaveZeroSample = true;
2167   }
2168   return HaveZeroSample;
2169 }
2170 
computeBlockSimilarity(uint64_t BaseSample,uint64_t TestSample,const SampleOverlapStats & FuncOverlap) const2171 double SampleOverlapAggregator::computeBlockSimilarity(
2172     uint64_t BaseSample, uint64_t TestSample,
2173     const SampleOverlapStats &FuncOverlap) const {
2174   double BaseFrac = 0.0;
2175   double TestFrac = 0.0;
2176   if (FuncOverlap.BaseSample > 0)
2177     BaseFrac = static_cast<double>(BaseSample) / FuncOverlap.BaseSample;
2178   if (FuncOverlap.TestSample > 0)
2179     TestFrac = static_cast<double>(TestSample) / FuncOverlap.TestSample;
2180   return 1.0 - std::fabs(BaseFrac - TestFrac);
2181 }
2182 
updateHotBlockOverlap(uint64_t BaseSample,uint64_t TestSample,uint64_t HotBlockCount)2183 void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
2184                                                     uint64_t TestSample,
2185                                                     uint64_t HotBlockCount) {
2186   bool IsBaseHot = (BaseSample >= BaseHotThreshold);
2187   bool IsTestHot = (TestSample >= TestHotThreshold);
2188   if (!IsBaseHot && !IsTestHot)
2189     return;
2190 
2191   HotBlockOverlap.UnionCount += HotBlockCount;
2192   if (IsBaseHot)
2193     HotBlockOverlap.BaseCount += HotBlockCount;
2194   if (IsTestHot)
2195     HotBlockOverlap.TestCount += HotBlockCount;
2196   if (IsBaseHot && IsTestHot)
2197     HotBlockOverlap.OverlapCount += HotBlockCount;
2198 }
2199 
getHotFunctions(const FuncSampleStatsMap & ProfStats,FuncSampleStatsMap & HotFunc,uint64_t HotThreshold) const2200 void SampleOverlapAggregator::getHotFunctions(
2201     const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
2202     uint64_t HotThreshold) const {
2203   for (const auto &F : ProfStats) {
2204     if (isFunctionHot(F.second, HotThreshold))
2205       HotFunc.emplace(F.first, F.second);
2206   }
2207 }
2208 
computeHotFuncOverlap()2209 void SampleOverlapAggregator::computeHotFuncOverlap() {
2210   FuncSampleStatsMap BaseHotFunc;
2211   getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
2212   HotFuncOverlap.BaseCount = BaseHotFunc.size();
2213 
2214   FuncSampleStatsMap TestHotFunc;
2215   getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
2216   HotFuncOverlap.TestCount = TestHotFunc.size();
2217   HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
2218 
2219   for (const auto &F : BaseHotFunc) {
2220     if (TestHotFunc.count(F.first))
2221       ++HotFuncOverlap.OverlapCount;
2222     else
2223       ++HotFuncOverlap.UnionCount;
2224   }
2225 }
2226 
updateOverlapStatsForFunction(uint64_t BaseSample,uint64_t TestSample,uint64_t HotBlockCount,SampleOverlapStats & FuncOverlap,double & Difference,MatchStatus Status)2227 void SampleOverlapAggregator::updateOverlapStatsForFunction(
2228     uint64_t BaseSample, uint64_t TestSample, uint64_t HotBlockCount,
2229     SampleOverlapStats &FuncOverlap, double &Difference, MatchStatus Status) {
2230   assert(Status != MS_None &&
2231          "Match status should be updated before updating overlap statistics");
2232   if (Status == MS_FirstUnique) {
2233     TestSample = 0;
2234     FuncOverlap.BaseUniqueSample += BaseSample;
2235   } else if (Status == MS_SecondUnique) {
2236     BaseSample = 0;
2237     FuncOverlap.TestUniqueSample += TestSample;
2238   } else {
2239     ++FuncOverlap.OverlapCount;
2240   }
2241 
2242   FuncOverlap.UnionSample += std::max(BaseSample, TestSample);
2243   FuncOverlap.OverlapSample += std::min(BaseSample, TestSample);
2244   Difference +=
2245       1.0 - computeBlockSimilarity(BaseSample, TestSample, FuncOverlap);
2246   updateHotBlockOverlap(BaseSample, TestSample, HotBlockCount);
2247 }
2248 
updateForUnmatchedCallee(const sampleprof::FunctionSamples & Func,SampleOverlapStats & FuncOverlap,double & Difference,MatchStatus Status)2249 void SampleOverlapAggregator::updateForUnmatchedCallee(
2250     const sampleprof::FunctionSamples &Func, SampleOverlapStats &FuncOverlap,
2251     double &Difference, MatchStatus Status) {
2252   assert((Status == MS_FirstUnique || Status == MS_SecondUnique) &&
2253          "Status must be either of the two unmatched cases");
2254   FuncSampleStats FuncStats;
2255   if (Status == MS_FirstUnique) {
2256     getFuncSampleStats(Func, FuncStats, BaseHotThreshold);
2257     updateOverlapStatsForFunction(FuncStats.SampleSum, 0,
2258                                   FuncStats.HotBlockCount, FuncOverlap,
2259                                   Difference, Status);
2260   } else {
2261     getFuncSampleStats(Func, FuncStats, TestHotThreshold);
2262     updateOverlapStatsForFunction(0, FuncStats.SampleSum,
2263                                   FuncStats.HotBlockCount, FuncOverlap,
2264                                   Difference, Status);
2265   }
2266 }
2267 
computeSampleFunctionInternalOverlap(const sampleprof::FunctionSamples & BaseFunc,const sampleprof::FunctionSamples & TestFunc,SampleOverlapStats & FuncOverlap)2268 double SampleOverlapAggregator::computeSampleFunctionInternalOverlap(
2269     const sampleprof::FunctionSamples &BaseFunc,
2270     const sampleprof::FunctionSamples &TestFunc,
2271     SampleOverlapStats &FuncOverlap) {
2272 
2273   using namespace sampleprof;
2274 
2275   double Difference = 0;
2276 
2277   // Accumulate Difference for regular line/block samples in the function.
2278   // We match them through sort-merge join algorithm because
2279   // FunctionSamples::getBodySamples() returns a map of sample counters ordered
2280   // by their offsets.
2281   MatchStep<BodySampleMap::const_iterator> BlockIterStep(
2282       BaseFunc.getBodySamples().cbegin(), BaseFunc.getBodySamples().cend(),
2283       TestFunc.getBodySamples().cbegin(), TestFunc.getBodySamples().cend());
2284   BlockIterStep.updateOneStep();
2285   while (!BlockIterStep.areBothFinished()) {
2286     uint64_t BaseSample =
2287         BlockIterStep.isFirstFinished()
2288             ? 0
2289             : BlockIterStep.getFirstIter()->second.getSamples();
2290     uint64_t TestSample =
2291         BlockIterStep.isSecondFinished()
2292             ? 0
2293             : BlockIterStep.getSecondIter()->second.getSamples();
2294     updateOverlapStatsForFunction(BaseSample, TestSample, 1, FuncOverlap,
2295                                   Difference, BlockIterStep.getMatchStatus());
2296 
2297     BlockIterStep.updateOneStep();
2298   }
2299 
2300   // Accumulate Difference for callsite lines in the function. We match
2301   // them through sort-merge algorithm because
2302   // FunctionSamples::getCallsiteSamples() returns a map of callsite records
2303   // ordered by their offsets.
2304   MatchStep<CallsiteSampleMap::const_iterator> CallsiteIterStep(
2305       BaseFunc.getCallsiteSamples().cbegin(),
2306       BaseFunc.getCallsiteSamples().cend(),
2307       TestFunc.getCallsiteSamples().cbegin(),
2308       TestFunc.getCallsiteSamples().cend());
2309   CallsiteIterStep.updateOneStep();
2310   while (!CallsiteIterStep.areBothFinished()) {
2311     MatchStatus CallsiteStepStatus = CallsiteIterStep.getMatchStatus();
2312     assert(CallsiteStepStatus != MS_None &&
2313            "Match status should be updated before entering loop body");
2314 
2315     if (CallsiteStepStatus != MS_Match) {
2316       auto Callsite = (CallsiteStepStatus == MS_FirstUnique)
2317                           ? CallsiteIterStep.getFirstIter()
2318                           : CallsiteIterStep.getSecondIter();
2319       for (const auto &F : Callsite->second)
2320         updateForUnmatchedCallee(F.second, FuncOverlap, Difference,
2321                                  CallsiteStepStatus);
2322     } else {
2323       // There may be multiple inlinees at the same offset, so we need to try
2324       // matching all of them. This match is implemented through sort-merge
2325       // algorithm because callsite records at the same offset are ordered by
2326       // function names.
2327       MatchStep<FunctionSamplesMap::const_iterator> CalleeIterStep(
2328           CallsiteIterStep.getFirstIter()->second.cbegin(),
2329           CallsiteIterStep.getFirstIter()->second.cend(),
2330           CallsiteIterStep.getSecondIter()->second.cbegin(),
2331           CallsiteIterStep.getSecondIter()->second.cend());
2332       CalleeIterStep.updateOneStep();
2333       while (!CalleeIterStep.areBothFinished()) {
2334         MatchStatus CalleeStepStatus = CalleeIterStep.getMatchStatus();
2335         if (CalleeStepStatus != MS_Match) {
2336           auto Callee = (CalleeStepStatus == MS_FirstUnique)
2337                             ? CalleeIterStep.getFirstIter()
2338                             : CalleeIterStep.getSecondIter();
2339           updateForUnmatchedCallee(Callee->second, FuncOverlap, Difference,
2340                                    CalleeStepStatus);
2341         } else {
2342           // An inlined function can contain other inlinees inside, so compute
2343           // the Difference recursively.
2344           Difference += 2.0 - 2 * computeSampleFunctionInternalOverlap(
2345                                       CalleeIterStep.getFirstIter()->second,
2346                                       CalleeIterStep.getSecondIter()->second,
2347                                       FuncOverlap);
2348         }
2349         CalleeIterStep.updateOneStep();
2350       }
2351     }
2352     CallsiteIterStep.updateOneStep();
2353   }
2354 
2355   // Difference reflects the total differences of line/block samples in this
2356   // function and ranges in [0.0f to 2.0f]. Take (2.0 - Difference) / 2 to
2357   // reflect the similarity between function profiles in [0.0f to 1.0f].
2358   return (2.0 - Difference) / 2;
2359 }
2360 
weightForFuncSimilarity(double FuncInternalSimilarity,uint64_t BaseFuncSample,uint64_t TestFuncSample) const2361 double SampleOverlapAggregator::weightForFuncSimilarity(
2362     double FuncInternalSimilarity, uint64_t BaseFuncSample,
2363     uint64_t TestFuncSample) const {
2364   // Compute the weight as the distance between the function weights in two
2365   // profiles.
2366   double BaseFrac = 0.0;
2367   double TestFrac = 0.0;
2368   assert(ProfOverlap.BaseSample > 0 &&
2369          "Total samples in base profile should be greater than 0");
2370   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample;
2371   assert(ProfOverlap.TestSample > 0 &&
2372          "Total samples in test profile should be greater than 0");
2373   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample;
2374   double WeightDistance = std::fabs(BaseFrac - TestFrac);
2375 
2376   // Take WeightDistance into the similarity.
2377   return FuncInternalSimilarity * (1 - WeightDistance);
2378 }
2379 
2380 double
weightByImportance(double FuncSimilarity,uint64_t BaseFuncSample,uint64_t TestFuncSample) const2381 SampleOverlapAggregator::weightByImportance(double FuncSimilarity,
2382                                             uint64_t BaseFuncSample,
2383                                             uint64_t TestFuncSample) const {
2384 
2385   double BaseFrac = 0.0;
2386   double TestFrac = 0.0;
2387   assert(ProfOverlap.BaseSample > 0 &&
2388          "Total samples in base profile should be greater than 0");
2389   BaseFrac = static_cast<double>(BaseFuncSample) / ProfOverlap.BaseSample / 2.0;
2390   assert(ProfOverlap.TestSample > 0 &&
2391          "Total samples in test profile should be greater than 0");
2392   TestFrac = static_cast<double>(TestFuncSample) / ProfOverlap.TestSample / 2.0;
2393   return FuncSimilarity * (BaseFrac + TestFrac);
2394 }
2395 
computeSampleFunctionOverlap(const sampleprof::FunctionSamples * BaseFunc,const sampleprof::FunctionSamples * TestFunc,SampleOverlapStats * FuncOverlap,uint64_t BaseFuncSample,uint64_t TestFuncSample)2396 double SampleOverlapAggregator::computeSampleFunctionOverlap(
2397     const sampleprof::FunctionSamples *BaseFunc,
2398     const sampleprof::FunctionSamples *TestFunc,
2399     SampleOverlapStats *FuncOverlap, uint64_t BaseFuncSample,
2400     uint64_t TestFuncSample) {
2401   // Default function internal similarity before weighted, meaning two functions
2402   // has no overlap.
2403   const double DefaultFuncInternalSimilarity = 0;
2404   double FuncSimilarity;
2405   double FuncInternalSimilarity;
2406 
2407   // If BaseFunc or TestFunc is nullptr, it means the functions do not overlap.
2408   // In this case, we use DefaultFuncInternalSimilarity as the function internal
2409   // similarity.
2410   if (!BaseFunc || !TestFunc) {
2411     FuncInternalSimilarity = DefaultFuncInternalSimilarity;
2412   } else {
2413     assert(FuncOverlap != nullptr &&
2414            "FuncOverlap should be provided in this case");
2415     FuncInternalSimilarity = computeSampleFunctionInternalOverlap(
2416         *BaseFunc, *TestFunc, *FuncOverlap);
2417     // Now, FuncInternalSimilarity may be a little less than 0 due to
2418     // imprecision of floating point accumulations. Make it zero if the
2419     // difference is below Epsilon.
2420     FuncInternalSimilarity = (std::fabs(FuncInternalSimilarity - 0) < Epsilon)
2421                                  ? 0
2422                                  : FuncInternalSimilarity;
2423   }
2424   FuncSimilarity = weightForFuncSimilarity(FuncInternalSimilarity,
2425                                            BaseFuncSample, TestFuncSample);
2426   return FuncSimilarity;
2427 }
2428 
computeSampleProfileOverlap(raw_fd_ostream & OS)2429 void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
2430   using namespace sampleprof;
2431 
2432   std::unordered_map<SampleContext, const FunctionSamples *,
2433                      SampleContext::Hash>
2434       BaseFuncProf;
2435   const auto &BaseProfiles = BaseReader->getProfiles();
2436   for (const auto &BaseFunc : BaseProfiles) {
2437     BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
2438   }
2439   ProfOverlap.UnionCount = BaseFuncProf.size();
2440 
2441   const auto &TestProfiles = TestReader->getProfiles();
2442   for (const auto &TestFunc : TestProfiles) {
2443     SampleOverlapStats FuncOverlap;
2444     FuncOverlap.TestName = TestFunc.second.getContext();
2445     assert(TestStats.count(FuncOverlap.TestName) &&
2446            "TestStats should have records for all functions in test profile "
2447            "except inlinees");
2448     FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
2449 
2450     bool Matched = false;
2451     const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
2452     if (Match == BaseFuncProf.end()) {
2453       const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
2454       ++ProfOverlap.TestUniqueCount;
2455       ProfOverlap.TestUniqueSample += FuncStats.SampleSum;
2456       FuncOverlap.TestUniqueSample = FuncStats.SampleSum;
2457 
2458       updateHotBlockOverlap(0, FuncStats.SampleSum, FuncStats.HotBlockCount);
2459 
2460       double FuncSimilarity = computeSampleFunctionOverlap(
2461           nullptr, nullptr, nullptr, 0, FuncStats.SampleSum);
2462       ProfOverlap.Similarity +=
2463           weightByImportance(FuncSimilarity, 0, FuncStats.SampleSum);
2464 
2465       ++ProfOverlap.UnionCount;
2466       ProfOverlap.UnionSample += FuncStats.SampleSum;
2467     } else {
2468       ++ProfOverlap.OverlapCount;
2469 
2470       // Two functions match with each other. Compute function-level overlap and
2471       // aggregate them into profile-level overlap.
2472       FuncOverlap.BaseName = Match->second->getContext();
2473       assert(BaseStats.count(FuncOverlap.BaseName) &&
2474              "BaseStats should have records for all functions in base profile "
2475              "except inlinees");
2476       FuncOverlap.BaseSample = BaseStats[FuncOverlap.BaseName].SampleSum;
2477 
2478       FuncOverlap.Similarity = computeSampleFunctionOverlap(
2479           Match->second, &TestFunc.second, &FuncOverlap, FuncOverlap.BaseSample,
2480           FuncOverlap.TestSample);
2481       ProfOverlap.Similarity +=
2482           weightByImportance(FuncOverlap.Similarity, FuncOverlap.BaseSample,
2483                              FuncOverlap.TestSample);
2484       ProfOverlap.OverlapSample += FuncOverlap.OverlapSample;
2485       ProfOverlap.UnionSample += FuncOverlap.UnionSample;
2486 
2487       // Accumulate the percentage of base unique and test unique samples into
2488       // ProfOverlap.
2489       ProfOverlap.BaseUniqueSample += FuncOverlap.BaseUniqueSample;
2490       ProfOverlap.TestUniqueSample += FuncOverlap.TestUniqueSample;
2491 
2492       // Remove matched base functions for later reporting functions not found
2493       // in test profile.
2494       BaseFuncProf.erase(Match);
2495       Matched = true;
2496     }
2497 
2498     // Print function-level similarity information if specified by options.
2499     assert(TestStats.count(FuncOverlap.TestName) &&
2500            "TestStats should have records for all functions in test profile "
2501            "except inlinees");
2502     if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
2503         (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
2504         (Matched && !FuncFilter.NameFilter.empty() &&
2505          FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
2506              std::string::npos)) {
2507       assert(ProfOverlap.BaseSample > 0 &&
2508              "Total samples in base profile should be greater than 0");
2509       FuncOverlap.BaseWeight =
2510           static_cast<double>(FuncOverlap.BaseSample) / ProfOverlap.BaseSample;
2511       assert(ProfOverlap.TestSample > 0 &&
2512              "Total samples in test profile should be greater than 0");
2513       FuncOverlap.TestWeight =
2514           static_cast<double>(FuncOverlap.TestSample) / ProfOverlap.TestSample;
2515       FuncSimilarityDump.emplace(FuncOverlap.BaseWeight, FuncOverlap);
2516     }
2517   }
2518 
2519   // Traverse through functions in base profile but not in test profile.
2520   for (const auto &F : BaseFuncProf) {
2521     assert(BaseStats.count(F.second->getContext()) &&
2522            "BaseStats should have records for all functions in base profile "
2523            "except inlinees");
2524     const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
2525     ++ProfOverlap.BaseUniqueCount;
2526     ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
2527 
2528     updateHotBlockOverlap(FuncStats.SampleSum, 0, FuncStats.HotBlockCount);
2529 
2530     double FuncSimilarity = computeSampleFunctionOverlap(
2531         nullptr, nullptr, nullptr, FuncStats.SampleSum, 0);
2532     ProfOverlap.Similarity +=
2533         weightByImportance(FuncSimilarity, FuncStats.SampleSum, 0);
2534 
2535     ProfOverlap.UnionSample += FuncStats.SampleSum;
2536   }
2537 
2538   // Now, ProfSimilarity may be a little greater than 1 due to imprecision
2539   // of floating point accumulations. Make it 1.0 if the difference is below
2540   // Epsilon.
2541   ProfOverlap.Similarity = (std::fabs(ProfOverlap.Similarity - 1) < Epsilon)
2542                                ? 1
2543                                : ProfOverlap.Similarity;
2544 
2545   computeHotFuncOverlap();
2546 }
2547 
initializeSampleProfileOverlap()2548 void SampleOverlapAggregator::initializeSampleProfileOverlap() {
2549   const auto &BaseProf = BaseReader->getProfiles();
2550   for (const auto &I : BaseProf) {
2551     ++ProfOverlap.BaseCount;
2552     FuncSampleStats FuncStats;
2553     getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
2554     ProfOverlap.BaseSample += FuncStats.SampleSum;
2555     BaseStats.emplace(I.second.getContext(), FuncStats);
2556   }
2557 
2558   const auto &TestProf = TestReader->getProfiles();
2559   for (const auto &I : TestProf) {
2560     ++ProfOverlap.TestCount;
2561     FuncSampleStats FuncStats;
2562     getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
2563     ProfOverlap.TestSample += FuncStats.SampleSum;
2564     TestStats.emplace(I.second.getContext(), FuncStats);
2565   }
2566 
2567   ProfOverlap.BaseName = StringRef(BaseFilename);
2568   ProfOverlap.TestName = StringRef(TestFilename);
2569 }
2570 
dumpFuncSimilarity(raw_fd_ostream & OS) const2571 void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
2572   using namespace sampleprof;
2573 
2574   if (FuncSimilarityDump.empty())
2575     return;
2576 
2577   formatted_raw_ostream FOS(OS);
2578   FOS << "Function-level details:\n";
2579   FOS << "Base weight";
2580   FOS.PadToColumn(TestWeightCol);
2581   FOS << "Test weight";
2582   FOS.PadToColumn(SimilarityCol);
2583   FOS << "Similarity";
2584   FOS.PadToColumn(OverlapCol);
2585   FOS << "Overlap";
2586   FOS.PadToColumn(BaseUniqueCol);
2587   FOS << "Base unique";
2588   FOS.PadToColumn(TestUniqueCol);
2589   FOS << "Test unique";
2590   FOS.PadToColumn(BaseSampleCol);
2591   FOS << "Base samples";
2592   FOS.PadToColumn(TestSampleCol);
2593   FOS << "Test samples";
2594   FOS.PadToColumn(FuncNameCol);
2595   FOS << "Function name\n";
2596   for (const auto &F : FuncSimilarityDump) {
2597     double OverlapPercent =
2598         F.second.UnionSample > 0
2599             ? static_cast<double>(F.second.OverlapSample) / F.second.UnionSample
2600             : 0;
2601     double BaseUniquePercent =
2602         F.second.BaseSample > 0
2603             ? static_cast<double>(F.second.BaseUniqueSample) /
2604                   F.second.BaseSample
2605             : 0;
2606     double TestUniquePercent =
2607         F.second.TestSample > 0
2608             ? static_cast<double>(F.second.TestUniqueSample) /
2609                   F.second.TestSample
2610             : 0;
2611 
2612     FOS << format("%.2f%%", F.second.BaseWeight * 100);
2613     FOS.PadToColumn(TestWeightCol);
2614     FOS << format("%.2f%%", F.second.TestWeight * 100);
2615     FOS.PadToColumn(SimilarityCol);
2616     FOS << format("%.2f%%", F.second.Similarity * 100);
2617     FOS.PadToColumn(OverlapCol);
2618     FOS << format("%.2f%%", OverlapPercent * 100);
2619     FOS.PadToColumn(BaseUniqueCol);
2620     FOS << format("%.2f%%", BaseUniquePercent * 100);
2621     FOS.PadToColumn(TestUniqueCol);
2622     FOS << format("%.2f%%", TestUniquePercent * 100);
2623     FOS.PadToColumn(BaseSampleCol);
2624     FOS << F.second.BaseSample;
2625     FOS.PadToColumn(TestSampleCol);
2626     FOS << F.second.TestSample;
2627     FOS.PadToColumn(FuncNameCol);
2628     FOS << F.second.TestName.toString() << "\n";
2629   }
2630 }
2631 
dumpProgramSummary(raw_fd_ostream & OS) const2632 void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
2633   OS << "Profile overlap infomation for base_profile: "
2634      << ProfOverlap.BaseName.toString()
2635      << " and test_profile: " << ProfOverlap.TestName.toString()
2636      << "\nProgram level:\n";
2637 
2638   OS << "  Whole program profile similarity: "
2639      << format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
2640 
2641   assert(ProfOverlap.UnionSample > 0 &&
2642          "Total samples in two profile should be greater than 0");
2643   double OverlapPercent =
2644       static_cast<double>(ProfOverlap.OverlapSample) / ProfOverlap.UnionSample;
2645   assert(ProfOverlap.BaseSample > 0 &&
2646          "Total samples in base profile should be greater than 0");
2647   double BaseUniquePercent = static_cast<double>(ProfOverlap.BaseUniqueSample) /
2648                              ProfOverlap.BaseSample;
2649   assert(ProfOverlap.TestSample > 0 &&
2650          "Total samples in test profile should be greater than 0");
2651   double TestUniquePercent = static_cast<double>(ProfOverlap.TestUniqueSample) /
2652                              ProfOverlap.TestSample;
2653 
2654   OS << "  Whole program sample overlap: "
2655      << format("%.3f%%", OverlapPercent * 100) << "\n";
2656   OS << "    percentage of samples unique in base profile: "
2657      << format("%.3f%%", BaseUniquePercent * 100) << "\n";
2658   OS << "    percentage of samples unique in test profile: "
2659      << format("%.3f%%", TestUniquePercent * 100) << "\n";
2660   OS << "    total samples in base profile: " << ProfOverlap.BaseSample << "\n"
2661      << "    total samples in test profile: " << ProfOverlap.TestSample << "\n";
2662 
2663   assert(ProfOverlap.UnionCount > 0 &&
2664          "There should be at least one function in two input profiles");
2665   double FuncOverlapPercent =
2666       static_cast<double>(ProfOverlap.OverlapCount) / ProfOverlap.UnionCount;
2667   OS << "  Function overlap: " << format("%.3f%%", FuncOverlapPercent * 100)
2668      << "\n";
2669   OS << "    overlap functions: " << ProfOverlap.OverlapCount << "\n";
2670   OS << "    functions unique in base profile: " << ProfOverlap.BaseUniqueCount
2671      << "\n";
2672   OS << "    functions unique in test profile: " << ProfOverlap.TestUniqueCount
2673      << "\n";
2674 }
2675 
dumpHotFuncAndBlockOverlap(raw_fd_ostream & OS) const2676 void SampleOverlapAggregator::dumpHotFuncAndBlockOverlap(
2677     raw_fd_ostream &OS) const {
2678   assert(HotFuncOverlap.UnionCount > 0 &&
2679          "There should be at least one hot function in two input profiles");
2680   OS << "  Hot-function overlap: "
2681      << format("%.3f%%", static_cast<double>(HotFuncOverlap.OverlapCount) /
2682                              HotFuncOverlap.UnionCount * 100)
2683      << "\n";
2684   OS << "    overlap hot functions: " << HotFuncOverlap.OverlapCount << "\n";
2685   OS << "    hot functions unique in base profile: "
2686      << HotFuncOverlap.BaseCount - HotFuncOverlap.OverlapCount << "\n";
2687   OS << "    hot functions unique in test profile: "
2688      << HotFuncOverlap.TestCount - HotFuncOverlap.OverlapCount << "\n";
2689 
2690   assert(HotBlockOverlap.UnionCount > 0 &&
2691          "There should be at least one hot block in two input profiles");
2692   OS << "  Hot-block overlap: "
2693      << format("%.3f%%", static_cast<double>(HotBlockOverlap.OverlapCount) /
2694                              HotBlockOverlap.UnionCount * 100)
2695      << "\n";
2696   OS << "    overlap hot blocks: " << HotBlockOverlap.OverlapCount << "\n";
2697   OS << "    hot blocks unique in base profile: "
2698      << HotBlockOverlap.BaseCount - HotBlockOverlap.OverlapCount << "\n";
2699   OS << "    hot blocks unique in test profile: "
2700      << HotBlockOverlap.TestCount - HotBlockOverlap.OverlapCount << "\n";
2701 }
2702 
loadProfiles()2703 std::error_code SampleOverlapAggregator::loadProfiles() {
2704   using namespace sampleprof;
2705 
2706   LLVMContext Context;
2707   auto FS = vfs::getRealFileSystem();
2708   auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
2709                                                      FSDiscriminatorPassOption);
2710   if (std::error_code EC = BaseReaderOrErr.getError())
2711     exitWithErrorCode(EC, BaseFilename);
2712 
2713   auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
2714                                                      FSDiscriminatorPassOption);
2715   if (std::error_code EC = TestReaderOrErr.getError())
2716     exitWithErrorCode(EC, TestFilename);
2717 
2718   BaseReader = std::move(BaseReaderOrErr.get());
2719   TestReader = std::move(TestReaderOrErr.get());
2720 
2721   if (std::error_code EC = BaseReader->read())
2722     exitWithErrorCode(EC, BaseFilename);
2723   if (std::error_code EC = TestReader->read())
2724     exitWithErrorCode(EC, TestFilename);
2725   if (BaseReader->profileIsProbeBased() != TestReader->profileIsProbeBased())
2726     exitWithError(
2727         "cannot compare probe-based profile with non-probe-based profile");
2728   if (BaseReader->profileIsCS() != TestReader->profileIsCS())
2729     exitWithError("cannot compare CS profile with non-CS profile");
2730 
2731   // Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
2732   // profile summary.
2733   ProfileSummary &BasePS = BaseReader->getSummary();
2734   ProfileSummary &TestPS = TestReader->getSummary();
2735   BaseHotThreshold =
2736       ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
2737   TestHotThreshold =
2738       ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
2739 
2740   return std::error_code();
2741 }
2742 
overlapSampleProfile(const std::string & BaseFilename,const std::string & TestFilename,const OverlapFuncFilters & FuncFilter,uint64_t SimilarityCutoff,raw_fd_ostream & OS)2743 void overlapSampleProfile(const std::string &BaseFilename,
2744                           const std::string &TestFilename,
2745                           const OverlapFuncFilters &FuncFilter,
2746                           uint64_t SimilarityCutoff, raw_fd_ostream &OS) {
2747   using namespace sampleprof;
2748 
2749   // We use 0.000005 to initialize OverlapAggr.Epsilon because the final metrics
2750   // report 2--3 places after decimal point in percentage numbers.
2751   SampleOverlapAggregator OverlapAggr(
2752       BaseFilename, TestFilename,
2753       static_cast<double>(SimilarityCutoff) / 1000000, 0.000005, FuncFilter);
2754   if (std::error_code EC = OverlapAggr.loadProfiles())
2755     exitWithErrorCode(EC);
2756 
2757   OverlapAggr.initializeSampleProfileOverlap();
2758   if (OverlapAggr.detectZeroSampleProfile(OS))
2759     return;
2760 
2761   OverlapAggr.computeSampleProfileOverlap(OS);
2762 
2763   OverlapAggr.dumpProgramSummary(OS);
2764   OverlapAggr.dumpHotFuncAndBlockOverlap(OS);
2765   OverlapAggr.dumpFuncSimilarity(OS);
2766 }
2767 
overlap_main()2768 static int overlap_main() {
2769   std::error_code EC;
2770   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
2771   if (EC)
2772     exitWithErrorCode(EC, OutputFilename);
2773 
2774   if (ProfileKind == instr)
2775     overlapInstrProfile(BaseFilename, TestFilename,
2776                         OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2777                         OS, IsCS);
2778   else
2779     overlapSampleProfile(BaseFilename, TestFilename,
2780                          OverlapFuncFilters{OverlapValueCutoff, FuncNameFilter},
2781                          SimilarityCutoff, OS);
2782 
2783   return 0;
2784 }
2785 
namespace {
/// Running totals collected while walking the value sites of one value kind.
struct ValueSitesStats {
  ValueSitesStats() = default;

  /// Number of value sites visited.
  uint64_t TotalNumValueSites{0};
  /// Number of visited sites that carry at least one profiled value.
  uint64_t TotalNumValueSitesWithValueProfile{0};
  /// Number of profiled values summed over all sites.
  uint64_t TotalNumValues{0};
  /// Entry [N - 1] counts the sites that have exactly N profiled values.
  std::vector<unsigned> ValueSitesHistogram;
};
} // namespace
2795 
traverseAllValueSites(const InstrProfRecord & Func,uint32_t VK,ValueSitesStats & Stats,raw_fd_ostream & OS,InstrProfSymtab * Symtab)2796 static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK,
2797                                   ValueSitesStats &Stats, raw_fd_ostream &OS,
2798                                   InstrProfSymtab *Symtab) {
2799   uint32_t NS = Func.getNumValueSites(VK);
2800   Stats.TotalNumValueSites += NS;
2801   for (size_t I = 0; I < NS; ++I) {
2802     auto VD = Func.getValueArrayForSite(VK, I);
2803     uint32_t NV = VD.size();
2804     if (NV == 0)
2805       continue;
2806     Stats.TotalNumValues += NV;
2807     Stats.TotalNumValueSitesWithValueProfile++;
2808     if (NV > Stats.ValueSitesHistogram.size())
2809       Stats.ValueSitesHistogram.resize(NV, 0);
2810     Stats.ValueSitesHistogram[NV - 1]++;
2811 
2812     uint64_t SiteSum = 0;
2813     for (const auto &V : VD)
2814       SiteSum += V.Count;
2815     if (SiteSum == 0)
2816       SiteSum = 1;
2817 
2818     for (const auto &V : VD) {
2819       OS << "\t[ " << format("%2u", I) << ", ";
2820       if (Symtab == nullptr)
2821         OS << format("%4" PRIu64, V.Value);
2822       else
2823         OS << Symtab->getFuncOrVarName(V.Value);
2824       OS << ", " << format("%10" PRId64, V.Count) << " ] ("
2825          << format("%.2f%%", (V.Count * 100.0 / SiteSum)) << ")\n";
2826     }
2827   }
2828 }
2829 
showValueSitesStats(raw_fd_ostream & OS,uint32_t VK,ValueSitesStats & Stats)2830 static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
2831                                 ValueSitesStats &Stats) {
2832   OS << "  Total number of sites: " << Stats.TotalNumValueSites << "\n";
2833   OS << "  Total number of sites with values: "
2834      << Stats.TotalNumValueSitesWithValueProfile << "\n";
2835   OS << "  Total number of profiled values: " << Stats.TotalNumValues << "\n";
2836 
2837   OS << "  Value sites histogram:\n\tNumTargets, SiteCount\n";
2838   for (unsigned I = 0; I < Stats.ValueSitesHistogram.size(); I++) {
2839     if (Stats.ValueSitesHistogram[I] > 0)
2840       OS << "\t" << I + 1 << ", " << Stats.ValueSitesHistogram[I] << "\n";
2841   }
2842 }
2843 
showInstrProfile(ShowFormat SFormat,raw_fd_ostream & OS)2844 static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
2845   if (SFormat == ShowFormat::Json)
2846     exitWithError("JSON output is not supported for instr profiles");
2847   if (SFormat == ShowFormat::Yaml)
2848     exitWithError("YAML output is not supported for instr profiles");
2849   auto FS = vfs::getRealFileSystem();
2850   auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
2851   std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
2852   if (ShowDetailedSummary && Cutoffs.empty()) {
2853     Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
2854   }
2855   InstrProfSummaryBuilder Builder(std::move(Cutoffs));
2856   if (Error E = ReaderOrErr.takeError())
2857     exitWithError(std::move(E), Filename);
2858 
2859   auto Reader = std::move(ReaderOrErr.get());
2860   bool IsIRInstr = Reader->isIRLevelProfile();
2861   size_t ShownFunctions = 0;
2862   size_t BelowCutoffFunctions = 0;
2863   int NumVPKind = IPVK_Last - IPVK_First + 1;
2864   std::vector<ValueSitesStats> VPStats(NumVPKind);
2865 
2866   auto MinCmp = [](const std::pair<std::string, uint64_t> &v1,
2867                    const std::pair<std::string, uint64_t> &v2) {
2868     return v1.second > v2.second;
2869   };
2870 
2871   std::priority_queue<std::pair<std::string, uint64_t>,
2872                       std::vector<std::pair<std::string, uint64_t>>,
2873                       decltype(MinCmp)>
2874       HottestFuncs(MinCmp);
2875 
2876   if (!TextFormat && OnlyListBelow) {
2877     OS << "The list of functions with the maximum counter less than "
2878        << ShowValueCutoff << ":\n";
2879   }
2880 
2881   // Add marker so that IR-level instrumentation round-trips properly.
2882   if (TextFormat && IsIRInstr)
2883     OS << ":ir\n";
2884 
2885   for (const auto &Func : *Reader) {
2886     if (Reader->isIRLevelProfile()) {
2887       bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash);
2888       if (FuncIsCS != ShowCS)
2889         continue;
2890     }
2891     bool Show = ShowAllFunctions ||
2892                 (!FuncNameFilter.empty() && Func.Name.contains(FuncNameFilter));
2893 
2894     bool doTextFormatDump = (Show && TextFormat);
2895 
2896     if (doTextFormatDump) {
2897       InstrProfSymtab &Symtab = Reader->getSymtab();
2898       InstrProfWriter::writeRecordInText(Func.Name, Func.Hash, Func, Symtab,
2899                                          OS);
2900       continue;
2901     }
2902 
2903     assert(Func.Counts.size() > 0 && "function missing entry counter");
2904     Builder.addRecord(Func);
2905 
2906     if (ShowCovered) {
2907       if (llvm::any_of(Func.Counts, [](uint64_t C) { return C; }))
2908         OS << Func.Name << "\n";
2909       continue;
2910     }
2911 
2912     uint64_t FuncMax = 0;
2913     uint64_t FuncSum = 0;
2914 
2915     auto PseudoKind = Func.getCountPseudoKind();
2916     if (PseudoKind != InstrProfRecord::NotPseudo) {
2917       if (Show) {
2918         if (!ShownFunctions)
2919           OS << "Counters:\n";
2920         ++ShownFunctions;
2921         OS << "  " << Func.Name << ":\n"
2922            << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2923            << "    Counters: " << Func.Counts.size();
2924         if (PseudoKind == InstrProfRecord::PseudoHot)
2925           OS << "    <PseudoHot>\n";
2926         else if (PseudoKind == InstrProfRecord::PseudoWarm)
2927           OS << "    <PseudoWarm>\n";
2928         else
2929           llvm_unreachable("Unknown PseudoKind");
2930       }
2931       continue;
2932     }
2933 
2934     for (size_t I = 0, E = Func.Counts.size(); I < E; ++I) {
2935       FuncMax = std::max(FuncMax, Func.Counts[I]);
2936       FuncSum += Func.Counts[I];
2937     }
2938 
2939     if (FuncMax < ShowValueCutoff) {
2940       ++BelowCutoffFunctions;
2941       if (OnlyListBelow) {
2942         OS << "  " << Func.Name << ": (Max = " << FuncMax
2943            << " Sum = " << FuncSum << ")\n";
2944       }
2945       continue;
2946     } else if (OnlyListBelow)
2947       continue;
2948 
2949     if (TopNFunctions) {
2950       if (HottestFuncs.size() == TopNFunctions) {
2951         if (HottestFuncs.top().second < FuncMax) {
2952           HottestFuncs.pop();
2953           HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2954         }
2955       } else
2956         HottestFuncs.emplace(std::make_pair(std::string(Func.Name), FuncMax));
2957     }
2958 
2959     if (Show) {
2960       if (!ShownFunctions)
2961         OS << "Counters:\n";
2962 
2963       ++ShownFunctions;
2964 
2965       OS << "  " << Func.Name << ":\n"
2966          << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
2967          << "    Counters: " << Func.Counts.size() << "\n";
2968       if (!IsIRInstr)
2969         OS << "    Function count: " << Func.Counts[0] << "\n";
2970 
2971       if (ShowIndirectCallTargets)
2972         OS << "    Indirect Call Site Count: "
2973            << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n";
2974 
2975       if (ShowVTables)
2976         OS << "    Number of instrumented vtables: "
2977            << Func.getNumValueSites(IPVK_VTableTarget) << "\n";
2978 
2979       uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize);
2980       if (ShowMemOPSizes && NumMemOPCalls > 0)
2981         OS << "    Number of Memory Intrinsics Calls: " << NumMemOPCalls
2982            << "\n";
2983 
2984       if (ShowCounts) {
2985         OS << "    Block counts: [";
2986         size_t Start = (IsIRInstr ? 0 : 1);
2987         for (size_t I = Start, E = Func.Counts.size(); I < E; ++I) {
2988           OS << (I == Start ? "" : ", ") << Func.Counts[I];
2989         }
2990         OS << "]\n";
2991       }
2992 
2993       if (ShowIndirectCallTargets) {
2994         OS << "    Indirect Target Results:\n";
2995         traverseAllValueSites(Func, IPVK_IndirectCallTarget,
2996                               VPStats[IPVK_IndirectCallTarget], OS,
2997                               &(Reader->getSymtab()));
2998       }
2999 
3000       if (ShowVTables) {
3001         OS << "    VTable Results:\n";
3002         traverseAllValueSites(Func, IPVK_VTableTarget,
3003                               VPStats[IPVK_VTableTarget], OS,
3004                               &(Reader->getSymtab()));
3005       }
3006 
3007       if (ShowMemOPSizes && NumMemOPCalls > 0) {
3008         OS << "    Memory Intrinsic Size Results:\n";
3009         traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS,
3010                               nullptr);
3011       }
3012     }
3013   }
3014   if (Reader->hasError())
3015     exitWithError(Reader->getError(), Filename);
3016 
3017   if (TextFormat || ShowCovered)
3018     return 0;
3019   std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
3020   bool IsIR = Reader->isIRLevelProfile();
3021   OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
3022   if (IsIR) {
3023     OS << "  entry_first = " << Reader->instrEntryBBEnabled();
3024     OS << "  instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
3025   }
3026   OS << "\n";
3027   if (ShowAllFunctions || !FuncNameFilter.empty())
3028     OS << "Functions shown: " << ShownFunctions << "\n";
3029   PS->printSummary(OS);
3030   if (ShowValueCutoff > 0) {
3031     OS << "Number of functions with maximum count (< " << ShowValueCutoff
3032        << "): " << BelowCutoffFunctions << "\n";
3033     OS << "Number of functions with maximum count (>= " << ShowValueCutoff
3034        << "): " << PS->getNumFunctions() - BelowCutoffFunctions << "\n";
3035   }
3036 
3037   if (TopNFunctions) {
3038     std::vector<std::pair<std::string, uint64_t>> SortedHottestFuncs;
3039     while (!HottestFuncs.empty()) {
3040       SortedHottestFuncs.emplace_back(HottestFuncs.top());
3041       HottestFuncs.pop();
3042     }
3043     OS << "Top " << TopNFunctions
3044        << " functions with the largest internal block counts: \n";
3045     for (auto &hotfunc : llvm::reverse(SortedHottestFuncs))
3046       OS << "  " << hotfunc.first << ", max count = " << hotfunc.second << "\n";
3047   }
3048 
3049   if (ShownFunctions && ShowIndirectCallTargets) {
3050     OS << "Statistics for indirect call sites profile:\n";
3051     showValueSitesStats(OS, IPVK_IndirectCallTarget,
3052                         VPStats[IPVK_IndirectCallTarget]);
3053   }
3054 
3055   if (ShownFunctions && ShowVTables) {
3056     OS << "Statistics for vtable profile:\n";
3057     showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]);
3058   }
3059 
3060   if (ShownFunctions && ShowMemOPSizes) {
3061     OS << "Statistics for memory intrinsic calls sizes profile:\n";
3062     showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]);
3063   }
3064 
3065   if (ShowDetailedSummary)
3066     PS->printDetailedSummary(OS);
3067 
3068   if (ShowBinaryIds)
3069     if (Error E = Reader->printBinaryIds(OS))
3070       exitWithError(std::move(E), Filename);
3071 
3072   if (ShowProfileVersion)
3073     OS << "Profile version: " << Reader->getVersion() << "\n";
3074 
3075   if (ShowTemporalProfTraces) {
3076     auto &Traces = Reader->getTemporalProfTraces();
3077     OS << "Temporal Profile Traces (samples=" << Traces.size()
3078        << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
3079     for (unsigned i = 0; i < Traces.size(); i++) {
3080       OS << "  Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
3081          << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
3082       for (auto &NameRef : Traces[i].FunctionNameRefs)
3083         OS << "    " << Reader->getSymtab().getFuncOrVarName(NameRef) << "\n";
3084     }
3085   }
3086 
3087   return 0;
3088 }
3089 
showSectionInfo(sampleprof::SampleProfileReader * Reader,raw_fd_ostream & OS)3090 static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
3091                             raw_fd_ostream &OS) {
3092   if (!Reader->dumpSectionInfo(OS)) {
3093     WithColor::warning() << "-show-sec-info-only is only supported for "
3094                          << "sample profile in extbinary format and is "
3095                          << "ignored for other formats.\n";
3096     return;
3097   }
3098 }
3099 
3100 namespace {
3101 struct HotFuncInfo {
3102   std::string FuncName;
3103   uint64_t TotalCount = 0;
3104   double TotalCountPercent = 0.0f;
3105   uint64_t MaxCount = 0;
3106   uint64_t EntryCount = 0;
3107 
3108   HotFuncInfo() = default;
3109 
HotFuncInfo__anon209d874b1711::HotFuncInfo3110   HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
3111       : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
3112         MaxCount(MS), EntryCount(ES) {}
3113 };
3114 } // namespace
3115 
3116 // Print out detailed information about hot functions in PrintValues vector.
3117 // Users specify titles and offset of every columns through ColumnTitle and
3118 // ColumnOffset. The size of ColumnTitle and ColumnOffset need to be the same
3119 // and at least 4. Besides, users can optionally give a HotFuncMetric string to
3120 // print out or let it be an empty string.
dumpHotFunctionList(const std::vector<std::string> & ColumnTitle,const std::vector<int> & ColumnOffset,const std::vector<HotFuncInfo> & PrintValues,uint64_t HotFuncCount,uint64_t TotalFuncCount,uint64_t HotProfCount,uint64_t TotalProfCount,const std::string & HotFuncMetric,uint32_t TopNFunctions,raw_fd_ostream & OS)3121 static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
3122                                 const std::vector<int> &ColumnOffset,
3123                                 const std::vector<HotFuncInfo> &PrintValues,
3124                                 uint64_t HotFuncCount, uint64_t TotalFuncCount,
3125                                 uint64_t HotProfCount, uint64_t TotalProfCount,
3126                                 const std::string &HotFuncMetric,
3127                                 uint32_t TopNFunctions, raw_fd_ostream &OS) {
3128   assert(ColumnOffset.size() == ColumnTitle.size() &&
3129          "ColumnOffset and ColumnTitle should have the same size");
3130   assert(ColumnTitle.size() >= 4 &&
3131          "ColumnTitle should have at least 4 elements");
3132   assert(TotalFuncCount > 0 &&
3133          "There should be at least one function in the profile");
3134   double TotalProfPercent = 0;
3135   if (TotalProfCount > 0)
3136     TotalProfPercent = static_cast<double>(HotProfCount) / TotalProfCount * 100;
3137 
3138   formatted_raw_ostream FOS(OS);
3139   FOS << HotFuncCount << " out of " << TotalFuncCount
3140       << " functions with profile ("
3141       << format("%.2f%%",
3142                 (static_cast<double>(HotFuncCount) / TotalFuncCount * 100))
3143       << ") are considered hot functions";
3144   if (!HotFuncMetric.empty())
3145     FOS << " (" << HotFuncMetric << ")";
3146   FOS << ".\n";
3147   FOS << HotProfCount << " out of " << TotalProfCount << " profile counts ("
3148       << format("%.2f%%", TotalProfPercent) << ") are from hot functions.\n";
3149 
3150   for (size_t I = 0; I < ColumnTitle.size(); ++I) {
3151     FOS.PadToColumn(ColumnOffset[I]);
3152     FOS << ColumnTitle[I];
3153   }
3154   FOS << "\n";
3155 
3156   uint32_t Count = 0;
3157   for (const auto &R : PrintValues) {
3158     if (TopNFunctions && (Count++ == TopNFunctions))
3159       break;
3160     FOS.PadToColumn(ColumnOffset[0]);
3161     FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
3162     FOS.PadToColumn(ColumnOffset[1]);
3163     FOS << R.MaxCount;
3164     FOS.PadToColumn(ColumnOffset[2]);
3165     FOS << R.EntryCount;
3166     FOS.PadToColumn(ColumnOffset[3]);
3167     FOS << R.FuncName << "\n";
3168   }
3169 }
3170 
showHotFunctionList(const sampleprof::SampleProfileMap & Profiles,ProfileSummary & PS,uint32_t TopN,raw_fd_ostream & OS)3171 static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
3172                                ProfileSummary &PS, uint32_t TopN,
3173                                raw_fd_ostream &OS) {
3174   using namespace sampleprof;
3175 
3176   const uint32_t HotFuncCutoff = 990000;
3177   auto &SummaryVector = PS.getDetailedSummary();
3178   uint64_t MinCountThreshold = 0;
3179   for (const ProfileSummaryEntry &SummaryEntry : SummaryVector) {
3180     if (SummaryEntry.Cutoff == HotFuncCutoff) {
3181       MinCountThreshold = SummaryEntry.MinCount;
3182       break;
3183     }
3184   }
3185 
3186   // Traverse all functions in the profile and keep only hot functions.
3187   // The following loop also calculates the sum of total samples of all
3188   // functions.
3189   std::multimap<uint64_t, std::pair<const FunctionSamples *, const uint64_t>,
3190                 std::greater<uint64_t>>
3191       HotFunc;
3192   uint64_t ProfileTotalSample = 0;
3193   uint64_t HotFuncSample = 0;
3194   uint64_t HotFuncCount = 0;
3195 
3196   for (const auto &I : Profiles) {
3197     FuncSampleStats FuncStats;
3198     const FunctionSamples &FuncProf = I.second;
3199     ProfileTotalSample += FuncProf.getTotalSamples();
3200     getFuncSampleStats(FuncProf, FuncStats, MinCountThreshold);
3201 
3202     if (isFunctionHot(FuncStats, MinCountThreshold)) {
3203       HotFunc.emplace(FuncProf.getTotalSamples(),
3204                       std::make_pair(&(I.second), FuncStats.MaxSample));
3205       HotFuncSample += FuncProf.getTotalSamples();
3206       ++HotFuncCount;
3207     }
3208   }
3209 
3210   std::vector<std::string> ColumnTitle{"Total sample (%)", "Max sample",
3211                                        "Entry sample", "Function name"};
3212   std::vector<int> ColumnOffset{0, 24, 42, 58};
3213   std::string Metric =
3214       std::string("max sample >= ") + std::to_string(MinCountThreshold);
3215   std::vector<HotFuncInfo> PrintValues;
3216   for (const auto &FuncPair : HotFunc) {
3217     const FunctionSamples &Func = *FuncPair.second.first;
3218     double TotalSamplePercent =
3219         (ProfileTotalSample > 0)
3220             ? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
3221             : 0;
3222     PrintValues.emplace_back(
3223         HotFuncInfo(Func.getContext().toString(), Func.getTotalSamples(),
3224                     TotalSamplePercent, FuncPair.second.second,
3225                     Func.getHeadSamplesEstimate()));
3226   }
3227   dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
3228                       Profiles.size(), HotFuncSample, ProfileTotalSample,
3229                       Metric, TopN, OS);
3230 
3231   return 0;
3232 }
3233 
showSampleProfile(ShowFormat SFormat,raw_fd_ostream & OS)3234 static int showSampleProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3235   if (SFormat == ShowFormat::Yaml)
3236     exitWithError("YAML output is not supported for sample profiles");
3237   using namespace sampleprof;
3238   LLVMContext Context;
3239   auto FS = vfs::getRealFileSystem();
3240   auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
3241                                                  FSDiscriminatorPassOption);
3242   if (std::error_code EC = ReaderOrErr.getError())
3243     exitWithErrorCode(EC, Filename);
3244 
3245   auto Reader = std::move(ReaderOrErr.get());
3246   if (ShowSectionInfoOnly) {
3247     showSectionInfo(Reader.get(), OS);
3248     return 0;
3249   }
3250 
3251   if (std::error_code EC = Reader->read())
3252     exitWithErrorCode(EC, Filename);
3253 
3254   if (ShowAllFunctions || FuncNameFilter.empty()) {
3255     if (SFormat == ShowFormat::Json)
3256       Reader->dumpJson(OS);
3257     else
3258       Reader->dump(OS);
3259   } else {
3260     if (SFormat == ShowFormat::Json)
3261       exitWithError(
3262           "the JSON format is supported only when all functions are to "
3263           "be printed");
3264 
3265     // TODO: parse context string to support filtering by contexts.
3266     FunctionSamples *FS = Reader->getSamplesFor(StringRef(FuncNameFilter));
3267     Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
3268   }
3269 
3270   if (ShowProfileSymbolList) {
3271     std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
3272         Reader->getProfileSymbolList();
3273     ReaderList->dump(OS);
3274   }
3275 
3276   if (ShowDetailedSummary) {
3277     auto &PS = Reader->getSummary();
3278     PS.printSummary(OS);
3279     PS.printDetailedSummary(OS);
3280   }
3281 
3282   if (ShowHotFuncList || TopNFunctions)
3283     showHotFunctionList(Reader->getProfiles(), Reader->getSummary(),
3284                         TopNFunctions, OS);
3285 
3286   return 0;
3287 }
3288 
showMemProfProfile(ShowFormat SFormat,raw_fd_ostream & OS)3289 static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
3290   if (SFormat == ShowFormat::Json)
3291     exitWithError("JSON output is not supported for MemProf");
3292 
3293   // Show the raw profile in YAML.
3294   if (memprof::RawMemProfReader::hasFormat(Filename)) {
3295     auto ReaderOr = llvm::memprof::RawMemProfReader::create(
3296         Filename, ProfiledBinary, /*KeepNames=*/true);
3297     if (Error E = ReaderOr.takeError()) {
3298       // Since the error can be related to the profile or the binary we do not
3299       // pass whence. Instead additional context is provided where necessary in
3300       // the error message.
3301       exitWithError(std::move(E), /*Whence*/ "");
3302     }
3303 
3304     std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
3305         ReaderOr.get().release());
3306 
3307     Reader->printYAML(OS);
3308     return 0;
3309   }
3310 
3311   // Show the indexed MemProf profile in YAML.
3312   auto FS = vfs::getRealFileSystem();
3313   auto ReaderOrErr = IndexedInstrProfReader::create(Filename, *FS);
3314   if (Error E = ReaderOrErr.takeError())
3315     exitWithError(std::move(E), Filename);
3316 
3317   auto Reader = std::move(ReaderOrErr.get());
3318   memprof::AllMemProfData Data = Reader->getAllMemProfData();
3319 
3320   // For v4 and above the summary is serialized in the indexed profile, and can
3321   // be accessed from the reader. Earlier versions build the summary below.
3322   // The summary is emitted as YAML comments at the start of the output.
3323   if (auto *MemProfSum = Reader->getMemProfSummary()) {
3324     MemProfSum->printSummaryYaml(OS);
3325   } else {
3326     memprof::MemProfSummaryBuilder MemProfSumBuilder;
3327     for (auto &Pair : Data.HeapProfileRecords)
3328       MemProfSumBuilder.addRecord(Pair.Record);
3329     MemProfSumBuilder.getSummary()->printSummaryYaml(OS);
3330   }
3331   // Construct yaml::Output with the maximum column width of 80 so that each
3332   // Frame fits in one line.
3333   yaml::Output Yout(OS, nullptr, 80);
3334   Yout << Data;
3335 
3336   return 0;
3337 }
3338 
showDebugInfoCorrelation(const std::string & Filename,ShowFormat SFormat,raw_fd_ostream & OS)3339 static int showDebugInfoCorrelation(const std::string &Filename,
3340                                     ShowFormat SFormat, raw_fd_ostream &OS) {
3341   if (SFormat == ShowFormat::Json)
3342     exitWithError("JSON output is not supported for debug info correlation");
3343   std::unique_ptr<InstrProfCorrelator> Correlator;
3344   if (auto Err =
3345           InstrProfCorrelator::get(Filename, InstrProfCorrelator::DEBUG_INFO)
3346               .moveInto(Correlator))
3347     exitWithError(std::move(Err), Filename);
3348   if (SFormat == ShowFormat::Yaml) {
3349     if (auto Err = Correlator->dumpYaml(MaxDbgCorrelationWarnings, OS))
3350       exitWithError(std::move(Err), Filename);
3351     return 0;
3352   }
3353 
3354   if (auto Err = Correlator->correlateProfileData(MaxDbgCorrelationWarnings))
3355     exitWithError(std::move(Err), Filename);
3356 
3357   InstrProfSymtab Symtab;
3358   if (auto Err = Symtab.create(
3359           StringRef(Correlator->getNamesPointer(), Correlator->getNamesSize())))
3360     exitWithError(std::move(Err), Filename);
3361 
3362   if (ShowProfileSymbolList)
3363     Symtab.dumpNames(OS);
3364   // TODO: Read "Profile Data Type" from debug info to compute and show how many
3365   // counters the section holds.
3366   if (ShowDetailedSummary)
3367     OS << "Counters section size: 0x"
3368        << Twine::utohexstr(Correlator->getCountersSectionSize()) << " bytes\n";
3369   OS << "Found " << Correlator->getDataSize() << " functions\n";
3370 
3371   return 0;
3372 }
3373 
show_main(StringRef ProgName)3374 static int show_main(StringRef ProgName) {
3375   if (Filename.empty() && DebugInfoFilename.empty())
3376     exitWithError(
3377         "the positional argument '<profdata-file>' is required unless '--" +
3378         DebugInfoFilename.ArgStr + "' is provided");
3379 
3380   if (Filename == OutputFilename) {
3381     errs() << ProgName
3382            << " show: Input file name cannot be the same as the output file "
3383               "name!\n";
3384     return 1;
3385   }
3386   if (JsonFormat)
3387     SFormat = ShowFormat::Json;
3388 
3389   std::error_code EC;
3390   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3391   if (EC)
3392     exitWithErrorCode(EC, OutputFilename);
3393 
3394   if (ShowAllFunctions && !FuncNameFilter.empty())
3395     WithColor::warning() << "-function argument ignored: showing all functions\n";
3396 
3397   if (!DebugInfoFilename.empty())
3398     return showDebugInfoCorrelation(DebugInfoFilename, SFormat, OS);
3399 
3400   if (ShowProfileKind == instr)
3401     return showInstrProfile(SFormat, OS);
3402   if (ShowProfileKind == sample)
3403     return showSampleProfile(SFormat, OS);
3404   return showMemProfProfile(SFormat, OS);
3405 }
3406 
order_main()3407 static int order_main() {
3408   std::error_code EC;
3409   raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
3410   if (EC)
3411     exitWithErrorCode(EC, OutputFilename);
3412   auto FS = vfs::getRealFileSystem();
3413   auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
3414   if (Error E = ReaderOrErr.takeError())
3415     exitWithError(std::move(E), Filename);
3416 
3417   auto Reader = std::move(ReaderOrErr.get());
3418   for (auto &I : *Reader) {
3419     // Read all entries
3420     (void)I;
3421   }
3422   ArrayRef Traces = Reader->getTemporalProfTraces();
3423   if (NumTestTraces && NumTestTraces >= Traces.size())
3424     exitWithError(
3425         "--" + NumTestTraces.ArgStr +
3426         " must be smaller than the total number of traces: expected: < " +
3427         Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
3428   ArrayRef TestTraces = Traces.take_back(NumTestTraces);
3429   Traces = Traces.drop_back(NumTestTraces);
3430 
3431   std::vector<BPFunctionNode> Nodes;
3432   TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
3433   BalancedPartitioningConfig Config;
3434   BalancedPartitioning BP(Config);
3435   BP.run(Nodes);
3436 
3437   OS << "# Ordered " << Nodes.size() << " functions\n";
3438   if (!TestTraces.empty()) {
3439     // Since we don't know the symbol sizes, we assume 32 functions per page.
3440     DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
3441     for (auto &Node : Nodes)
3442       IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
3443 
3444     SmallSet<unsigned, 0> TouchedPages;
3445     unsigned Area = 0;
3446     for (auto &Trace : TestTraces) {
3447       for (auto Id : Trace.FunctionNameRefs) {
3448         auto It = IdToPageNumber.find(Id);
3449         if (It == IdToPageNumber.end())
3450           continue;
3451         TouchedPages.insert(It->getSecond());
3452         Area += TouchedPages.size();
3453       }
3454       TouchedPages.clear();
3455     }
3456     OS << "# Total area under the page fault curve: " << (float)Area << "\n";
3457   }
3458   OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
3459         "linkage and this output does not take that into account. Some "
3460         "post-processing may be required before passing to the linker via "
3461         "-order_file.\n";
3462   for (auto &N : Nodes) {
3463     auto [Filename, ParsedFuncName] =
3464         getParsedIRPGOName(Reader->getSymtab().getFuncOrVarName(N.Id));
3465     if (!Filename.empty())
3466       OS << "# " << Filename << "\n";
3467     OS << ParsedFuncName << "\n";
3468   }
3469   return 0;
3470 }
3471 
llvm_profdata_main(int argc,char ** argvNonConst,const llvm::ToolContext &)3472 int llvm_profdata_main(int argc, char **argvNonConst,
3473                        const llvm::ToolContext &) {
3474   const char **argv = const_cast<const char **>(argvNonConst);
3475 
3476   StringRef ProgName(sys::path::filename(argv[0]));
3477 
3478   if (argc < 2) {
3479     errs()
3480         << ProgName
3481         << ": No subcommand specified! Run llvm-profdata --help for usage.\n";
3482     return 1;
3483   }
3484 
3485   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data\n");
3486 
3487   if (ShowSubcommand)
3488     return show_main(ProgName);
3489 
3490   if (OrderSubcommand)
3491     return order_main();
3492 
3493   if (OverlapSubcommand)
3494     return overlap_main();
3495 
3496   if (MergeSubcommand)
3497     return merge_main(ProgName);
3498 
3499   errs() << ProgName
3500          << ": Unknown command. Run llvm-profdata --help for usage.\n";
3501   return 1;
3502 }
3503