xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Compression.h"
31 #include "llvm/Support/ErrorOr.h"
32 #include "llvm/Support/LEB128.h"
33 #include "llvm/Support/LineIterator.h"
34 #include "llvm/Support/MD5.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <cstddef>
39 #include <cstdint>
40 #include <limits>
41 #include <memory>
42 #include <set>
43 #include <system_error>
44 #include <vector>
45 
46 using namespace llvm;
47 using namespace sampleprof;
48 
49 #define DEBUG_TYPE "samplepgo-reader"
50 
// This internal option specifies if the profile uses FS discriminators.
// It only applies to text, binary and compact binary format profiles.
// For ext-binary format profiles, the flag is set in the summary.
// The text and binary readImpl() implementations in this file copy the
// option's value into ProfileIsFS and FunctionSamples::ProfileIsFS before
// they start parsing.
static cl::opt<bool> ProfileIsFSDisciminator(
    "profile-isfs", cl::Hidden, cl::init(false),
    cl::desc("Profile uses flow sensitive discriminators"));
57 
58 /// Dump the function profile for \p FName.
59 ///
60 /// \param FContext Name + context of the function to print.
61 /// \param OS Stream to emit the output to.
62 void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
63                                               raw_ostream &OS) {
64   OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
65 }
66 
67 /// Dump all the function profiles found on stream \p OS.
68 void SampleProfileReader::dump(raw_ostream &OS) {
69   std::vector<NameFunctionSamples> V;
70   sortFuncProfiles(Profiles, V);
71   for (const auto &I : V)
72     dumpFunctionProfile(I.first, OS);
73 }
74 
75 /// Parse \p Input as function head.
76 ///
77 /// Parse one line of \p Input, and update function name in \p FName,
78 /// function's total sample count in \p NumSamples, function's entry
79 /// count in \p NumHeadSamples.
80 ///
81 /// \returns true if parsing is successful.
82 static bool ParseHead(const StringRef &Input, StringRef &FName,
83                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
84   if (Input[0] == ' ')
85     return false;
86   size_t n2 = Input.rfind(':');
87   size_t n1 = Input.rfind(':', n2 - 1);
88   FName = Input.substr(0, n1);
89   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
90     return false;
91   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
92     return false;
93   return true;
94 }
95 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
98 
99 /// Parse \p Input that contains metadata.
100 /// Possible metadata:
101 /// - CFG Checksum information:
102 ///     !CFGChecksum: 12345
103 /// - CFG Checksum information:
104 ///     !Attributes: 1
105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
106 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
107                           uint32_t &Attributes) {
108   if (Input.startswith("!CFGChecksum:")) {
109     StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
110     return !CFGInfo.getAsInteger(10, FunctionHash);
111   }
112 
113   if (Input.startswith("!Attributes:")) {
114     StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
115     return !Attrib.getAsInteger(10, Attributes);
116   }
117 
118   return false;
119 }
120 
/// Kind of an indented (non-header) line of a text profile, as classified by
/// ParseLine().
enum class LineType {
  /// "offset[.discriminator]: callee:samples" - start of an inlined callsite.
  CallSiteProfile,
  /// "offset[.discriminator]: samples [target:count]*" - body sample record.
  BodyProfile,
  /// "!Tag: value" - per-function metadata line.
  Metadata
};
126 
127 /// Parse \p Input as line sample.
128 ///
129 /// \param Input input line.
130 /// \param LineTy Type of this line.
131 /// \param Depth the depth of the inline stack.
132 /// \param NumSamples total samples of the line/inlined callsite.
133 /// \param LineOffset line offset to the start of the function.
134 /// \param Discriminator discriminator of the line.
135 /// \param TargetCountMap map from indirect call target to count.
136 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
137 ///
138 /// returns true if parsing is successful.
139 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
140                       uint64_t &NumSamples, uint32_t &LineOffset,
141                       uint32_t &Discriminator, StringRef &CalleeName,
142                       DenseMap<StringRef, uint64_t> &TargetCountMap,
143                       uint64_t &FunctionHash, uint32_t &Attributes) {
144   for (Depth = 0; Input[Depth] == ' '; Depth++)
145     ;
146   if (Depth == 0)
147     return false;
148 
149   if (Depth == 1 && Input[Depth] == '!') {
150     LineTy = LineType::Metadata;
151     return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
152   }
153 
154   size_t n1 = Input.find(':');
155   StringRef Loc = Input.substr(Depth, n1 - Depth);
156   size_t n2 = Loc.find('.');
157   if (n2 == StringRef::npos) {
158     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
159       return false;
160     Discriminator = 0;
161   } else {
162     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
163       return false;
164     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
165       return false;
166   }
167 
168   StringRef Rest = Input.substr(n1 + 2);
169   if (isDigit(Rest[0])) {
170     LineTy = LineType::BodyProfile;
171     size_t n3 = Rest.find(' ');
172     if (n3 == StringRef::npos) {
173       if (Rest.getAsInteger(10, NumSamples))
174         return false;
175     } else {
176       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
177         return false;
178     }
179     // Find call targets and their sample counts.
180     // Note: In some cases, there are symbols in the profile which are not
181     // mangled. To accommodate such cases, use colon + integer pairs as the
182     // anchor points.
183     // An example:
184     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
185     // ":1000" and ":437" are used as anchor points so the string above will
186     // be interpreted as
187     // target: _M_construct<char *>
188     // count: 1000
189     // target: string_view<std::allocator<char> >
190     // count: 437
191     while (n3 != StringRef::npos) {
192       n3 += Rest.substr(n3).find_first_not_of(' ');
193       Rest = Rest.substr(n3);
194       n3 = Rest.find_first_of(':');
195       if (n3 == StringRef::npos || n3 == 0)
196         return false;
197 
198       StringRef Target;
199       uint64_t count, n4;
200       while (true) {
201         // Get the segment after the current colon.
202         StringRef AfterColon = Rest.substr(n3 + 1);
203         // Get the target symbol before the current colon.
204         Target = Rest.substr(0, n3);
205         // Check if the word after the current colon is an integer.
206         n4 = AfterColon.find_first_of(' ');
207         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
208         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
209         if (!WordAfterColon.getAsInteger(10, count))
210           break;
211 
212         // Try to find the next colon.
213         uint64_t n5 = AfterColon.find_first_of(':');
214         if (n5 == StringRef::npos)
215           return false;
216         n3 += n5 + 1;
217       }
218 
219       // An anchor point is found. Save the {target, count} pair
220       TargetCountMap[Target] = count;
221       if (n4 == Rest.size())
222         break;
223       // Change n3 to the next blank space after colon + integer pair.
224       n3 = n4;
225     }
226   } else {
227     LineTy = LineType::CallSiteProfile;
228     size_t n3 = Rest.find_last_of(':');
229     CalleeName = Rest.substr(0, n3);
230     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
231       return false;
232   }
233   return true;
234 }
235 
236 /// Load samples from a text file.
237 ///
238 /// See the documentation at the top of the file for an explanation of
239 /// the expected format.
240 ///
241 /// \returns true if the file was loaded successfully, false otherwise.
242 std::error_code SampleProfileReaderText::readImpl() {
243   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
244   sampleprof_error Result = sampleprof_error::success;
245 
246   InlineCallStack InlineStack;
247   uint32_t ProbeProfileCount = 0;
248 
249   // SeenMetadata tracks whether we have processed metadata for the current
250   // top-level function profile.
251   bool SeenMetadata = false;
252 
253   ProfileIsFS = ProfileIsFSDisciminator;
254   FunctionSamples::ProfileIsFS = ProfileIsFS;
255   for (; !LineIt.is_at_eof(); ++LineIt) {
256     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
257       continue;
258     // Read the header of each function.
259     //
260     // Note that for function identifiers we are actually expecting
261     // mangled names, but we may not always get them. This happens when
262     // the compiler decides not to emit the function (e.g., it was inlined
263     // and removed). In this case, the binary will not have the linkage
264     // name for the function, so the profiler will emit the function's
265     // unmangled name, which may contain characters like ':' and '>' in its
266     // name (member functions, templates, etc).
267     //
268     // The only requirement we place on the identifier, then, is that it
269     // should not begin with a number.
270     if ((*LineIt)[0] != ' ') {
271       uint64_t NumSamples, NumHeadSamples;
272       StringRef FName;
273       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
274         reportError(LineIt.line_number(),
275                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
276         return sampleprof_error::malformed;
277       }
278       SeenMetadata = false;
279       SampleContext FContext(FName, CSNameTable);
280       if (FContext.hasContext())
281         ++CSProfileCount;
282       Profiles[FContext] = FunctionSamples();
283       FunctionSamples &FProfile = Profiles[FContext];
284       FProfile.setContext(FContext);
285       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
286       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
287       InlineStack.clear();
288       InlineStack.push_back(&FProfile);
289     } else {
290       uint64_t NumSamples;
291       StringRef FName;
292       DenseMap<StringRef, uint64_t> TargetCountMap;
293       uint32_t Depth, LineOffset, Discriminator;
294       LineType LineTy;
295       uint64_t FunctionHash = 0;
296       uint32_t Attributes = 0;
297       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
298                      Discriminator, FName, TargetCountMap, FunctionHash,
299                      Attributes)) {
300         reportError(LineIt.line_number(),
301                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
302                         *LineIt);
303         return sampleprof_error::malformed;
304       }
305       if (SeenMetadata && LineTy != LineType::Metadata) {
306         // Metadata must be put at the end of a function profile.
307         reportError(LineIt.line_number(),
308                     "Found non-metadata after metadata: " + *LineIt);
309         return sampleprof_error::malformed;
310       }
311 
312       // Here we handle FS discriminators.
313       Discriminator &= getDiscriminatorMask();
314 
315       while (InlineStack.size() > Depth) {
316         InlineStack.pop_back();
317       }
318       switch (LineTy) {
319       case LineType::CallSiteProfile: {
320         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
321             LineLocation(LineOffset, Discriminator))[std::string(FName)];
322         FSamples.setName(FName);
323         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
324         InlineStack.push_back(&FSamples);
325         break;
326       }
327       case LineType::BodyProfile: {
328         while (InlineStack.size() > Depth) {
329           InlineStack.pop_back();
330         }
331         FunctionSamples &FProfile = *InlineStack.back();
332         for (const auto &name_count : TargetCountMap) {
333           MergeResult(Result, FProfile.addCalledTargetSamples(
334                                   LineOffset, Discriminator, name_count.first,
335                                   name_count.second));
336         }
337         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
338                                                     NumSamples));
339         break;
340       }
341       case LineType::Metadata: {
342         FunctionSamples &FProfile = *InlineStack.back();
343         if (FunctionHash) {
344           FProfile.setFunctionHash(FunctionHash);
345           ++ProbeProfileCount;
346         }
347         if (Attributes)
348           FProfile.getContext().setAllAttributes(Attributes);
349         SeenMetadata = true;
350         break;
351       }
352       }
353     }
354   }
355 
356   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
357          "Cannot have both context-sensitive and regular profile");
358   ProfileIsCS = (CSProfileCount > 0);
359   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
360          "Cannot have both probe-based profiles and regular profiles");
361   ProfileIsProbeBased = (ProbeProfileCount > 0);
362   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
363   FunctionSamples::ProfileIsCS = ProfileIsCS;
364 
365   if (Result == sampleprof_error::success)
366     computeSummary();
367 
368   return Result;
369 }
370 
371 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
372   bool result = false;
373 
374   // Check that the first non-comment line is a valid function header.
375   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
376   if (!LineIt.is_at_eof()) {
377     if ((*LineIt)[0] != ' ') {
378       uint64_t NumSamples, NumHeadSamples;
379       StringRef FName;
380       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
381     }
382   }
383 
384   return result;
385 }
386 
387 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
388   unsigned NumBytesRead = 0;
389   std::error_code EC;
390   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
391 
392   if (Val > std::numeric_limits<T>::max())
393     EC = sampleprof_error::malformed;
394   else if (Data + NumBytesRead > End)
395     EC = sampleprof_error::truncated;
396   else
397     EC = sampleprof_error::success;
398 
399   if (EC) {
400     reportError(0, EC.message());
401     return EC;
402   }
403 
404   Data += NumBytesRead;
405   return static_cast<T>(Val);
406 }
407 
408 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
409   std::error_code EC;
410   StringRef Str(reinterpret_cast<const char *>(Data));
411   if (Data + Str.size() + 1 > End) {
412     EC = sampleprof_error::truncated;
413     reportError(0, EC.message());
414     return EC;
415   }
416 
417   Data += Str.size() + 1;
418   return Str;
419 }
420 
421 template <typename T>
422 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
423   std::error_code EC;
424 
425   if (Data + sizeof(T) > End) {
426     EC = sampleprof_error::truncated;
427     reportError(0, EC.message());
428     return EC;
429   }
430 
431   using namespace support;
432   T Val = endian::readNext<T, little, unaligned>(Data);
433   return Val;
434 }
435 
436 template <typename T>
437 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
438   std::error_code EC;
439   auto Idx = readNumber<uint32_t>();
440   if (std::error_code EC = Idx.getError())
441     return EC;
442   if (*Idx >= Table.size())
443     return sampleprof_error::truncated_name_table;
444   return *Idx;
445 }
446 
447 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
448   auto Idx = readStringIndex(NameTable);
449   if (std::error_code EC = Idx.getError())
450     return EC;
451 
452   return NameTable[*Idx];
453 }
454 
455 ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
456   auto FName(readStringFromTable());
457   if (std::error_code EC = FName.getError())
458     return EC;
459   return SampleContext(*FName);
460 }
461 
462 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
463   if (!FixedLengthMD5)
464     return SampleProfileReaderBinary::readStringFromTable();
465 
466   // read NameTable index.
467   auto Idx = readStringIndex(NameTable);
468   if (std::error_code EC = Idx.getError())
469     return EC;
470 
471   // Check whether the name to be accessed has been accessed before,
472   // if not, read it from memory directly.
473   StringRef &SR = NameTable[*Idx];
474   if (SR.empty()) {
475     const uint8_t *SavedData = Data;
476     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
477     auto FID = readUnencodedNumber<uint64_t>();
478     if (std::error_code EC = FID.getError())
479       return EC;
480     // Save the string converted from uint64_t in MD5StringBuf. All the
481     // references to the name are all StringRefs refering to the string
482     // in MD5StringBuf.
483     MD5StringBuf->push_back(std::to_string(*FID));
484     SR = MD5StringBuf->back();
485     Data = SavedData;
486   }
487   return SR;
488 }
489 
490 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
491   auto Idx = readStringIndex(NameTable);
492   if (std::error_code EC = Idx.getError())
493     return EC;
494 
495   return StringRef(NameTable[*Idx]);
496 }
497 
498 std::error_code
499 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
500   auto NumSamples = readNumber<uint64_t>();
501   if (std::error_code EC = NumSamples.getError())
502     return EC;
503   FProfile.addTotalSamples(*NumSamples);
504 
505   // Read the samples in the body.
506   auto NumRecords = readNumber<uint32_t>();
507   if (std::error_code EC = NumRecords.getError())
508     return EC;
509 
510   for (uint32_t I = 0; I < *NumRecords; ++I) {
511     auto LineOffset = readNumber<uint64_t>();
512     if (std::error_code EC = LineOffset.getError())
513       return EC;
514 
515     if (!isOffsetLegal(*LineOffset)) {
516       return std::error_code();
517     }
518 
519     auto Discriminator = readNumber<uint64_t>();
520     if (std::error_code EC = Discriminator.getError())
521       return EC;
522 
523     auto NumSamples = readNumber<uint64_t>();
524     if (std::error_code EC = NumSamples.getError())
525       return EC;
526 
527     auto NumCalls = readNumber<uint32_t>();
528     if (std::error_code EC = NumCalls.getError())
529       return EC;
530 
531     // Here we handle FS discriminators:
532     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
533 
534     for (uint32_t J = 0; J < *NumCalls; ++J) {
535       auto CalledFunction(readStringFromTable());
536       if (std::error_code EC = CalledFunction.getError())
537         return EC;
538 
539       auto CalledFunctionSamples = readNumber<uint64_t>();
540       if (std::error_code EC = CalledFunctionSamples.getError())
541         return EC;
542 
543       FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
544                                       *CalledFunction, *CalledFunctionSamples);
545     }
546 
547     FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
548   }
549 
550   // Read all the samples for inlined function calls.
551   auto NumCallsites = readNumber<uint32_t>();
552   if (std::error_code EC = NumCallsites.getError())
553     return EC;
554 
555   for (uint32_t J = 0; J < *NumCallsites; ++J) {
556     auto LineOffset = readNumber<uint64_t>();
557     if (std::error_code EC = LineOffset.getError())
558       return EC;
559 
560     auto Discriminator = readNumber<uint64_t>();
561     if (std::error_code EC = Discriminator.getError())
562       return EC;
563 
564     auto FName(readStringFromTable());
565     if (std::error_code EC = FName.getError())
566       return EC;
567 
568     // Here we handle FS discriminators:
569     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
570 
571     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
572         LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
573     CalleeProfile.setName(*FName);
574     if (std::error_code EC = readProfile(CalleeProfile))
575       return EC;
576   }
577 
578   return sampleprof_error::success;
579 }
580 
581 std::error_code
582 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
583   Data = Start;
584   auto NumHeadSamples = readNumber<uint64_t>();
585   if (std::error_code EC = NumHeadSamples.getError())
586     return EC;
587 
588   ErrorOr<SampleContext> FContext(readSampleContextFromTable());
589   if (std::error_code EC = FContext.getError())
590     return EC;
591 
592   Profiles[*FContext] = FunctionSamples();
593   FunctionSamples &FProfile = Profiles[*FContext];
594   FProfile.setContext(*FContext);
595   FProfile.addHeadSamples(*NumHeadSamples);
596 
597   if (FContext->hasContext())
598     CSProfileCount++;
599 
600   if (std::error_code EC = readProfile(FProfile))
601     return EC;
602   return sampleprof_error::success;
603 }
604 
605 std::error_code SampleProfileReaderBinary::readImpl() {
606   ProfileIsFS = ProfileIsFSDisciminator;
607   FunctionSamples::ProfileIsFS = ProfileIsFS;
608   while (!at_eof()) {
609     if (std::error_code EC = readFuncProfile(Data))
610       return EC;
611   }
612 
613   return sampleprof_error::success;
614 }
615 
616 ErrorOr<SampleContextFrames>
617 SampleProfileReaderExtBinaryBase::readContextFromTable() {
618   auto ContextIdx = readNumber<uint32_t>();
619   if (std::error_code EC = ContextIdx.getError())
620     return EC;
621   if (*ContextIdx >= CSNameTable->size())
622     return sampleprof_error::truncated_name_table;
623   return (*CSNameTable)[*ContextIdx];
624 }
625 
626 ErrorOr<SampleContext>
627 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
628   if (ProfileIsCS) {
629     auto FContext(readContextFromTable());
630     if (std::error_code EC = FContext.getError())
631       return EC;
632     return SampleContext(*FContext);
633   } else {
634     auto FName(readStringFromTable());
635     if (std::error_code EC = FName.getError())
636       return EC;
637     return SampleContext(*FName);
638   }
639 }
640 
641 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
642     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
643   Data = Start;
644   End = Start + Size;
645   switch (Entry.Type) {
646   case SecProfSummary:
647     if (std::error_code EC = readSummary())
648       return EC;
649     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
650       Summary->setPartialProfile(true);
651     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
652       FunctionSamples::ProfileIsCS = ProfileIsCS = true;
653     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
654       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
655     break;
656   case SecNameTable: {
657     FixedLengthMD5 =
658         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
659     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
660     assert((!FixedLengthMD5 || UseMD5) &&
661            "If FixedLengthMD5 is true, UseMD5 has to be true");
662     FunctionSamples::HasUniqSuffix =
663         hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
664     if (std::error_code EC = readNameTableSec(UseMD5))
665       return EC;
666     break;
667   }
668   case SecCSNameTable: {
669     if (std::error_code EC = readCSNameTableSec())
670       return EC;
671     break;
672   }
673   case SecLBRProfile:
674     if (std::error_code EC = readFuncProfiles())
675       return EC;
676     break;
677   case SecFuncOffsetTable:
678     FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
679     if (std::error_code EC = readFuncOffsetTable())
680       return EC;
681     break;
682   case SecFuncMetadata: {
683     ProfileIsProbeBased =
684         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
685     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
686     bool HasAttribute =
687         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
688     if (std::error_code EC = readFuncMetadata(HasAttribute))
689       return EC;
690     break;
691   }
692   case SecProfileSymbolList:
693     if (std::error_code EC = readProfileSymbolList())
694       return EC;
695     break;
696   default:
697     if (std::error_code EC = readCustomSection(Entry))
698       return EC;
699     break;
700   }
701   return sampleprof_error::success;
702 }
703 
704 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
705   if (!M)
706     return false;
707   FuncsToUse.clear();
708   for (auto &F : *M)
709     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
710   return true;
711 }
712 
713 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
714   // If there are more than one FuncOffsetTable, the profile read associated
715   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
716   // is read.
717   FuncOffsetTable.clear();
718 
719   auto Size = readNumber<uint64_t>();
720   if (std::error_code EC = Size.getError())
721     return EC;
722 
723   FuncOffsetTable.reserve(*Size);
724 
725   if (FuncOffsetsOrdered) {
726     OrderedFuncOffsets =
727         std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
728     OrderedFuncOffsets->reserve(*Size);
729   }
730 
731   for (uint32_t I = 0; I < *Size; ++I) {
732     auto FContext(readSampleContextFromTable());
733     if (std::error_code EC = FContext.getError())
734       return EC;
735 
736     auto Offset = readNumber<uint64_t>();
737     if (std::error_code EC = Offset.getError())
738       return EC;
739 
740     FuncOffsetTable[*FContext] = *Offset;
741     if (FuncOffsetsOrdered)
742       OrderedFuncOffsets->emplace_back(*FContext, *Offset);
743   }
744 
745   return sampleprof_error::success;
746 }
747 
748 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
749   // Collect functions used by current module if the Reader has been
750   // given a module.
751   // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
752   // which will query FunctionSamples::HasUniqSuffix, so it has to be
753   // called after FunctionSamples::HasUniqSuffix is set, i.e. after
754   // NameTable section is read.
755   bool LoadFuncsToBeUsed = collectFuncsFromModule();
756 
757   // When LoadFuncsToBeUsed is false, load all the function profiles.
758   const uint8_t *Start = Data;
759   if (!LoadFuncsToBeUsed) {
760     while (Data < End) {
761       if (std::error_code EC = readFuncProfile(Data))
762         return EC;
763     }
764     assert(Data == End && "More data is read than expected");
765   } else {
766     // Load function profiles on demand.
767     if (Remapper) {
768       for (auto Name : FuncsToUse) {
769         Remapper->insert(Name);
770       }
771     }
772 
773     if (ProfileIsCS) {
774       DenseSet<uint64_t> FuncGuidsToUse;
775       if (useMD5()) {
776         for (auto Name : FuncsToUse)
777           FuncGuidsToUse.insert(Function::getGUID(Name));
778       }
779 
780       // For each function in current module, load all context profiles for
781       // the function as well as their callee contexts which can help profile
782       // guided importing for ThinLTO. This can be achieved by walking
783       // through an ordered context container, where contexts are laid out
784       // as if they were walked in preorder of a context trie. While
785       // traversing the trie, a link to the highest common ancestor node is
786       // kept so that all of its decendants will be loaded.
787       assert(OrderedFuncOffsets.get() &&
788              "func offset table should always be sorted in CS profile");
789       const SampleContext *CommonContext = nullptr;
790       for (const auto &NameOffset : *OrderedFuncOffsets) {
791         const auto &FContext = NameOffset.first;
792         auto FName = FContext.getName();
793         // For function in the current module, keep its farthest ancestor
794         // context. This can be used to load itself and its child and
795         // sibling contexts.
796         if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
797             (!useMD5() && (FuncsToUse.count(FName) ||
798                            (Remapper && Remapper->exist(FName))))) {
799           if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
800             CommonContext = &FContext;
801         }
802 
803         if (CommonContext == &FContext ||
804             (CommonContext && CommonContext->IsPrefixOf(FContext))) {
805           // Load profile for the current context which originated from
806           // the common ancestor.
807           const uint8_t *FuncProfileAddr = Start + NameOffset.second;
808           assert(FuncProfileAddr < End && "out of LBRProfile section");
809           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
810             return EC;
811         }
812       }
813     } else {
814       if (useMD5()) {
815         for (auto Name : FuncsToUse) {
816           auto GUID = std::to_string(MD5Hash(Name));
817           auto iter = FuncOffsetTable.find(StringRef(GUID));
818           if (iter == FuncOffsetTable.end())
819             continue;
820           const uint8_t *FuncProfileAddr = Start + iter->second;
821           assert(FuncProfileAddr < End && "out of LBRProfile section");
822           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
823             return EC;
824         }
825       } else {
826         for (auto NameOffset : FuncOffsetTable) {
827           SampleContext FContext(NameOffset.first);
828           auto FuncName = FContext.getName();
829           if (!FuncsToUse.count(FuncName) &&
830               (!Remapper || !Remapper->exist(FuncName)))
831             continue;
832           const uint8_t *FuncProfileAddr = Start + NameOffset.second;
833           assert(FuncProfileAddr < End && "out of LBRProfile section");
834           if (std::error_code EC = readFuncProfile(FuncProfileAddr))
835             return EC;
836         }
837       }
838     }
839     Data = End;
840   }
841   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
842          "Cannot have both context-sensitive and regular profile");
843   assert((!CSProfileCount || ProfileIsCS) &&
844          "Section flag should be consistent with actual profile");
845   return sampleprof_error::success;
846 }
847 
848 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
849   if (!ProfSymList)
850     ProfSymList = std::make_unique<ProfileSymbolList>();
851 
852   if (std::error_code EC = ProfSymList->read(Data, End - Data))
853     return EC;
854 
855   Data = End;
856   return sampleprof_error::success;
857 }
858 
859 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
860     const uint8_t *SecStart, const uint64_t SecSize,
861     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
862   Data = SecStart;
863   End = SecStart + SecSize;
864   auto DecompressSize = readNumber<uint64_t>();
865   if (std::error_code EC = DecompressSize.getError())
866     return EC;
867   DecompressBufSize = *DecompressSize;
868 
869   auto CompressSize = readNumber<uint64_t>();
870   if (std::error_code EC = CompressSize.getError())
871     return EC;
872 
873   if (!llvm::zlib::isAvailable())
874     return sampleprof_error::zlib_unavailable;
875 
876   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
877                               *CompressSize);
878   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
879   size_t UCSize = DecompressBufSize;
880   llvm::Error E =
881       zlib::uncompress(CompressedStrings, Buffer, UCSize);
882   if (E)
883     return sampleprof_error::uncompress_failed;
884   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
885   return sampleprof_error::success;
886 }
887 
888 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
889   const uint8_t *BufStart =
890       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
891 
892   for (auto &Entry : SecHdrTable) {
893     // Skip empty section.
894     if (!Entry.Size)
895       continue;
896 
897     // Skip sections without context when SkipFlatProf is true.
898     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
899       continue;
900 
901     const uint8_t *SecStart = BufStart + Entry.Offset;
902     uint64_t SecSize = Entry.Size;
903 
904     // If the section is compressed, decompress it into a buffer
905     // DecompressBuf before reading the actual data. The pointee of
906     // 'Data' will be changed to buffer hold by DecompressBuf
907     // temporarily when reading the actual data.
908     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
909     if (isCompressed) {
910       const uint8_t *DecompressBuf;
911       uint64_t DecompressBufSize;
912       if (std::error_code EC = decompressSection(
913               SecStart, SecSize, DecompressBuf, DecompressBufSize))
914         return EC;
915       SecStart = DecompressBuf;
916       SecSize = DecompressBufSize;
917     }
918 
919     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
920       return EC;
921     if (Data != SecStart + SecSize)
922       return sampleprof_error::malformed;
923 
924     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
925     if (isCompressed) {
926       Data = BufStart + Entry.Offset;
927       End = BufStart + Buffer->getBufferSize();
928     }
929   }
930 
931   return sampleprof_error::success;
932 }
933 
934 std::error_code SampleProfileReaderCompactBinary::readImpl() {
935   // Collect functions used by current module if the Reader has been
936   // given a module.
937   bool LoadFuncsToBeUsed = collectFuncsFromModule();
938   ProfileIsFS = ProfileIsFSDisciminator;
939   FunctionSamples::ProfileIsFS = ProfileIsFS;
940   std::vector<uint64_t> OffsetsToUse;
941   if (!LoadFuncsToBeUsed) {
942     // load all the function profiles.
943     for (auto FuncEntry : FuncOffsetTable) {
944       OffsetsToUse.push_back(FuncEntry.second);
945     }
946   } else {
947     // load function profiles on demand.
948     for (auto Name : FuncsToUse) {
949       auto GUID = std::to_string(MD5Hash(Name));
950       auto iter = FuncOffsetTable.find(StringRef(GUID));
951       if (iter == FuncOffsetTable.end())
952         continue;
953       OffsetsToUse.push_back(iter->second);
954     }
955   }
956 
957   for (auto Offset : OffsetsToUse) {
958     const uint8_t *SavedData = Data;
959     if (std::error_code EC = readFuncProfile(
960             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
961             Offset))
962       return EC;
963     Data = SavedData;
964   }
965   return sampleprof_error::success;
966 }
967 
968 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
969   if (Magic == SPMagic())
970     return sampleprof_error::success;
971   return sampleprof_error::bad_magic;
972 }
973 
974 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
975   if (Magic == SPMagic(SPF_Ext_Binary))
976     return sampleprof_error::success;
977   return sampleprof_error::bad_magic;
978 }
979 
980 std::error_code
981 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
982   if (Magic == SPMagic(SPF_Compact_Binary))
983     return sampleprof_error::success;
984   return sampleprof_error::bad_magic;
985 }
986 
987 std::error_code SampleProfileReaderBinary::readNameTable() {
988   auto Size = readNumber<uint32_t>();
989   if (std::error_code EC = Size.getError())
990     return EC;
991   NameTable.reserve(*Size + NameTable.size());
992   for (uint32_t I = 0; I < *Size; ++I) {
993     auto Name(readString());
994     if (std::error_code EC = Name.getError())
995       return EC;
996     NameTable.push_back(*Name);
997   }
998 
999   return sampleprof_error::success;
1000 }
1001 
1002 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1003   auto Size = readNumber<uint64_t>();
1004   if (std::error_code EC = Size.getError())
1005     return EC;
1006   MD5StringBuf = std::make_unique<std::vector<std::string>>();
1007   MD5StringBuf->reserve(*Size);
1008   if (FixedLengthMD5) {
1009     // Preallocate and initialize NameTable so we can check whether a name
1010     // index has been read before by checking whether the element in the
1011     // NameTable is empty, meanwhile readStringIndex can do the boundary
1012     // check using the size of NameTable.
1013     NameTable.resize(*Size + NameTable.size());
1014 
1015     MD5NameMemStart = Data;
1016     Data = Data + (*Size) * sizeof(uint64_t);
1017     return sampleprof_error::success;
1018   }
1019   NameTable.reserve(*Size);
1020   for (uint32_t I = 0; I < *Size; ++I) {
1021     auto FID = readNumber<uint64_t>();
1022     if (std::error_code EC = FID.getError())
1023       return EC;
1024     MD5StringBuf->push_back(std::to_string(*FID));
1025     // NameTable is a vector of StringRef. Here it is pushing back a
1026     // StringRef initialized with the last string in MD5stringBuf.
1027     NameTable.push_back(MD5StringBuf->back());
1028   }
1029   return sampleprof_error::success;
1030 }
1031 
1032 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
1033   if (IsMD5)
1034     return readMD5NameTable();
1035   return SampleProfileReaderBinary::readNameTable();
1036 }
1037 
1038 // Read in the CS name table section, which basically contains a list of context
1039 // vectors. Each element of a context vector, aka a frame, refers to the
1040 // underlying raw function names that are stored in the name table, as well as
1041 // a callsite identifier that only makes sense for non-leaf frames.
1042 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1043   auto Size = readNumber<uint32_t>();
1044   if (std::error_code EC = Size.getError())
1045     return EC;
1046 
1047   std::vector<SampleContextFrameVector> *PNameVec =
1048       new std::vector<SampleContextFrameVector>();
1049   PNameVec->reserve(*Size);
1050   for (uint32_t I = 0; I < *Size; ++I) {
1051     PNameVec->emplace_back(SampleContextFrameVector());
1052     auto ContextSize = readNumber<uint32_t>();
1053     if (std::error_code EC = ContextSize.getError())
1054       return EC;
1055     for (uint32_t J = 0; J < *ContextSize; ++J) {
1056       auto FName(readStringFromTable());
1057       if (std::error_code EC = FName.getError())
1058         return EC;
1059       auto LineOffset = readNumber<uint64_t>();
1060       if (std::error_code EC = LineOffset.getError())
1061         return EC;
1062 
1063       if (!isOffsetLegal(*LineOffset))
1064         return std::error_code();
1065 
1066       auto Discriminator = readNumber<uint64_t>();
1067       if (std::error_code EC = Discriminator.getError())
1068         return EC;
1069 
1070       PNameVec->back().emplace_back(
1071           FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1072     }
1073   }
1074 
1075   // From this point the underlying object of CSNameTable should be immutable.
1076   CSNameTable.reset(PNameVec);
1077   return sampleprof_error::success;
1078 }
1079 
1080 std::error_code
1081 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1082   while (Data < End) {
1083     auto FContext(readSampleContextFromTable());
1084     if (std::error_code EC = FContext.getError())
1085       return EC;
1086 
1087     bool ProfileInMap = Profiles.count(*FContext);
1088     if (ProfileIsProbeBased) {
1089       auto Checksum = readNumber<uint64_t>();
1090       if (std::error_code EC = Checksum.getError())
1091         return EC;
1092       if (ProfileInMap)
1093         Profiles[*FContext].setFunctionHash(*Checksum);
1094     }
1095 
1096     if (ProfileHasAttribute) {
1097       auto Attributes = readNumber<uint32_t>();
1098       if (std::error_code EC = Attributes.getError())
1099         return EC;
1100       if (ProfileInMap)
1101         Profiles[*FContext].getContext().setAllAttributes(*Attributes);
1102     }
1103   }
1104 
1105   assert(Data == End && "More data is read than expected");
1106   return sampleprof_error::success;
1107 }
1108 
1109 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1110   auto Size = readNumber<uint64_t>();
1111   if (std::error_code EC = Size.getError())
1112     return EC;
1113   NameTable.reserve(*Size);
1114   for (uint32_t I = 0; I < *Size; ++I) {
1115     auto FID = readNumber<uint64_t>();
1116     if (std::error_code EC = FID.getError())
1117       return EC;
1118     NameTable.push_back(std::to_string(*FID));
1119   }
1120   return sampleprof_error::success;
1121 }
1122 
1123 std::error_code
1124 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1125   SecHdrTableEntry Entry;
1126   auto Type = readUnencodedNumber<uint64_t>();
1127   if (std::error_code EC = Type.getError())
1128     return EC;
1129   Entry.Type = static_cast<SecType>(*Type);
1130 
1131   auto Flags = readUnencodedNumber<uint64_t>();
1132   if (std::error_code EC = Flags.getError())
1133     return EC;
1134   Entry.Flags = *Flags;
1135 
1136   auto Offset = readUnencodedNumber<uint64_t>();
1137   if (std::error_code EC = Offset.getError())
1138     return EC;
1139   Entry.Offset = *Offset;
1140 
1141   auto Size = readUnencodedNumber<uint64_t>();
1142   if (std::error_code EC = Size.getError())
1143     return EC;
1144   Entry.Size = *Size;
1145 
1146   Entry.LayoutIndex = Idx;
1147   SecHdrTable.push_back(std::move(Entry));
1148   return sampleprof_error::success;
1149 }
1150 
1151 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1152   auto EntryNum = readUnencodedNumber<uint64_t>();
1153   if (std::error_code EC = EntryNum.getError())
1154     return EC;
1155 
1156   for (uint32_t i = 0; i < (*EntryNum); i++)
1157     if (std::error_code EC = readSecHdrTableEntry(i))
1158       return EC;
1159 
1160   return sampleprof_error::success;
1161 }
1162 
1163 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1164   const uint8_t *BufStart =
1165       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1166   Data = BufStart;
1167   End = BufStart + Buffer->getBufferSize();
1168 
1169   if (std::error_code EC = readMagicIdent())
1170     return EC;
1171 
1172   if (std::error_code EC = readSecHdrTable())
1173     return EC;
1174 
1175   return sampleprof_error::success;
1176 }
1177 
1178 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1179   uint64_t Size = 0;
1180   for (auto &Entry : SecHdrTable) {
1181     if (Entry.Type == Type)
1182       Size += Entry.Size;
1183   }
1184   return Size;
1185 }
1186 
1187 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1188   // Sections in SecHdrTable is not necessarily in the same order as
1189   // sections in the profile because section like FuncOffsetTable needs
1190   // to be written after section LBRProfile but needs to be read before
1191   // section LBRProfile, so we cannot simply use the last entry in
1192   // SecHdrTable to calculate the file size.
1193   uint64_t FileSize = 0;
1194   for (auto &Entry : SecHdrTable) {
1195     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1196   }
1197   return FileSize;
1198 }
1199 
1200 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1201   std::string Flags;
1202   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1203     Flags.append("{compressed,");
1204   else
1205     Flags.append("{");
1206 
1207   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1208     Flags.append("flat,");
1209 
1210   switch (Entry.Type) {
1211   case SecNameTable:
1212     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1213       Flags.append("fixlenmd5,");
1214     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1215       Flags.append("md5,");
1216     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1217       Flags.append("uniq,");
1218     break;
1219   case SecProfSummary:
1220     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1221       Flags.append("partial,");
1222     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1223       Flags.append("context,");
1224     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1225       Flags.append("fs-discriminator,");
1226     break;
1227   case SecFuncOffsetTable:
1228     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1229       Flags.append("ordered,");
1230     break;
1231   case SecFuncMetadata:
1232     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1233       Flags.append("probe,");
1234     if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1235       Flags.append("attr,");
1236     break;
1237   default:
1238     break;
1239   }
1240   char &last = Flags.back();
1241   if (last == ',')
1242     last = '}';
1243   else
1244     Flags.append("}");
1245   return Flags;
1246 }
1247 
1248 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1249   uint64_t TotalSecsSize = 0;
1250   for (auto &Entry : SecHdrTable) {
1251     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1252        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1253        << "\n";
1254     ;
1255     TotalSecsSize += Entry.Size;
1256   }
1257   uint64_t HeaderSize = SecHdrTable.front().Offset;
1258   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1259          "Size of 'header + sections' doesn't match the total size of profile");
1260 
1261   OS << "Header Size: " << HeaderSize << "\n";
1262   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1263   OS << "File Size: " << getFileSize() << "\n";
1264   return true;
1265 }
1266 
1267 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1268   // Read and check the magic identifier.
1269   auto Magic = readNumber<uint64_t>();
1270   if (std::error_code EC = Magic.getError())
1271     return EC;
1272   else if (std::error_code EC = verifySPMagic(*Magic))
1273     return EC;
1274 
1275   // Read the version number.
1276   auto Version = readNumber<uint64_t>();
1277   if (std::error_code EC = Version.getError())
1278     return EC;
1279   else if (*Version != SPVersion())
1280     return sampleprof_error::unsupported_version;
1281 
1282   return sampleprof_error::success;
1283 }
1284 
1285 std::error_code SampleProfileReaderBinary::readHeader() {
1286   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1287   End = Data + Buffer->getBufferSize();
1288 
1289   if (std::error_code EC = readMagicIdent())
1290     return EC;
1291 
1292   if (std::error_code EC = readSummary())
1293     return EC;
1294 
1295   if (std::error_code EC = readNameTable())
1296     return EC;
1297   return sampleprof_error::success;
1298 }
1299 
1300 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1301   SampleProfileReaderBinary::readHeader();
1302   if (std::error_code EC = readFuncOffsetTable())
1303     return EC;
1304   return sampleprof_error::success;
1305 }
1306 
1307 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1308   auto TableOffset = readUnencodedNumber<uint64_t>();
1309   if (std::error_code EC = TableOffset.getError())
1310     return EC;
1311 
1312   const uint8_t *SavedData = Data;
1313   const uint8_t *TableStart =
1314       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1315       *TableOffset;
1316   Data = TableStart;
1317 
1318   auto Size = readNumber<uint64_t>();
1319   if (std::error_code EC = Size.getError())
1320     return EC;
1321 
1322   FuncOffsetTable.reserve(*Size);
1323   for (uint32_t I = 0; I < *Size; ++I) {
1324     auto FName(readStringFromTable());
1325     if (std::error_code EC = FName.getError())
1326       return EC;
1327 
1328     auto Offset = readNumber<uint64_t>();
1329     if (std::error_code EC = Offset.getError())
1330       return EC;
1331 
1332     FuncOffsetTable[*FName] = *Offset;
1333   }
1334   End = TableStart;
1335   Data = SavedData;
1336   return sampleprof_error::success;
1337 }
1338 
1339 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1340   if (!M)
1341     return false;
1342   FuncsToUse.clear();
1343   for (auto &F : *M)
1344     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1345   return true;
1346 }
1347 
1348 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1349     std::vector<ProfileSummaryEntry> &Entries) {
1350   auto Cutoff = readNumber<uint64_t>();
1351   if (std::error_code EC = Cutoff.getError())
1352     return EC;
1353 
1354   auto MinBlockCount = readNumber<uint64_t>();
1355   if (std::error_code EC = MinBlockCount.getError())
1356     return EC;
1357 
1358   auto NumBlocks = readNumber<uint64_t>();
1359   if (std::error_code EC = NumBlocks.getError())
1360     return EC;
1361 
1362   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1363   return sampleprof_error::success;
1364 }
1365 
1366 std::error_code SampleProfileReaderBinary::readSummary() {
1367   auto TotalCount = readNumber<uint64_t>();
1368   if (std::error_code EC = TotalCount.getError())
1369     return EC;
1370 
1371   auto MaxBlockCount = readNumber<uint64_t>();
1372   if (std::error_code EC = MaxBlockCount.getError())
1373     return EC;
1374 
1375   auto MaxFunctionCount = readNumber<uint64_t>();
1376   if (std::error_code EC = MaxFunctionCount.getError())
1377     return EC;
1378 
1379   auto NumBlocks = readNumber<uint64_t>();
1380   if (std::error_code EC = NumBlocks.getError())
1381     return EC;
1382 
1383   auto NumFunctions = readNumber<uint64_t>();
1384   if (std::error_code EC = NumFunctions.getError())
1385     return EC;
1386 
1387   auto NumSummaryEntries = readNumber<uint64_t>();
1388   if (std::error_code EC = NumSummaryEntries.getError())
1389     return EC;
1390 
1391   std::vector<ProfileSummaryEntry> Entries;
1392   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1393     std::error_code EC = readSummaryEntry(Entries);
1394     if (EC != sampleprof_error::success)
1395       return EC;
1396   }
1397   Summary = std::make_unique<ProfileSummary>(
1398       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1399       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1400 
1401   return sampleprof_error::success;
1402 }
1403 
1404 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1405   const uint8_t *Data =
1406       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1407   uint64_t Magic = decodeULEB128(Data);
1408   return Magic == SPMagic();
1409 }
1410 
1411 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1412   const uint8_t *Data =
1413       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1414   uint64_t Magic = decodeULEB128(Data);
1415   return Magic == SPMagic(SPF_Ext_Binary);
1416 }
1417 
1418 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1419   const uint8_t *Data =
1420       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1421   uint64_t Magic = decodeULEB128(Data);
1422   return Magic == SPMagic(SPF_Compact_Binary);
1423 }
1424 
1425 std::error_code SampleProfileReaderGCC::skipNextWord() {
1426   uint32_t dummy;
1427   if (!GcovBuffer.readInt(dummy))
1428     return sampleprof_error::truncated;
1429   return sampleprof_error::success;
1430 }
1431 
1432 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1433   if (sizeof(T) <= sizeof(uint32_t)) {
1434     uint32_t Val;
1435     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1436       return static_cast<T>(Val);
1437   } else if (sizeof(T) <= sizeof(uint64_t)) {
1438     uint64_t Val;
1439     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1440       return static_cast<T>(Val);
1441   }
1442 
1443   std::error_code EC = sampleprof_error::malformed;
1444   reportError(0, EC.message());
1445   return EC;
1446 }
1447 
1448 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1449   StringRef Str;
1450   if (!GcovBuffer.readString(Str))
1451     return sampleprof_error::truncated;
1452   return Str;
1453 }
1454 
1455 std::error_code SampleProfileReaderGCC::readHeader() {
1456   // Read the magic identifier.
1457   if (!GcovBuffer.readGCDAFormat())
1458     return sampleprof_error::unrecognized_format;
1459 
1460   // Read the version number. Note - the GCC reader does not validate this
1461   // version, but the profile creator generates v704.
1462   GCOV::GCOVVersion version;
1463   if (!GcovBuffer.readGCOVVersion(version))
1464     return sampleprof_error::unrecognized_format;
1465 
1466   if (version != GCOV::V407)
1467     return sampleprof_error::unsupported_version;
1468 
1469   // Skip the empty integer.
1470   if (std::error_code EC = skipNextWord())
1471     return EC;
1472 
1473   return sampleprof_error::success;
1474 }
1475 
1476 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1477   uint32_t Tag;
1478   if (!GcovBuffer.readInt(Tag))
1479     return sampleprof_error::truncated;
1480 
1481   if (Tag != Expected)
1482     return sampleprof_error::malformed;
1483 
1484   if (std::error_code EC = skipNextWord())
1485     return EC;
1486 
1487   return sampleprof_error::success;
1488 }
1489 
1490 std::error_code SampleProfileReaderGCC::readNameTable() {
1491   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1492     return EC;
1493 
1494   uint32_t Size;
1495   if (!GcovBuffer.readInt(Size))
1496     return sampleprof_error::truncated;
1497 
1498   for (uint32_t I = 0; I < Size; ++I) {
1499     StringRef Str;
1500     if (!GcovBuffer.readString(Str))
1501       return sampleprof_error::truncated;
1502     Names.push_back(std::string(Str));
1503   }
1504 
1505   return sampleprof_error::success;
1506 }
1507 
1508 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1509   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1510     return EC;
1511 
1512   uint32_t NumFunctions;
1513   if (!GcovBuffer.readInt(NumFunctions))
1514     return sampleprof_error::truncated;
1515 
1516   InlineCallStack Stack;
1517   for (uint32_t I = 0; I < NumFunctions; ++I)
1518     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1519       return EC;
1520 
1521   computeSummary();
1522   return sampleprof_error::success;
1523 }
1524 
1525 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1526     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1527   uint64_t HeadCount = 0;
1528   if (InlineStack.size() == 0)
1529     if (!GcovBuffer.readInt64(HeadCount))
1530       return sampleprof_error::truncated;
1531 
1532   uint32_t NameIdx;
1533   if (!GcovBuffer.readInt(NameIdx))
1534     return sampleprof_error::truncated;
1535 
1536   StringRef Name(Names[NameIdx]);
1537 
1538   uint32_t NumPosCounts;
1539   if (!GcovBuffer.readInt(NumPosCounts))
1540     return sampleprof_error::truncated;
1541 
1542   uint32_t NumCallsites;
1543   if (!GcovBuffer.readInt(NumCallsites))
1544     return sampleprof_error::truncated;
1545 
1546   FunctionSamples *FProfile = nullptr;
1547   if (InlineStack.size() == 0) {
1548     // If this is a top function that we have already processed, do not
1549     // update its profile again.  This happens in the presence of
1550     // function aliases.  Since these aliases share the same function
1551     // body, there will be identical replicated profiles for the
1552     // original function.  In this case, we simply not bother updating
1553     // the profile of the original function.
1554     FProfile = &Profiles[Name];
1555     FProfile->addHeadSamples(HeadCount);
1556     if (FProfile->getTotalSamples() > 0)
1557       Update = false;
1558   } else {
1559     // Otherwise, we are reading an inlined instance. The top of the
1560     // inline stack contains the profile of the caller. Insert this
1561     // callee in the caller's CallsiteMap.
1562     FunctionSamples *CallerProfile = InlineStack.front();
1563     uint32_t LineOffset = Offset >> 16;
1564     uint32_t Discriminator = Offset & 0xffff;
1565     FProfile = &CallerProfile->functionSamplesAt(
1566         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1567   }
1568   FProfile->setName(Name);
1569 
1570   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1571     uint32_t Offset;
1572     if (!GcovBuffer.readInt(Offset))
1573       return sampleprof_error::truncated;
1574 
1575     uint32_t NumTargets;
1576     if (!GcovBuffer.readInt(NumTargets))
1577       return sampleprof_error::truncated;
1578 
1579     uint64_t Count;
1580     if (!GcovBuffer.readInt64(Count))
1581       return sampleprof_error::truncated;
1582 
1583     // The line location is encoded in the offset as:
1584     //   high 16 bits: line offset to the start of the function.
1585     //   low 16 bits: discriminator.
1586     uint32_t LineOffset = Offset >> 16;
1587     uint32_t Discriminator = Offset & 0xffff;
1588 
1589     InlineCallStack NewStack;
1590     NewStack.push_back(FProfile);
1591     llvm::append_range(NewStack, InlineStack);
1592     if (Update) {
1593       // Walk up the inline stack, adding the samples on this line to
1594       // the total sample count of the callers in the chain.
1595       for (auto CallerProfile : NewStack)
1596         CallerProfile->addTotalSamples(Count);
1597 
1598       // Update the body samples for the current profile.
1599       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1600     }
1601 
1602     // Process the list of functions called at an indirect call site.
1603     // These are all the targets that a function pointer (or virtual
1604     // function) resolved at runtime.
1605     for (uint32_t J = 0; J < NumTargets; J++) {
1606       uint32_t HistVal;
1607       if (!GcovBuffer.readInt(HistVal))
1608         return sampleprof_error::truncated;
1609 
1610       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1611         return sampleprof_error::malformed;
1612 
1613       uint64_t TargetIdx;
1614       if (!GcovBuffer.readInt64(TargetIdx))
1615         return sampleprof_error::truncated;
1616       StringRef TargetName(Names[TargetIdx]);
1617 
1618       uint64_t TargetCount;
1619       if (!GcovBuffer.readInt64(TargetCount))
1620         return sampleprof_error::truncated;
1621 
1622       if (Update)
1623         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1624                                          TargetName, TargetCount);
1625     }
1626   }
1627 
1628   // Process all the inlined callers into the current function. These
1629   // are all the callsites that were inlined into this function.
1630   for (uint32_t I = 0; I < NumCallsites; I++) {
1631     // The offset is encoded as:
1632     //   high 16 bits: line offset to the start of the function.
1633     //   low 16 bits: discriminator.
1634     uint32_t Offset;
1635     if (!GcovBuffer.readInt(Offset))
1636       return sampleprof_error::truncated;
1637     InlineCallStack NewStack;
1638     NewStack.push_back(FProfile);
1639     llvm::append_range(NewStack, InlineStack);
1640     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1641       return EC;
1642   }
1643 
1644   return sampleprof_error::success;
1645 }
1646 
1647 /// Read a GCC AutoFDO profile.
1648 ///
1649 /// This format is generated by the Linux Perf conversion tool at
1650 /// https://github.com/google/autofdo.
1651 std::error_code SampleProfileReaderGCC::readImpl() {
1652   assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1653   // Read the string table.
1654   if (std::error_code EC = readNameTable())
1655     return EC;
1656 
1657   // Read the source profile.
1658   if (std::error_code EC = readFunctionProfiles())
1659     return EC;
1660 
1661   return sampleprof_error::success;
1662 }
1663 
1664 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1665   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1666   return Magic == "adcg*704";
1667 }
1668 
1669 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1670   // If the reader uses MD5 to represent string, we can't remap it because
1671   // we don't know what the original function names were.
1672   if (Reader.useMD5()) {
1673     Ctx.diagnose(DiagnosticInfoSampleProfile(
1674         Reader.getBuffer()->getBufferIdentifier(),
1675         "Profile data remapping cannot be applied to profile data "
1676         "in compact format (original mangled names are not available).",
1677         DS_Warning));
1678     return;
1679   }
1680 
1681   // CSSPGO-TODO: Remapper is not yet supported.
1682   // We will need to remap the entire context string.
1683   assert(Remappings && "should be initialized while creating remapper");
1684   for (auto &Sample : Reader.getProfiles()) {
1685     DenseSet<StringRef> NamesInSample;
1686     Sample.second.findAllNames(NamesInSample);
1687     for (auto &Name : NamesInSample)
1688       if (auto Key = Remappings->insert(Name))
1689         NameMap.insert({Key, Name});
1690   }
1691 
1692   RemappingApplied = true;
1693 }
1694 
1695 Optional<StringRef>
1696 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1697   if (auto Key = Remappings->lookup(Fname))
1698     return NameMap.lookup(Key);
1699   return None;
1700 }
1701 
1702 /// Prepare a memory buffer for the contents of \p Filename.
1703 ///
1704 /// \returns an error code indicating the status of the buffer.
1705 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1706 setupMemoryBuffer(const Twine &Filename) {
1707   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1708   if (std::error_code EC = BufferOrErr.getError())
1709     return EC;
1710   auto Buffer = std::move(BufferOrErr.get());
1711 
1712   // Sanity check the file.
1713   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1714     return sampleprof_error::too_large;
1715 
1716   return std::move(Buffer);
1717 }
1718 
1719 /// Create a sample profile reader based on the format of the input file.
1720 ///
1721 /// \param Filename The file to open.
1722 ///
1723 /// \param C The LLVM context to use to emit diagnostics.
1724 ///
1725 /// \param P The FSDiscriminatorPass.
1726 ///
1727 /// \param RemapFilename The file used for profile remapping.
1728 ///
1729 /// \returns an error code indicating the status of the created reader.
1730 ErrorOr<std::unique_ptr<SampleProfileReader>>
1731 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1732                             FSDiscriminatorPass P,
1733                             const std::string RemapFilename) {
1734   auto BufferOrError = setupMemoryBuffer(Filename);
1735   if (std::error_code EC = BufferOrError.getError())
1736     return EC;
1737   return create(BufferOrError.get(), C, P, RemapFilename);
1738 }
1739 
1740 /// Create a sample profile remapper from the given input, to remap the
1741 /// function names in the given profile data.
1742 ///
1743 /// \param Filename The file to open.
1744 ///
1745 /// \param Reader The profile reader the remapper is going to be applied to.
1746 ///
1747 /// \param C The LLVM context to use to emit diagnostics.
1748 ///
1749 /// \returns an error code indicating the status of the created reader.
1750 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1751 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1752                                            SampleProfileReader &Reader,
1753                                            LLVMContext &C) {
1754   auto BufferOrError = setupMemoryBuffer(Filename);
1755   if (std::error_code EC = BufferOrError.getError())
1756     return EC;
1757   return create(BufferOrError.get(), Reader, C);
1758 }
1759 
1760 /// Create a sample profile remapper from the given input, to remap the
1761 /// function names in the given profile data.
1762 ///
1763 /// \param B The memory buffer to create the reader from (assumes ownership).
1764 ///
1765 /// \param C The LLVM context to use to emit diagnostics.
1766 ///
1767 /// \param Reader The profile reader the remapper is going to be applied to.
1768 ///
1769 /// \returns an error code indicating the status of the created reader.
1770 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1771 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1772                                            SampleProfileReader &Reader,
1773                                            LLVMContext &C) {
1774   auto Remappings = std::make_unique<SymbolRemappingReader>();
1775   if (Error E = Remappings->read(*B.get())) {
1776     handleAllErrors(
1777         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1778           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1779                                                  ParseError.getLineNum(),
1780                                                  ParseError.getMessage()));
1781         });
1782     return sampleprof_error::malformed;
1783   }
1784 
1785   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1786       std::move(B), std::move(Remappings), Reader);
1787 }
1788 
1789 /// Create a sample profile reader based on the format of the input data.
1790 ///
1791 /// \param B The memory buffer to create the reader from (assumes ownership).
1792 ///
1793 /// \param C The LLVM context to use to emit diagnostics.
1794 ///
1795 /// \param P The FSDiscriminatorPass.
1796 ///
1797 /// \param RemapFilename The file used for profile remapping.
1798 ///
1799 /// \returns an error code indicating the status of the created reader.
1800 ErrorOr<std::unique_ptr<SampleProfileReader>>
1801 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1802                             FSDiscriminatorPass P,
1803                             const std::string RemapFilename) {
1804   std::unique_ptr<SampleProfileReader> Reader;
1805   if (SampleProfileReaderRawBinary::hasFormat(*B))
1806     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1807   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1808     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1809   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1810     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1811   else if (SampleProfileReaderGCC::hasFormat(*B))
1812     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1813   else if (SampleProfileReaderText::hasFormat(*B))
1814     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1815   else
1816     return sampleprof_error::unrecognized_format;
1817 
1818   if (!RemapFilename.empty()) {
1819     auto ReaderOrErr =
1820         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1821     if (std::error_code EC = ReaderOrErr.getError()) {
1822       std::string Msg = "Could not create remapper: " + EC.message();
1823       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1824       return EC;
1825     }
1826     Reader->Remapper = std::move(ReaderOrErr.get());
1827   }
1828 
1829   FunctionSamples::Format = Reader->getFormat();
1830   if (std::error_code EC = Reader->readHeader()) {
1831     return EC;
1832   }
1833 
1834   Reader->setDiscriminatorMaskedBitFrom(P);
1835 
1836   return std::move(Reader);
1837 }
1838 
1839 // For text and GCC file formats, we compute the summary after reading the
1840 // profile. Binary format has the profile summary in its header.
1841 void SampleProfileReader::computeSummary() {
1842   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1843   Summary = Builder.computeSummaryForProfiles(Profiles);
1844 }
1845