xref: /freebsd/contrib/llvm-project/llvm/lib/ProfileData/SampleProfReader.cpp (revision d409305fa3838fb39b38c26fc085fb729b8766d5)
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
11 //
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
14 //
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
17 //
18 // All three encodings can be used interchangeably as an input sample profile.
19 //
20 //===----------------------------------------------------------------------===//
21 
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <cstddef>
38 #include <cstdint>
39 #include <limits>
40 #include <memory>
41 #include <system_error>
42 #include <vector>
43 
44 using namespace llvm;
45 using namespace sampleprof;
46 
47 /// Dump the function profile for \p FName.
48 ///
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
51 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
52                                               raw_ostream &OS) {
53   OS << "Function: " << FName << ": " << Profiles[FName];
54 }
55 
56 /// Dump all the function profiles found on stream \p OS.
57 void SampleProfileReader::dump(raw_ostream &OS) {
58   for (const auto &I : Profiles)
59     dumpFunctionProfile(I.getKey(), OS);
60 }
61 
62 /// Parse \p Input as function head.
63 ///
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
67 ///
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef &Input, StringRef &FName,
70                       uint64_t &NumSamples, uint64_t &NumHeadSamples) {
71   if (Input[0] == ' ')
72     return false;
73   size_t n2 = Input.rfind(':');
74   size_t n1 = Input.rfind(':', n2 - 1);
75   FName = Input.substr(0, n1);
76   if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
77     return false;
78   if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
79     return false;
80   return true;
81 }
82 
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits (no bit
/// above bit 15 is set).
static bool isOffsetLegal(unsigned L) {
  const unsigned HighBits = L >> 16;
  return HighBits == 0;
}
85 
86 /// Parse \p Input that contains metadata.
87 /// Possible metadata:
88 /// - CFG Checksum information:
89 ///     !CFGChecksum: 12345
90 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
91 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) {
92   if (!Input.startswith("!CFGChecksum:"))
93     return false;
94 
95   StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
96   return !CFGInfo.getAsInteger(10, FunctionHash);
97 }
98 
/// Classification of an indented (non-header) line of a text profile, as
/// reported by ParseLine.
enum class LineType {
  // "offset[.disc]: callee:count" - the text after the location does not
  // start with a digit.
  CallSiteProfile,
  // "offset[.disc]: count [target:count ...]" - the text after the location
  // starts with a digit.
  BodyProfile,
  // A line indented by exactly one blank whose next character is '!'.
  Metadata,
};
104 
105 /// Parse \p Input as line sample.
106 ///
107 /// \param Input input line.
108 /// \param LineTy Type of this line.
109 /// \param Depth the depth of the inline stack.
110 /// \param NumSamples total samples of the line/inlined callsite.
111 /// \param LineOffset line offset to the start of the function.
112 /// \param Discriminator discriminator of the line.
113 /// \param TargetCountMap map from indirect call target to count.
114 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
115 ///
116 /// returns true if parsing is successful.
117 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
118                       uint64_t &NumSamples, uint32_t &LineOffset,
119                       uint32_t &Discriminator, StringRef &CalleeName,
120                       DenseMap<StringRef, uint64_t> &TargetCountMap,
121                       uint64_t &FunctionHash) {
122   for (Depth = 0; Input[Depth] == ' '; Depth++)
123     ;
124   if (Depth == 0)
125     return false;
126 
127   if (Depth == 1 && Input[Depth] == '!') {
128     LineTy = LineType::Metadata;
129     return parseMetadata(Input.substr(Depth), FunctionHash);
130   }
131 
132   size_t n1 = Input.find(':');
133   StringRef Loc = Input.substr(Depth, n1 - Depth);
134   size_t n2 = Loc.find('.');
135   if (n2 == StringRef::npos) {
136     if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
137       return false;
138     Discriminator = 0;
139   } else {
140     if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
141       return false;
142     if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
143       return false;
144   }
145 
146   StringRef Rest = Input.substr(n1 + 2);
147   if (isDigit(Rest[0])) {
148     LineTy = LineType::BodyProfile;
149     size_t n3 = Rest.find(' ');
150     if (n3 == StringRef::npos) {
151       if (Rest.getAsInteger(10, NumSamples))
152         return false;
153     } else {
154       if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
155         return false;
156     }
157     // Find call targets and their sample counts.
158     // Note: In some cases, there are symbols in the profile which are not
159     // mangled. To accommodate such cases, use colon + integer pairs as the
160     // anchor points.
161     // An example:
162     // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
163     // ":1000" and ":437" are used as anchor points so the string above will
164     // be interpreted as
165     // target: _M_construct<char *>
166     // count: 1000
167     // target: string_view<std::allocator<char> >
168     // count: 437
169     while (n3 != StringRef::npos) {
170       n3 += Rest.substr(n3).find_first_not_of(' ');
171       Rest = Rest.substr(n3);
172       n3 = Rest.find_first_of(':');
173       if (n3 == StringRef::npos || n3 == 0)
174         return false;
175 
176       StringRef Target;
177       uint64_t count, n4;
178       while (true) {
179         // Get the segment after the current colon.
180         StringRef AfterColon = Rest.substr(n3 + 1);
181         // Get the target symbol before the current colon.
182         Target = Rest.substr(0, n3);
183         // Check if the word after the current colon is an integer.
184         n4 = AfterColon.find_first_of(' ');
185         n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
186         StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
187         if (!WordAfterColon.getAsInteger(10, count))
188           break;
189 
190         // Try to find the next colon.
191         uint64_t n5 = AfterColon.find_first_of(':');
192         if (n5 == StringRef::npos)
193           return false;
194         n3 += n5 + 1;
195       }
196 
197       // An anchor point is found. Save the {target, count} pair
198       TargetCountMap[Target] = count;
199       if (n4 == Rest.size())
200         break;
201       // Change n3 to the next blank space after colon + integer pair.
202       n3 = n4;
203     }
204   } else {
205     LineTy = LineType::CallSiteProfile;
206     size_t n3 = Rest.find_last_of(':');
207     CalleeName = Rest.substr(0, n3);
208     if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
209       return false;
210   }
211   return true;
212 }
213 
214 /// Load samples from a text file.
215 ///
216 /// See the documentation at the top of the file for an explanation of
217 /// the expected format.
218 ///
219 /// \returns true if the file was loaded successfully, false otherwise.
220 std::error_code SampleProfileReaderText::readImpl() {
221   line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
222   sampleprof_error Result = sampleprof_error::success;
223 
224   InlineCallStack InlineStack;
225   uint32_t ProbeProfileCount = 0;
226 
227   // SeenMetadata tracks whether we have processed metadata for the current
228   // top-level function profile.
229   bool SeenMetadata = false;
230 
231   for (; !LineIt.is_at_eof(); ++LineIt) {
232     if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
233       continue;
234     // Read the header of each function.
235     //
236     // Note that for function identifiers we are actually expecting
237     // mangled names, but we may not always get them. This happens when
238     // the compiler decides not to emit the function (e.g., it was inlined
239     // and removed). In this case, the binary will not have the linkage
240     // name for the function, so the profiler will emit the function's
241     // unmangled name, which may contain characters like ':' and '>' in its
242     // name (member functions, templates, etc).
243     //
244     // The only requirement we place on the identifier, then, is that it
245     // should not begin with a number.
246     if ((*LineIt)[0] != ' ') {
247       uint64_t NumSamples, NumHeadSamples;
248       StringRef FName;
249       if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
250         reportError(LineIt.line_number(),
251                     "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
252         return sampleprof_error::malformed;
253       }
254       SeenMetadata = false;
255       SampleContext FContext(FName);
256       if (FContext.hasContext())
257         ++CSProfileCount;
258       Profiles[FContext] = FunctionSamples();
259       FunctionSamples &FProfile = Profiles[FContext];
260       FProfile.setName(FContext.getNameWithoutContext());
261       FProfile.setContext(FContext);
262       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
263       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
264       InlineStack.clear();
265       InlineStack.push_back(&FProfile);
266     } else {
267       uint64_t NumSamples;
268       StringRef FName;
269       DenseMap<StringRef, uint64_t> TargetCountMap;
270       uint32_t Depth, LineOffset, Discriminator;
271       LineType LineTy;
272       uint64_t FunctionHash;
273       if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
274                      Discriminator, FName, TargetCountMap, FunctionHash)) {
275         reportError(LineIt.line_number(),
276                     "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
277                         *LineIt);
278         return sampleprof_error::malformed;
279       }
280       if (SeenMetadata && LineTy != LineType::Metadata) {
281         // Metadata must be put at the end of a function profile.
282         reportError(LineIt.line_number(),
283                     "Found non-metadata after metadata: " + *LineIt);
284         return sampleprof_error::malformed;
285       }
286       while (InlineStack.size() > Depth) {
287         InlineStack.pop_back();
288       }
289       switch (LineTy) {
290       case LineType::CallSiteProfile: {
291         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
292             LineLocation(LineOffset, Discriminator))[std::string(FName)];
293         FSamples.setName(FName);
294         MergeResult(Result, FSamples.addTotalSamples(NumSamples));
295         InlineStack.push_back(&FSamples);
296         break;
297       }
298       case LineType::BodyProfile: {
299         while (InlineStack.size() > Depth) {
300           InlineStack.pop_back();
301         }
302         FunctionSamples &FProfile = *InlineStack.back();
303         for (const auto &name_count : TargetCountMap) {
304           MergeResult(Result, FProfile.addCalledTargetSamples(
305                                   LineOffset, Discriminator, name_count.first,
306                                   name_count.second));
307         }
308         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
309                                                     NumSamples));
310         break;
311       }
312       case LineType::Metadata: {
313         FunctionSamples &FProfile = *InlineStack.back();
314         FProfile.setFunctionHash(FunctionHash);
315         ++ProbeProfileCount;
316         SeenMetadata = true;
317         break;
318       }
319       }
320     }
321   }
322 
323   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
324          "Cannot have both context-sensitive and regular profile");
325   ProfileIsCS = (CSProfileCount > 0);
326   assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
327          "Cannot have both probe-based profiles and regular profiles");
328   ProfileIsProbeBased = (ProbeProfileCount > 0);
329   FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
330   FunctionSamples::ProfileIsCS = ProfileIsCS;
331 
332   if (Result == sampleprof_error::success)
333     computeSummary();
334 
335   return Result;
336 }
337 
338 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
339   bool result = false;
340 
341   // Check that the first non-comment line is a valid function header.
342   line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
343   if (!LineIt.is_at_eof()) {
344     if ((*LineIt)[0] != ' ') {
345       uint64_t NumSamples, NumHeadSamples;
346       StringRef FName;
347       result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
348     }
349   }
350 
351   return result;
352 }
353 
354 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
355   unsigned NumBytesRead = 0;
356   std::error_code EC;
357   uint64_t Val = decodeULEB128(Data, &NumBytesRead);
358 
359   if (Val > std::numeric_limits<T>::max())
360     EC = sampleprof_error::malformed;
361   else if (Data + NumBytesRead > End)
362     EC = sampleprof_error::truncated;
363   else
364     EC = sampleprof_error::success;
365 
366   if (EC) {
367     reportError(0, EC.message());
368     return EC;
369   }
370 
371   Data += NumBytesRead;
372   return static_cast<T>(Val);
373 }
374 
375 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
376   std::error_code EC;
377   StringRef Str(reinterpret_cast<const char *>(Data));
378   if (Data + Str.size() + 1 > End) {
379     EC = sampleprof_error::truncated;
380     reportError(0, EC.message());
381     return EC;
382   }
383 
384   Data += Str.size() + 1;
385   return Str;
386 }
387 
388 template <typename T>
389 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
390   std::error_code EC;
391 
392   if (Data + sizeof(T) > End) {
393     EC = sampleprof_error::truncated;
394     reportError(0, EC.message());
395     return EC;
396   }
397 
398   using namespace support;
399   T Val = endian::readNext<T, little, unaligned>(Data);
400   return Val;
401 }
402 
403 template <typename T>
404 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
405   std::error_code EC;
406   auto Idx = readNumber<uint32_t>();
407   if (std::error_code EC = Idx.getError())
408     return EC;
409   if (*Idx >= Table.size())
410     return sampleprof_error::truncated_name_table;
411   return *Idx;
412 }
413 
414 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
415   auto Idx = readStringIndex(NameTable);
416   if (std::error_code EC = Idx.getError())
417     return EC;
418 
419   return NameTable[*Idx];
420 }
421 
422 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
423   if (!FixedLengthMD5)
424     return SampleProfileReaderBinary::readStringFromTable();
425 
426   // read NameTable index.
427   auto Idx = readStringIndex(NameTable);
428   if (std::error_code EC = Idx.getError())
429     return EC;
430 
431   // Check whether the name to be accessed has been accessed before,
432   // if not, read it from memory directly.
433   StringRef &SR = NameTable[*Idx];
434   if (SR.empty()) {
435     const uint8_t *SavedData = Data;
436     Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
437     auto FID = readUnencodedNumber<uint64_t>();
438     if (std::error_code EC = FID.getError())
439       return EC;
440     // Save the string converted from uint64_t in MD5StringBuf. All the
441     // references to the name are all StringRefs refering to the string
442     // in MD5StringBuf.
443     MD5StringBuf->push_back(std::to_string(*FID));
444     SR = MD5StringBuf->back();
445     Data = SavedData;
446   }
447   return SR;
448 }
449 
450 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
451   auto Idx = readStringIndex(NameTable);
452   if (std::error_code EC = Idx.getError())
453     return EC;
454 
455   return StringRef(NameTable[*Idx]);
456 }
457 
458 std::error_code
459 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
460   auto NumSamples = readNumber<uint64_t>();
461   if (std::error_code EC = NumSamples.getError())
462     return EC;
463   FProfile.addTotalSamples(*NumSamples);
464 
465   // Read the samples in the body.
466   auto NumRecords = readNumber<uint32_t>();
467   if (std::error_code EC = NumRecords.getError())
468     return EC;
469 
470   for (uint32_t I = 0; I < *NumRecords; ++I) {
471     auto LineOffset = readNumber<uint64_t>();
472     if (std::error_code EC = LineOffset.getError())
473       return EC;
474 
475     if (!isOffsetLegal(*LineOffset)) {
476       return std::error_code();
477     }
478 
479     auto Discriminator = readNumber<uint64_t>();
480     if (std::error_code EC = Discriminator.getError())
481       return EC;
482 
483     auto NumSamples = readNumber<uint64_t>();
484     if (std::error_code EC = NumSamples.getError())
485       return EC;
486 
487     auto NumCalls = readNumber<uint32_t>();
488     if (std::error_code EC = NumCalls.getError())
489       return EC;
490 
491     for (uint32_t J = 0; J < *NumCalls; ++J) {
492       auto CalledFunction(readStringFromTable());
493       if (std::error_code EC = CalledFunction.getError())
494         return EC;
495 
496       auto CalledFunctionSamples = readNumber<uint64_t>();
497       if (std::error_code EC = CalledFunctionSamples.getError())
498         return EC;
499 
500       FProfile.addCalledTargetSamples(*LineOffset, *Discriminator,
501                                       *CalledFunction, *CalledFunctionSamples);
502     }
503 
504     FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples);
505   }
506 
507   // Read all the samples for inlined function calls.
508   auto NumCallsites = readNumber<uint32_t>();
509   if (std::error_code EC = NumCallsites.getError())
510     return EC;
511 
512   for (uint32_t J = 0; J < *NumCallsites; ++J) {
513     auto LineOffset = readNumber<uint64_t>();
514     if (std::error_code EC = LineOffset.getError())
515       return EC;
516 
517     auto Discriminator = readNumber<uint64_t>();
518     if (std::error_code EC = Discriminator.getError())
519       return EC;
520 
521     auto FName(readStringFromTable());
522     if (std::error_code EC = FName.getError())
523       return EC;
524 
525     FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
526         LineLocation(*LineOffset, *Discriminator))[std::string(*FName)];
527     CalleeProfile.setName(*FName);
528     if (std::error_code EC = readProfile(CalleeProfile))
529       return EC;
530   }
531 
532   return sampleprof_error::success;
533 }
534 
535 std::error_code
536 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
537   Data = Start;
538   auto NumHeadSamples = readNumber<uint64_t>();
539   if (std::error_code EC = NumHeadSamples.getError())
540     return EC;
541 
542   auto FName(readStringFromTable());
543   if (std::error_code EC = FName.getError())
544     return EC;
545 
546   SampleContext FContext(*FName);
547   Profiles[FContext] = FunctionSamples();
548   FunctionSamples &FProfile = Profiles[FContext];
549   FProfile.setName(FContext.getNameWithoutContext());
550   FProfile.setContext(FContext);
551   FProfile.addHeadSamples(*NumHeadSamples);
552 
553   if (FContext.hasContext())
554     CSProfileCount++;
555 
556   if (std::error_code EC = readProfile(FProfile))
557     return EC;
558   return sampleprof_error::success;
559 }
560 
561 std::error_code SampleProfileReaderBinary::readImpl() {
562   while (!at_eof()) {
563     if (std::error_code EC = readFuncProfile(Data))
564       return EC;
565   }
566 
567   return sampleprof_error::success;
568 }
569 
570 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
571     const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
572   Data = Start;
573   End = Start + Size;
574   switch (Entry.Type) {
575   case SecProfSummary:
576     if (std::error_code EC = readSummary())
577       return EC;
578     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
579       Summary->setPartialProfile(true);
580     break;
581   case SecNameTable: {
582     FixedLengthMD5 =
583         hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
584     bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
585     assert((!FixedLengthMD5 || UseMD5) &&
586            "If FixedLengthMD5 is true, UseMD5 has to be true");
587     if (std::error_code EC = readNameTableSec(UseMD5))
588       return EC;
589     break;
590   }
591   case SecLBRProfile:
592     if (std::error_code EC = readFuncProfiles())
593       return EC;
594     break;
595   case SecFuncOffsetTable:
596     if (std::error_code EC = readFuncOffsetTable())
597       return EC;
598     break;
599   case SecFuncMetadata:
600     ProfileIsProbeBased =
601         hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
602     FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
603     if (std::error_code EC = readFuncMetadata())
604       return EC;
605     break;
606   case SecProfileSymbolList:
607     if (std::error_code EC = readProfileSymbolList())
608       return EC;
609     break;
610   default:
611     if (std::error_code EC = readCustomSection(Entry))
612       return EC;
613     break;
614   }
615   return sampleprof_error::success;
616 }
617 
618 void SampleProfileReaderExtBinaryBase::collectFuncsFrom(const Module &M) {
619   UseAllFuncs = false;
620   FuncsToUse.clear();
621   for (auto &F : M)
622     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
623 }
624 
625 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
626   // If there are more than one FuncOffsetTable, the profile read associated
627   // with previous FuncOffsetTable has to be done before next FuncOffsetTable
628   // is read.
629   FuncOffsetTable.clear();
630 
631   auto Size = readNumber<uint64_t>();
632   if (std::error_code EC = Size.getError())
633     return EC;
634 
635   FuncOffsetTable.reserve(*Size);
636   for (uint32_t I = 0; I < *Size; ++I) {
637     auto FName(readStringFromTable());
638     if (std::error_code EC = FName.getError())
639       return EC;
640 
641     auto Offset = readNumber<uint64_t>();
642     if (std::error_code EC = Offset.getError())
643       return EC;
644 
645     FuncOffsetTable[*FName] = *Offset;
646   }
647   return sampleprof_error::success;
648 }
649 
650 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
651   const uint8_t *Start = Data;
652   if (UseAllFuncs) {
653     while (Data < End) {
654       if (std::error_code EC = readFuncProfile(Data))
655         return EC;
656     }
657     assert(Data == End && "More data is read than expected");
658   } else {
659     if (Remapper) {
660       for (auto Name : FuncsToUse) {
661         Remapper->insert(Name);
662       }
663     }
664 
665     if (useMD5()) {
666       for (auto Name : FuncsToUse) {
667         auto GUID = std::to_string(MD5Hash(Name));
668         auto iter = FuncOffsetTable.find(StringRef(GUID));
669         if (iter == FuncOffsetTable.end())
670           continue;
671         const uint8_t *FuncProfileAddr = Start + iter->second;
672         assert(FuncProfileAddr < End && "out of LBRProfile section");
673         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
674           return EC;
675       }
676     } else {
677       for (auto NameOffset : FuncOffsetTable) {
678         SampleContext FContext(NameOffset.first);
679         auto FuncName = FContext.getNameWithoutContext();
680         if (!FuncsToUse.count(FuncName) &&
681             (!Remapper || !Remapper->exist(FuncName)))
682           continue;
683         const uint8_t *FuncProfileAddr = Start + NameOffset.second;
684         assert(FuncProfileAddr < End && "out of LBRProfile section");
685         if (std::error_code EC = readFuncProfile(FuncProfileAddr))
686           return EC;
687       }
688     }
689     Data = End;
690   }
691 
692   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
693          "Cannot have both context-sensitive and regular profile");
694   ProfileIsCS = (CSProfileCount > 0);
695   FunctionSamples::ProfileIsCS = ProfileIsCS;
696   return sampleprof_error::success;
697 }
698 
699 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
700   if (!ProfSymList)
701     ProfSymList = std::make_unique<ProfileSymbolList>();
702 
703   if (std::error_code EC = ProfSymList->read(Data, End - Data))
704     return EC;
705 
706   Data = End;
707   return sampleprof_error::success;
708 }
709 
710 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
711     const uint8_t *SecStart, const uint64_t SecSize,
712     const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
713   Data = SecStart;
714   End = SecStart + SecSize;
715   auto DecompressSize = readNumber<uint64_t>();
716   if (std::error_code EC = DecompressSize.getError())
717     return EC;
718   DecompressBufSize = *DecompressSize;
719 
720   auto CompressSize = readNumber<uint64_t>();
721   if (std::error_code EC = CompressSize.getError())
722     return EC;
723 
724   if (!llvm::zlib::isAvailable())
725     return sampleprof_error::zlib_unavailable;
726 
727   StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
728                               *CompressSize);
729   char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
730   size_t UCSize = DecompressBufSize;
731   llvm::Error E =
732       zlib::uncompress(CompressedStrings, Buffer, UCSize);
733   if (E)
734     return sampleprof_error::uncompress_failed;
735   DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
736   return sampleprof_error::success;
737 }
738 
739 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
740   const uint8_t *BufStart =
741       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
742 
743   for (auto &Entry : SecHdrTable) {
744     // Skip empty section.
745     if (!Entry.Size)
746       continue;
747 
748     // Skip sections without context when SkipFlatProf is true.
749     if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
750       continue;
751 
752     const uint8_t *SecStart = BufStart + Entry.Offset;
753     uint64_t SecSize = Entry.Size;
754 
755     // If the section is compressed, decompress it into a buffer
756     // DecompressBuf before reading the actual data. The pointee of
757     // 'Data' will be changed to buffer hold by DecompressBuf
758     // temporarily when reading the actual data.
759     bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
760     if (isCompressed) {
761       const uint8_t *DecompressBuf;
762       uint64_t DecompressBufSize;
763       if (std::error_code EC = decompressSection(
764               SecStart, SecSize, DecompressBuf, DecompressBufSize))
765         return EC;
766       SecStart = DecompressBuf;
767       SecSize = DecompressBufSize;
768     }
769 
770     if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
771       return EC;
772     if (Data != SecStart + SecSize)
773       return sampleprof_error::malformed;
774 
775     // Change the pointee of 'Data' from DecompressBuf to original Buffer.
776     if (isCompressed) {
777       Data = BufStart + Entry.Offset;
778       End = BufStart + Buffer->getBufferSize();
779     }
780   }
781 
782   return sampleprof_error::success;
783 }
784 
785 std::error_code SampleProfileReaderCompactBinary::readImpl() {
786   std::vector<uint64_t> OffsetsToUse;
787   if (UseAllFuncs) {
788     for (auto FuncEntry : FuncOffsetTable) {
789       OffsetsToUse.push_back(FuncEntry.second);
790     }
791   }
792   else {
793     for (auto Name : FuncsToUse) {
794       auto GUID = std::to_string(MD5Hash(Name));
795       auto iter = FuncOffsetTable.find(StringRef(GUID));
796       if (iter == FuncOffsetTable.end())
797         continue;
798       OffsetsToUse.push_back(iter->second);
799     }
800   }
801 
802   for (auto Offset : OffsetsToUse) {
803     const uint8_t *SavedData = Data;
804     if (std::error_code EC = readFuncProfile(
805             reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
806             Offset))
807       return EC;
808     Data = SavedData;
809   }
810   return sampleprof_error::success;
811 }
812 
813 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
814   if (Magic == SPMagic())
815     return sampleprof_error::success;
816   return sampleprof_error::bad_magic;
817 }
818 
819 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
820   if (Magic == SPMagic(SPF_Ext_Binary))
821     return sampleprof_error::success;
822   return sampleprof_error::bad_magic;
823 }
824 
825 std::error_code
826 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
827   if (Magic == SPMagic(SPF_Compact_Binary))
828     return sampleprof_error::success;
829   return sampleprof_error::bad_magic;
830 }
831 
832 std::error_code SampleProfileReaderBinary::readNameTable() {
833   auto Size = readNumber<uint32_t>();
834   if (std::error_code EC = Size.getError())
835     return EC;
836   NameTable.reserve(*Size + NameTable.size());
837   for (uint32_t I = 0; I < *Size; ++I) {
838     auto Name(readString());
839     if (std::error_code EC = Name.getError())
840       return EC;
841     NameTable.push_back(*Name);
842   }
843 
844   return sampleprof_error::success;
845 }
846 
847 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
848   auto Size = readNumber<uint64_t>();
849   if (std::error_code EC = Size.getError())
850     return EC;
851   MD5StringBuf = std::make_unique<std::vector<std::string>>();
852   MD5StringBuf->reserve(*Size);
853   if (FixedLengthMD5) {
854     // Preallocate and initialize NameTable so we can check whether a name
855     // index has been read before by checking whether the element in the
856     // NameTable is empty, meanwhile readStringIndex can do the boundary
857     // check using the size of NameTable.
858     NameTable.resize(*Size + NameTable.size());
859 
860     MD5NameMemStart = Data;
861     Data = Data + (*Size) * sizeof(uint64_t);
862     return sampleprof_error::success;
863   }
864   NameTable.reserve(*Size);
865   for (uint32_t I = 0; I < *Size; ++I) {
866     auto FID = readNumber<uint64_t>();
867     if (std::error_code EC = FID.getError())
868       return EC;
869     MD5StringBuf->push_back(std::to_string(*FID));
870     // NameTable is a vector of StringRef. Here it is pushing back a
871     // StringRef initialized with the last string in MD5stringBuf.
872     NameTable.push_back(MD5StringBuf->back());
873   }
874   return sampleprof_error::success;
875 }
876 
877 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
878   if (IsMD5)
879     return readMD5NameTable();
880   return SampleProfileReaderBinary::readNameTable();
881 }
882 
883 std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
884   if (!ProfileIsProbeBased)
885     return sampleprof_error::success;
886   while (Data < End) {
887     auto FName(readStringFromTable());
888     if (std::error_code EC = FName.getError())
889       return EC;
890 
891     auto Checksum = readNumber<uint64_t>();
892     if (std::error_code EC = Checksum.getError())
893       return EC;
894 
895     SampleContext FContext(*FName);
896     // No need to load metadata for profiles that are not loaded in the current
897     // module.
898     if (Profiles.count(FContext))
899       Profiles[FContext].setFunctionHash(*Checksum);
900   }
901 
902   assert(Data == End && "More data is read than expected");
903   return sampleprof_error::success;
904 }
905 
906 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
907   auto Size = readNumber<uint64_t>();
908   if (std::error_code EC = Size.getError())
909     return EC;
910   NameTable.reserve(*Size);
911   for (uint32_t I = 0; I < *Size; ++I) {
912     auto FID = readNumber<uint64_t>();
913     if (std::error_code EC = FID.getError())
914       return EC;
915     NameTable.push_back(std::to_string(*FID));
916   }
917   return sampleprof_error::success;
918 }
919 
920 std::error_code
921 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
922   SecHdrTableEntry Entry;
923   auto Type = readUnencodedNumber<uint64_t>();
924   if (std::error_code EC = Type.getError())
925     return EC;
926   Entry.Type = static_cast<SecType>(*Type);
927 
928   auto Flags = readUnencodedNumber<uint64_t>();
929   if (std::error_code EC = Flags.getError())
930     return EC;
931   Entry.Flags = *Flags;
932 
933   auto Offset = readUnencodedNumber<uint64_t>();
934   if (std::error_code EC = Offset.getError())
935     return EC;
936   Entry.Offset = *Offset;
937 
938   auto Size = readUnencodedNumber<uint64_t>();
939   if (std::error_code EC = Size.getError())
940     return EC;
941   Entry.Size = *Size;
942 
943   Entry.LayoutIndex = Idx;
944   SecHdrTable.push_back(std::move(Entry));
945   return sampleprof_error::success;
946 }
947 
948 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
949   auto EntryNum = readUnencodedNumber<uint64_t>();
950   if (std::error_code EC = EntryNum.getError())
951     return EC;
952 
953   for (uint32_t i = 0; i < (*EntryNum); i++)
954     if (std::error_code EC = readSecHdrTableEntry(i))
955       return EC;
956 
957   return sampleprof_error::success;
958 }
959 
960 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
961   const uint8_t *BufStart =
962       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
963   Data = BufStart;
964   End = BufStart + Buffer->getBufferSize();
965 
966   if (std::error_code EC = readMagicIdent())
967     return EC;
968 
969   if (std::error_code EC = readSecHdrTable())
970     return EC;
971 
972   return sampleprof_error::success;
973 }
974 
975 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
976   uint64_t Size = 0;
977   for (auto &Entry : SecHdrTable) {
978     if (Entry.Type == Type)
979       Size += Entry.Size;
980   }
981   return Size;
982 }
983 
984 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
985   // Sections in SecHdrTable is not necessarily in the same order as
986   // sections in the profile because section like FuncOffsetTable needs
987   // to be written after section LBRProfile but needs to be read before
988   // section LBRProfile, so we cannot simply use the last entry in
989   // SecHdrTable to calculate the file size.
990   uint64_t FileSize = 0;
991   for (auto &Entry : SecHdrTable) {
992     FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
993   }
994   return FileSize;
995 }
996 
997 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
998   std::string Flags;
999   if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1000     Flags.append("{compressed,");
1001   else
1002     Flags.append("{");
1003 
1004   if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1005     Flags.append("flat,");
1006 
1007   switch (Entry.Type) {
1008   case SecNameTable:
1009     if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1010       Flags.append("fixlenmd5,");
1011     else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1012       Flags.append("md5,");
1013     break;
1014   case SecProfSummary:
1015     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1016       Flags.append("partial,");
1017     break;
1018   default:
1019     break;
1020   }
1021   char &last = Flags.back();
1022   if (last == ',')
1023     last = '}';
1024   else
1025     Flags.append("}");
1026   return Flags;
1027 }
1028 
1029 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1030   uint64_t TotalSecsSize = 0;
1031   for (auto &Entry : SecHdrTable) {
1032     OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1033        << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1034        << "\n";
1035     ;
1036     TotalSecsSize += Entry.Size;
1037   }
1038   uint64_t HeaderSize = SecHdrTable.front().Offset;
1039   assert(HeaderSize + TotalSecsSize == getFileSize() &&
1040          "Size of 'header + sections' doesn't match the total size of profile");
1041 
1042   OS << "Header Size: " << HeaderSize << "\n";
1043   OS << "Total Sections Size: " << TotalSecsSize << "\n";
1044   OS << "File Size: " << getFileSize() << "\n";
1045   return true;
1046 }
1047 
1048 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1049   // Read and check the magic identifier.
1050   auto Magic = readNumber<uint64_t>();
1051   if (std::error_code EC = Magic.getError())
1052     return EC;
1053   else if (std::error_code EC = verifySPMagic(*Magic))
1054     return EC;
1055 
1056   // Read the version number.
1057   auto Version = readNumber<uint64_t>();
1058   if (std::error_code EC = Version.getError())
1059     return EC;
1060   else if (*Version != SPVersion())
1061     return sampleprof_error::unsupported_version;
1062 
1063   return sampleprof_error::success;
1064 }
1065 
1066 std::error_code SampleProfileReaderBinary::readHeader() {
1067   Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1068   End = Data + Buffer->getBufferSize();
1069 
1070   if (std::error_code EC = readMagicIdent())
1071     return EC;
1072 
1073   if (std::error_code EC = readSummary())
1074     return EC;
1075 
1076   if (std::error_code EC = readNameTable())
1077     return EC;
1078   return sampleprof_error::success;
1079 }
1080 
1081 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1082   SampleProfileReaderBinary::readHeader();
1083   if (std::error_code EC = readFuncOffsetTable())
1084     return EC;
1085   return sampleprof_error::success;
1086 }
1087 
1088 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1089   auto TableOffset = readUnencodedNumber<uint64_t>();
1090   if (std::error_code EC = TableOffset.getError())
1091     return EC;
1092 
1093   const uint8_t *SavedData = Data;
1094   const uint8_t *TableStart =
1095       reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1096       *TableOffset;
1097   Data = TableStart;
1098 
1099   auto Size = readNumber<uint64_t>();
1100   if (std::error_code EC = Size.getError())
1101     return EC;
1102 
1103   FuncOffsetTable.reserve(*Size);
1104   for (uint32_t I = 0; I < *Size; ++I) {
1105     auto FName(readStringFromTable());
1106     if (std::error_code EC = FName.getError())
1107       return EC;
1108 
1109     auto Offset = readNumber<uint64_t>();
1110     if (std::error_code EC = Offset.getError())
1111       return EC;
1112 
1113     FuncOffsetTable[*FName] = *Offset;
1114   }
1115   End = TableStart;
1116   Data = SavedData;
1117   return sampleprof_error::success;
1118 }
1119 
1120 void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module &M) {
1121   UseAllFuncs = false;
1122   FuncsToUse.clear();
1123   for (auto &F : M)
1124     FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1125 }
1126 
1127 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1128     std::vector<ProfileSummaryEntry> &Entries) {
1129   auto Cutoff = readNumber<uint64_t>();
1130   if (std::error_code EC = Cutoff.getError())
1131     return EC;
1132 
1133   auto MinBlockCount = readNumber<uint64_t>();
1134   if (std::error_code EC = MinBlockCount.getError())
1135     return EC;
1136 
1137   auto NumBlocks = readNumber<uint64_t>();
1138   if (std::error_code EC = NumBlocks.getError())
1139     return EC;
1140 
1141   Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1142   return sampleprof_error::success;
1143 }
1144 
1145 std::error_code SampleProfileReaderBinary::readSummary() {
1146   auto TotalCount = readNumber<uint64_t>();
1147   if (std::error_code EC = TotalCount.getError())
1148     return EC;
1149 
1150   auto MaxBlockCount = readNumber<uint64_t>();
1151   if (std::error_code EC = MaxBlockCount.getError())
1152     return EC;
1153 
1154   auto MaxFunctionCount = readNumber<uint64_t>();
1155   if (std::error_code EC = MaxFunctionCount.getError())
1156     return EC;
1157 
1158   auto NumBlocks = readNumber<uint64_t>();
1159   if (std::error_code EC = NumBlocks.getError())
1160     return EC;
1161 
1162   auto NumFunctions = readNumber<uint64_t>();
1163   if (std::error_code EC = NumFunctions.getError())
1164     return EC;
1165 
1166   auto NumSummaryEntries = readNumber<uint64_t>();
1167   if (std::error_code EC = NumSummaryEntries.getError())
1168     return EC;
1169 
1170   std::vector<ProfileSummaryEntry> Entries;
1171   for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1172     std::error_code EC = readSummaryEntry(Entries);
1173     if (EC != sampleprof_error::success)
1174       return EC;
1175   }
1176   Summary = std::make_unique<ProfileSummary>(
1177       ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1178       *MaxFunctionCount, *NumBlocks, *NumFunctions);
1179 
1180   return sampleprof_error::success;
1181 }
1182 
1183 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1184   const uint8_t *Data =
1185       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1186   uint64_t Magic = decodeULEB128(Data);
1187   return Magic == SPMagic();
1188 }
1189 
1190 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1191   const uint8_t *Data =
1192       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1193   uint64_t Magic = decodeULEB128(Data);
1194   return Magic == SPMagic(SPF_Ext_Binary);
1195 }
1196 
1197 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1198   const uint8_t *Data =
1199       reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1200   uint64_t Magic = decodeULEB128(Data);
1201   return Magic == SPMagic(SPF_Compact_Binary);
1202 }
1203 
1204 std::error_code SampleProfileReaderGCC::skipNextWord() {
1205   uint32_t dummy;
1206   if (!GcovBuffer.readInt(dummy))
1207     return sampleprof_error::truncated;
1208   return sampleprof_error::success;
1209 }
1210 
1211 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1212   if (sizeof(T) <= sizeof(uint32_t)) {
1213     uint32_t Val;
1214     if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1215       return static_cast<T>(Val);
1216   } else if (sizeof(T) <= sizeof(uint64_t)) {
1217     uint64_t Val;
1218     if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1219       return static_cast<T>(Val);
1220   }
1221 
1222   std::error_code EC = sampleprof_error::malformed;
1223   reportError(0, EC.message());
1224   return EC;
1225 }
1226 
1227 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1228   StringRef Str;
1229   if (!GcovBuffer.readString(Str))
1230     return sampleprof_error::truncated;
1231   return Str;
1232 }
1233 
1234 std::error_code SampleProfileReaderGCC::readHeader() {
1235   // Read the magic identifier.
1236   if (!GcovBuffer.readGCDAFormat())
1237     return sampleprof_error::unrecognized_format;
1238 
1239   // Read the version number. Note - the GCC reader does not validate this
1240   // version, but the profile creator generates v704.
1241   GCOV::GCOVVersion version;
1242   if (!GcovBuffer.readGCOVVersion(version))
1243     return sampleprof_error::unrecognized_format;
1244 
1245   if (version != GCOV::V407)
1246     return sampleprof_error::unsupported_version;
1247 
1248   // Skip the empty integer.
1249   if (std::error_code EC = skipNextWord())
1250     return EC;
1251 
1252   return sampleprof_error::success;
1253 }
1254 
1255 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1256   uint32_t Tag;
1257   if (!GcovBuffer.readInt(Tag))
1258     return sampleprof_error::truncated;
1259 
1260   if (Tag != Expected)
1261     return sampleprof_error::malformed;
1262 
1263   if (std::error_code EC = skipNextWord())
1264     return EC;
1265 
1266   return sampleprof_error::success;
1267 }
1268 
1269 std::error_code SampleProfileReaderGCC::readNameTable() {
1270   if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1271     return EC;
1272 
1273   uint32_t Size;
1274   if (!GcovBuffer.readInt(Size))
1275     return sampleprof_error::truncated;
1276 
1277   for (uint32_t I = 0; I < Size; ++I) {
1278     StringRef Str;
1279     if (!GcovBuffer.readString(Str))
1280       return sampleprof_error::truncated;
1281     Names.push_back(std::string(Str));
1282   }
1283 
1284   return sampleprof_error::success;
1285 }
1286 
1287 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1288   if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1289     return EC;
1290 
1291   uint32_t NumFunctions;
1292   if (!GcovBuffer.readInt(NumFunctions))
1293     return sampleprof_error::truncated;
1294 
1295   InlineCallStack Stack;
1296   for (uint32_t I = 0; I < NumFunctions; ++I)
1297     if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1298       return EC;
1299 
1300   computeSummary();
1301   return sampleprof_error::success;
1302 }
1303 
1304 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1305     const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1306   uint64_t HeadCount = 0;
1307   if (InlineStack.size() == 0)
1308     if (!GcovBuffer.readInt64(HeadCount))
1309       return sampleprof_error::truncated;
1310 
1311   uint32_t NameIdx;
1312   if (!GcovBuffer.readInt(NameIdx))
1313     return sampleprof_error::truncated;
1314 
1315   StringRef Name(Names[NameIdx]);
1316 
1317   uint32_t NumPosCounts;
1318   if (!GcovBuffer.readInt(NumPosCounts))
1319     return sampleprof_error::truncated;
1320 
1321   uint32_t NumCallsites;
1322   if (!GcovBuffer.readInt(NumCallsites))
1323     return sampleprof_error::truncated;
1324 
1325   FunctionSamples *FProfile = nullptr;
1326   if (InlineStack.size() == 0) {
1327     // If this is a top function that we have already processed, do not
1328     // update its profile again.  This happens in the presence of
1329     // function aliases.  Since these aliases share the same function
1330     // body, there will be identical replicated profiles for the
1331     // original function.  In this case, we simply not bother updating
1332     // the profile of the original function.
1333     FProfile = &Profiles[Name];
1334     FProfile->addHeadSamples(HeadCount);
1335     if (FProfile->getTotalSamples() > 0)
1336       Update = false;
1337   } else {
1338     // Otherwise, we are reading an inlined instance. The top of the
1339     // inline stack contains the profile of the caller. Insert this
1340     // callee in the caller's CallsiteMap.
1341     FunctionSamples *CallerProfile = InlineStack.front();
1342     uint32_t LineOffset = Offset >> 16;
1343     uint32_t Discriminator = Offset & 0xffff;
1344     FProfile = &CallerProfile->functionSamplesAt(
1345         LineLocation(LineOffset, Discriminator))[std::string(Name)];
1346   }
1347   FProfile->setName(Name);
1348 
1349   for (uint32_t I = 0; I < NumPosCounts; ++I) {
1350     uint32_t Offset;
1351     if (!GcovBuffer.readInt(Offset))
1352       return sampleprof_error::truncated;
1353 
1354     uint32_t NumTargets;
1355     if (!GcovBuffer.readInt(NumTargets))
1356       return sampleprof_error::truncated;
1357 
1358     uint64_t Count;
1359     if (!GcovBuffer.readInt64(Count))
1360       return sampleprof_error::truncated;
1361 
1362     // The line location is encoded in the offset as:
1363     //   high 16 bits: line offset to the start of the function.
1364     //   low 16 bits: discriminator.
1365     uint32_t LineOffset = Offset >> 16;
1366     uint32_t Discriminator = Offset & 0xffff;
1367 
1368     InlineCallStack NewStack;
1369     NewStack.push_back(FProfile);
1370     llvm::append_range(NewStack, InlineStack);
1371     if (Update) {
1372       // Walk up the inline stack, adding the samples on this line to
1373       // the total sample count of the callers in the chain.
1374       for (auto CallerProfile : NewStack)
1375         CallerProfile->addTotalSamples(Count);
1376 
1377       // Update the body samples for the current profile.
1378       FProfile->addBodySamples(LineOffset, Discriminator, Count);
1379     }
1380 
1381     // Process the list of functions called at an indirect call site.
1382     // These are all the targets that a function pointer (or virtual
1383     // function) resolved at runtime.
1384     for (uint32_t J = 0; J < NumTargets; J++) {
1385       uint32_t HistVal;
1386       if (!GcovBuffer.readInt(HistVal))
1387         return sampleprof_error::truncated;
1388 
1389       if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1390         return sampleprof_error::malformed;
1391 
1392       uint64_t TargetIdx;
1393       if (!GcovBuffer.readInt64(TargetIdx))
1394         return sampleprof_error::truncated;
1395       StringRef TargetName(Names[TargetIdx]);
1396 
1397       uint64_t TargetCount;
1398       if (!GcovBuffer.readInt64(TargetCount))
1399         return sampleprof_error::truncated;
1400 
1401       if (Update)
1402         FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1403                                          TargetName, TargetCount);
1404     }
1405   }
1406 
1407   // Process all the inlined callers into the current function. These
1408   // are all the callsites that were inlined into this function.
1409   for (uint32_t I = 0; I < NumCallsites; I++) {
1410     // The offset is encoded as:
1411     //   high 16 bits: line offset to the start of the function.
1412     //   low 16 bits: discriminator.
1413     uint32_t Offset;
1414     if (!GcovBuffer.readInt(Offset))
1415       return sampleprof_error::truncated;
1416     InlineCallStack NewStack;
1417     NewStack.push_back(FProfile);
1418     llvm::append_range(NewStack, InlineStack);
1419     if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1420       return EC;
1421   }
1422 
1423   return sampleprof_error::success;
1424 }
1425 
1426 /// Read a GCC AutoFDO profile.
1427 ///
1428 /// This format is generated by the Linux Perf conversion tool at
1429 /// https://github.com/google/autofdo.
1430 std::error_code SampleProfileReaderGCC::readImpl() {
1431   // Read the string table.
1432   if (std::error_code EC = readNameTable())
1433     return EC;
1434 
1435   // Read the source profile.
1436   if (std::error_code EC = readFunctionProfiles())
1437     return EC;
1438 
1439   return sampleprof_error::success;
1440 }
1441 
1442 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1443   StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1444   return Magic == "adcg*704";
1445 }
1446 
1447 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1448   // If the reader uses MD5 to represent string, we can't remap it because
1449   // we don't know what the original function names were.
1450   if (Reader.useMD5()) {
1451     Ctx.diagnose(DiagnosticInfoSampleProfile(
1452         Reader.getBuffer()->getBufferIdentifier(),
1453         "Profile data remapping cannot be applied to profile data "
1454         "in compact format (original mangled names are not available).",
1455         DS_Warning));
1456     return;
1457   }
1458 
1459   // CSSPGO-TODO: Remapper is not yet supported.
1460   // We will need to remap the entire context string.
1461   assert(Remappings && "should be initialized while creating remapper");
1462   for (auto &Sample : Reader.getProfiles()) {
1463     DenseSet<StringRef> NamesInSample;
1464     Sample.second.findAllNames(NamesInSample);
1465     for (auto &Name : NamesInSample)
1466       if (auto Key = Remappings->insert(Name))
1467         NameMap.insert({Key, Name});
1468   }
1469 
1470   RemappingApplied = true;
1471 }
1472 
1473 Optional<StringRef>
1474 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1475   if (auto Key = Remappings->lookup(Fname))
1476     return NameMap.lookup(Key);
1477   return None;
1478 }
1479 
1480 /// Prepare a memory buffer for the contents of \p Filename.
1481 ///
1482 /// \returns an error code indicating the status of the buffer.
1483 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1484 setupMemoryBuffer(const Twine &Filename) {
1485   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename);
1486   if (std::error_code EC = BufferOrErr.getError())
1487     return EC;
1488   auto Buffer = std::move(BufferOrErr.get());
1489 
1490   // Sanity check the file.
1491   if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1492     return sampleprof_error::too_large;
1493 
1494   return std::move(Buffer);
1495 }
1496 
1497 /// Create a sample profile reader based on the format of the input file.
1498 ///
1499 /// \param Filename The file to open.
1500 ///
1501 /// \param C The LLVM context to use to emit diagnostics.
1502 ///
1503 /// \param RemapFilename The file used for profile remapping.
1504 ///
1505 /// \returns an error code indicating the status of the created reader.
1506 ErrorOr<std::unique_ptr<SampleProfileReader>>
1507 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1508                             const std::string RemapFilename) {
1509   auto BufferOrError = setupMemoryBuffer(Filename);
1510   if (std::error_code EC = BufferOrError.getError())
1511     return EC;
1512   return create(BufferOrError.get(), C, RemapFilename);
1513 }
1514 
1515 /// Create a sample profile remapper from the given input, to remap the
1516 /// function names in the given profile data.
1517 ///
1518 /// \param Filename The file to open.
1519 ///
1520 /// \param Reader The profile reader the remapper is going to be applied to.
1521 ///
1522 /// \param C The LLVM context to use to emit diagnostics.
1523 ///
1524 /// \returns an error code indicating the status of the created reader.
1525 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1526 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1527                                            SampleProfileReader &Reader,
1528                                            LLVMContext &C) {
1529   auto BufferOrError = setupMemoryBuffer(Filename);
1530   if (std::error_code EC = BufferOrError.getError())
1531     return EC;
1532   return create(BufferOrError.get(), Reader, C);
1533 }
1534 
1535 /// Create a sample profile remapper from the given input, to remap the
1536 /// function names in the given profile data.
1537 ///
1538 /// \param B The memory buffer to create the reader from (assumes ownership).
1539 ///
1540 /// \param C The LLVM context to use to emit diagnostics.
1541 ///
1542 /// \param Reader The profile reader the remapper is going to be applied to.
1543 ///
1544 /// \returns an error code indicating the status of the created reader.
1545 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1546 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1547                                            SampleProfileReader &Reader,
1548                                            LLVMContext &C) {
1549   auto Remappings = std::make_unique<SymbolRemappingReader>();
1550   if (Error E = Remappings->read(*B.get())) {
1551     handleAllErrors(
1552         std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1553           C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1554                                                  ParseError.getLineNum(),
1555                                                  ParseError.getMessage()));
1556         });
1557     return sampleprof_error::malformed;
1558   }
1559 
1560   return std::make_unique<SampleProfileReaderItaniumRemapper>(
1561       std::move(B), std::move(Remappings), Reader);
1562 }
1563 
1564 /// Create a sample profile reader based on the format of the input data.
1565 ///
1566 /// \param B The memory buffer to create the reader from (assumes ownership).
1567 ///
1568 /// \param C The LLVM context to use to emit diagnostics.
1569 ///
1570 /// \param RemapFilename The file used for profile remapping.
1571 ///
1572 /// \returns an error code indicating the status of the created reader.
1573 ErrorOr<std::unique_ptr<SampleProfileReader>>
1574 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1575                             const std::string RemapFilename) {
1576   std::unique_ptr<SampleProfileReader> Reader;
1577   if (SampleProfileReaderRawBinary::hasFormat(*B))
1578     Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1579   else if (SampleProfileReaderExtBinary::hasFormat(*B))
1580     Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1581   else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1582     Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1583   else if (SampleProfileReaderGCC::hasFormat(*B))
1584     Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1585   else if (SampleProfileReaderText::hasFormat(*B))
1586     Reader.reset(new SampleProfileReaderText(std::move(B), C));
1587   else
1588     return sampleprof_error::unrecognized_format;
1589 
1590   if (!RemapFilename.empty()) {
1591     auto ReaderOrErr =
1592         SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1593     if (std::error_code EC = ReaderOrErr.getError()) {
1594       std::string Msg = "Could not create remapper: " + EC.message();
1595       C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1596       return EC;
1597     }
1598     Reader->Remapper = std::move(ReaderOrErr.get());
1599   }
1600 
1601   FunctionSamples::Format = Reader->getFormat();
1602   if (std::error_code EC = Reader->readHeader()) {
1603     return EC;
1604   }
1605 
1606   return std::move(Reader);
1607 }
1608 
1609 // For text and GCC file formats, we compute the summary after reading the
1610 // profile. Binary format has the profile summary in its header.
1611 void SampleProfileReader::computeSummary() {
1612   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1613   Summary = Builder.computeSummaryForProfiles(Profiles);
1614 }
1615