xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/SampleProf.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains common definitions used in the reading and writing of
10 // sample profile data.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
15 #define LLVM_PROFILEDATA_SAMPLEPROF_H
16 
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/GlobalValue.h"
24 #include "llvm/ProfileData/FunctionId.h"
25 #include "llvm/ProfileData/HashKeyMap.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/ErrorOr.h"
30 #include "llvm/Support/MathExtras.h"
31 #include <algorithm>
32 #include <cstdint>
33 #include <list>
34 #include <map>
35 #include <set>
36 #include <sstream>
37 #include <string>
38 #include <system_error>
39 #include <unordered_map>
40 #include <utility>
41 
42 namespace llvm {
43 
44 class DILocation;
45 class raw_ostream;
46 
47 LLVM_ABI const std::error_category &sampleprof_category();
48 
49 enum class sampleprof_error {
50   success = 0,
51   bad_magic,
52   unsupported_version,
53   too_large,
54   truncated,
55   malformed,
56   unrecognized_format,
57   unsupported_writing_format,
58   truncated_name_table,
59   not_implemented,
60   counter_overflow,
61   ostream_seek_unsupported,
62   uncompress_failed,
63   zlib_unavailable,
64   hash_mismatch,
65   illegal_line_offset
66 };
67 
make_error_code(sampleprof_error E)68 inline std::error_code make_error_code(sampleprof_error E) {
69   return std::error_code(static_cast<int>(E), sampleprof_category());
70 }
71 
mergeSampleProfErrors(sampleprof_error & Accumulator,sampleprof_error Result)72 inline sampleprof_error mergeSampleProfErrors(sampleprof_error &Accumulator,
73                                               sampleprof_error Result) {
74   // Prefer first error encountered as later errors may be secondary effects of
75   // the initial problem.
76   if (Accumulator == sampleprof_error::success &&
77       Result != sampleprof_error::success)
78     Accumulator = Result;
79   return Accumulator;
80 }
81 
82 } // end namespace llvm
83 
84 namespace std {
85 
86 template <>
87 struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
88 
89 } // end namespace std
90 
91 namespace llvm {
92 namespace sampleprof {
93 
94 enum SampleProfileFormat {
95   SPF_None = 0,
96   SPF_Text = 0x1,
97   SPF_Compact_Binary = 0x2, // Deprecated
98   SPF_GCC = 0x3,
99   SPF_Ext_Binary = 0x4,
100   SPF_Binary = 0xff
101 };
102 
103 enum SampleProfileLayout {
104   SPL_None = 0,
105   SPL_Nest = 0x1,
106   SPL_Flat = 0x2,
107 };
108 
109 static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
110   return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
111          uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
112          uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
113          uint64_t('2') << (64 - 56) | uint64_t(Format);
114 }
115 
116 static inline uint64_t SPVersion() { return 103; }
117 
118 // Section Type used by SampleProfileExtBinaryBaseReader and
119 // SampleProfileExtBinaryBaseWriter. Never change the existing
120 // value of enum. Only append new ones.
121 enum SecType {
122   SecInValid = 0,
123   SecProfSummary = 1,
124   SecNameTable = 2,
125   SecProfileSymbolList = 3,
126   SecFuncOffsetTable = 4,
127   SecFuncMetadata = 5,
128   SecCSNameTable = 6,
129   // marker for the first type of profile.
130   SecFuncProfileFirst = 32,
131   SecLBRProfile = SecFuncProfileFirst
132 };
133 
134 static inline std::string getSecName(SecType Type) {
135   switch (static_cast<int>(Type)) { // Avoid -Wcovered-switch-default
136   case SecInValid:
137     return "InvalidSection";
138   case SecProfSummary:
139     return "ProfileSummarySection";
140   case SecNameTable:
141     return "NameTableSection";
142   case SecProfileSymbolList:
143     return "ProfileSymbolListSection";
144   case SecFuncOffsetTable:
145     return "FuncOffsetTableSection";
146   case SecFuncMetadata:
147     return "FunctionMetadata";
148   case SecCSNameTable:
149     return "CSNameTableSection";
150   case SecLBRProfile:
151     return "LBRProfileSection";
152   default:
153     return "UnknownSection";
154   }
155 }
156 
157 // Entry type of section header table used by SampleProfileExtBinaryBaseReader
158 // and SampleProfileExtBinaryBaseWriter.
159 struct SecHdrTableEntry {
160   SecType Type;
161   uint64_t Flags;
162   uint64_t Offset;
163   uint64_t Size;
164   // The index indicating the location of the current entry in
165   // SectionHdrLayout table.
166   uint64_t LayoutIndex;
167 };
168 
169 // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
170 // common flags will be saved in the lower 32bits and section specific flags
171 // will be saved in the higher 32 bits.
172 enum class SecCommonFlags : uint32_t {
173   SecFlagInValid = 0,
174   SecFlagCompress = (1 << 0),
175   // Indicate the section contains only profile without context.
176   SecFlagFlat = (1 << 1)
177 };
178 
179 // Section specific flags are defined here.
180 // !!!Note: Everytime a new enum class is created here, please add
181 // a new check in verifySecFlag.
182 enum class SecNameTableFlags : uint32_t {
183   SecFlagInValid = 0,
184   SecFlagMD5Name = (1 << 0),
185   // Store MD5 in fixed length instead of ULEB128 so NameTable can be
186   // accessed like an array.
187   SecFlagFixedLengthMD5 = (1 << 1),
188   // Profile contains ".__uniq." suffix name. Compiler shouldn't strip
189   // the suffix when doing profile matching when seeing the flag.
190   SecFlagUniqSuffix = (1 << 2)
191 };
192 enum class SecProfSummaryFlags : uint32_t {
193   SecFlagInValid = 0,
194   /// SecFlagPartial means the profile is for common/shared code.
195   /// The common profile is usually merged from profiles collected
196   /// from running other targets.
197   SecFlagPartial = (1 << 0),
198   /// SecFlagContext means this is context-sensitive flat profile for
199   /// CSSPGO
200   SecFlagFullContext = (1 << 1),
201   /// SecFlagFSDiscriminator means this profile uses flow-sensitive
202   /// discriminators.
203   SecFlagFSDiscriminator = (1 << 2),
204   /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
205   /// contexts thus this is CS preinliner computed.
206   SecFlagIsPreInlined = (1 << 4),
207 };
208 
209 enum class SecFuncMetadataFlags : uint32_t {
210   SecFlagInvalid = 0,
211   SecFlagIsProbeBased = (1 << 0),
212   SecFlagHasAttribute = (1 << 1),
213 };
214 
215 enum class SecFuncOffsetFlags : uint32_t {
216   SecFlagInvalid = 0,
217   // Store function offsets in an order of contexts. The order ensures that
218   // callee contexts of a given context laid out next to it.
219   SecFlagOrdered = (1 << 0),
220 };
221 
222 // Verify section specific flag is used for the correct section.
223 template <class SecFlagType>
224 static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
225   // No verification is needed for common flags.
226   if (std::is_same<SecCommonFlags, SecFlagType>())
227     return;
228 
229   // Verification starts here for section specific flag.
230   bool IsFlagLegal = false;
231   switch (Type) {
232   case SecNameTable:
233     IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
234     break;
235   case SecProfSummary:
236     IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
237     break;
238   case SecFuncMetadata:
239     IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
240     break;
241   default:
242   case SecFuncOffsetTable:
243     IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>();
244     break;
245   }
246   if (!IsFlagLegal)
247     llvm_unreachable("Misuse of a flag in an incompatible section");
248 }
249 
250 template <class SecFlagType>
251 static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
252   verifySecFlag(Entry.Type, Flag);
253   auto FVal = static_cast<uint64_t>(Flag);
254   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
255   Entry.Flags |= IsCommon ? FVal : (FVal << 32);
256 }
257 
258 template <class SecFlagType>
259 static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
260   verifySecFlag(Entry.Type, Flag);
261   auto FVal = static_cast<uint64_t>(Flag);
262   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
263   Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
264 }
265 
266 template <class SecFlagType>
267 static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
268   verifySecFlag(Entry.Type, Flag);
269   auto FVal = static_cast<uint64_t>(Flag);
270   bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
271   return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
272 }
273 
274 /// Represents the relative location of an instruction.
275 ///
276 /// Instruction locations are specified by the line offset from the
277 /// beginning of the function (marked by the line where the function
278 /// header is) and the discriminator value within that line.
279 ///
280 /// The discriminator value is useful to distinguish instructions
281 /// that are on the same line but belong to different basic blocks
282 /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
283 struct LineLocation {
284   LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
285 
286   LLVM_ABI void print(raw_ostream &OS) const;
287   LLVM_ABI void dump() const;
288 
289   // Serialize the line location to the output stream using ULEB128 encoding.
290   LLVM_ABI void serialize(raw_ostream &OS) const;
291 
292   bool operator<(const LineLocation &O) const {
293     return std::tie(LineOffset, Discriminator) <
294            std::tie(O.LineOffset, O.Discriminator);
295   }
296 
297   bool operator==(const LineLocation &O) const {
298     return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
299   }
300 
301   bool operator!=(const LineLocation &O) const {
302     return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
303   }
304 
305   uint64_t getHashCode() const {
306     return ((uint64_t) Discriminator << 32) | LineOffset;
307   }
308 
309   uint32_t LineOffset;
310   uint32_t Discriminator;
311 };
312 
313 struct LineLocationHash {
314   uint64_t operator()(const LineLocation &Loc) const {
315     return Loc.getHashCode();
316   }
317 };
318 
319 LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
320 
321 /// Representation of a single sample record.
322 ///
323 /// A sample record is represented by a positive integer value, which
324 /// indicates how frequently was the associated line location executed.
325 ///
326 /// Additionally, if the associated location contains a function call,
327 /// the record will hold a list of all the possible called targets. For
328 /// direct calls, this will be the exact function being invoked. For
329 /// indirect calls (function pointers, virtual table dispatch), this
330 /// will be a list of one or more functions.
331 class SampleRecord {
332 public:
333   using CallTarget = std::pair<FunctionId, uint64_t>;
334   struct CallTargetComparator {
335     bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
336       if (LHS.second != RHS.second)
337         return LHS.second > RHS.second;
338 
339       return LHS.first < RHS.first;
340     }
341   };
342 
343   using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
344   using CallTargetMap = std::unordered_map<FunctionId, uint64_t>;
345   SampleRecord() = default;
346 
347   /// Increment the number of samples for this record by \p S.
348   /// Optionally scale sample count \p S by \p Weight.
349   ///
350   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
351   /// around unsigned integers.
352   sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
353     bool Overflowed;
354     NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
355     return Overflowed ? sampleprof_error::counter_overflow
356                       : sampleprof_error::success;
357   }
358 
359   /// Decrease the number of samples for this record by \p S. Return the amout
360   /// of samples actually decreased.
361   uint64_t removeSamples(uint64_t S) {
362     if (S > NumSamples)
363       S = NumSamples;
364     NumSamples -= S;
365     return S;
366   }
367 
368   /// Add called function \p F with samples \p S.
369   /// Optionally scale sample count \p S by \p Weight.
370   ///
371   /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
372   /// around unsigned integers.
373   sampleprof_error addCalledTarget(FunctionId F, uint64_t S,
374                                    uint64_t Weight = 1) {
375     uint64_t &TargetSamples = CallTargets[F];
376     bool Overflowed;
377     TargetSamples =
378         SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
379     return Overflowed ? sampleprof_error::counter_overflow
380                       : sampleprof_error::success;
381   }
382 
383   /// Remove called function from the call target map. Return the target sample
384   /// count of the called function.
385   uint64_t removeCalledTarget(FunctionId F) {
386     uint64_t Count = 0;
387     auto I = CallTargets.find(F);
388     if (I != CallTargets.end()) {
389       Count = I->second;
390       CallTargets.erase(I);
391     }
392     return Count;
393   }
394 
395   /// Return true if this sample record contains function calls.
396   bool hasCalls() const { return !CallTargets.empty(); }
397 
398   uint64_t getSamples() const { return NumSamples; }
399   const CallTargetMap &getCallTargets() const { return CallTargets; }
400   const SortedCallTargetSet getSortedCallTargets() const {
401     return sortCallTargets(CallTargets);
402   }
403 
404   uint64_t getCallTargetSum() const {
405     uint64_t Sum = 0;
406     for (const auto &I : CallTargets)
407       Sum += I.second;
408     return Sum;
409   }
410 
411   /// Sort call targets in descending order of call frequency.
412   static const SortedCallTargetSet
413   sortCallTargets(const CallTargetMap &Targets) {
414     SortedCallTargetSet SortedTargets;
415     for (const auto &[Target, Frequency] : Targets) {
416       SortedTargets.emplace(Target, Frequency);
417     }
418     return SortedTargets;
419   }
420 
421   /// Prorate call targets by a distribution factor.
422   static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
423                                                float DistributionFactor) {
424     CallTargetMap AdjustedTargets;
425     for (const auto &[Target, Frequency] : Targets) {
426       AdjustedTargets[Target] = Frequency * DistributionFactor;
427     }
428     return AdjustedTargets;
429   }
430 
431   /// Merge the samples in \p Other into this record.
432   /// Optionally scale sample counts by \p Weight.
433   LLVM_ABI sampleprof_error merge(const SampleRecord &Other,
434                                   uint64_t Weight = 1);
435   LLVM_ABI void print(raw_ostream &OS, unsigned Indent) const;
436   LLVM_ABI void dump() const;
437   /// Serialize the sample record to the output stream using ULEB128 encoding.
438   /// The \p NameTable is used to map function names to their IDs.
439   LLVM_ABI std::error_code
440   serialize(raw_ostream &OS,
441             const MapVector<FunctionId, uint32_t> &NameTable) const;
442 
443   bool operator==(const SampleRecord &Other) const {
444     return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets;
445   }
446 
447   bool operator!=(const SampleRecord &Other) const {
448     return !(*this == Other);
449   }
450 
451 private:
452   uint64_t NumSamples = 0;
453   CallTargetMap CallTargets;
454 };
455 
456 LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
457 
458 // State of context associated with FunctionSamples
459 enum ContextStateMask {
460   UnknownContext = 0x0,   // Profile without context
461   RawContext = 0x1,       // Full context profile from input profile
462   SyntheticContext = 0x2, // Synthetic context created for context promotion
463   InlinedContext = 0x4,   // Profile for context that is inlined into caller
464   MergedContext = 0x8     // Profile for context merged into base profile
465 };
466 
467 // Attribute of context associated with FunctionSamples
468 enum ContextAttributeMask {
469   ContextNone = 0x0,
470   ContextWasInlined = 0x1,      // Leaf of context was inlined in previous build
471   ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
472   ContextDuplicatedIntoBase =
473       0x4, // Leaf of context is duplicated into the base profile
474 };
475 
476 // Represents a context frame with profile function and line location
477 struct SampleContextFrame {
478   FunctionId Func;
479   LineLocation Location;
480 
481   SampleContextFrame() : Location(0, 0) {}
482 
483   SampleContextFrame(FunctionId Func, LineLocation Location)
484       : Func(Func), Location(Location) {}
485 
486   bool operator==(const SampleContextFrame &That) const {
487     return Location == That.Location && Func == That.Func;
488   }
489 
490   bool operator!=(const SampleContextFrame &That) const {
491     return !(*this == That);
492   }
493 
494   std::string toString(bool OutputLineLocation) const {
495     std::ostringstream OContextStr;
496     OContextStr << Func.str();
497     if (OutputLineLocation) {
498       OContextStr << ":" << Location.LineOffset;
499       if (Location.Discriminator)
500         OContextStr << "." << Location.Discriminator;
501     }
502     return OContextStr.str();
503   }
504 
505   uint64_t getHashCode() const {
506     uint64_t NameHash = Func.getHashCode();
507     uint64_t LocId = Location.getHashCode();
508     return NameHash + (LocId << 5) + LocId;
509   }
510 };
511 
512 static inline hash_code hash_value(const SampleContextFrame &arg) {
513   return arg.getHashCode();
514 }
515 
516 using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>;
517 using SampleContextFrames = ArrayRef<SampleContextFrame>;
518 
519 struct SampleContextFrameHash {
520   uint64_t operator()(const SampleContextFrameVector &S) const {
521     return hash_combine_range(S);
522   }
523 };
524 
525 // Sample context for FunctionSamples. It consists of the calling context,
526 // the function name and context state. Internally sample context is represented
527 // using ArrayRef, which is also the input for constructing a `SampleContext`.
528 // It can accept and represent both full context string as well as context-less
529 // function name.
530 // For a CS profile, a full context vector can look like:
531 //    `main:3 _Z5funcAi:1 _Z8funcLeafi`
532 // For a base CS profile without calling context, the context vector should only
533 // contain the leaf frame name.
534 // For a non-CS profile, the context vector should be empty.
535 class SampleContext {
536 public:
537   SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
538 
539   SampleContext(StringRef Name)
540       : Func(Name), State(UnknownContext), Attributes(ContextNone) {
541         assert(!Name.empty() && "Name is empty");
542       }
543 
544   SampleContext(FunctionId Func)
545       : Func(Func), State(UnknownContext), Attributes(ContextNone) {}
546 
547   SampleContext(SampleContextFrames Context,
548                 ContextStateMask CState = RawContext)
549       : Attributes(ContextNone) {
550     assert(!Context.empty() && "Context is empty");
551     setContext(Context, CState);
552   }
553 
554   // Give a context string, decode and populate internal states like
555   // Function name, Calling context and context state. Example of input
556   // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
557   SampleContext(StringRef ContextStr,
558                 std::list<SampleContextFrameVector> &CSNameTable,
559                 ContextStateMask CState = RawContext)
560       : Attributes(ContextNone) {
561     assert(!ContextStr.empty());
562     // Note that `[]` wrapped input indicates a full context string, otherwise
563     // it's treated as context-less function name only.
564     bool HasContext = ContextStr.starts_with("[");
565     if (!HasContext) {
566       State = UnknownContext;
567       Func = FunctionId(ContextStr);
568     } else {
569       CSNameTable.emplace_back();
570       SampleContextFrameVector &Context = CSNameTable.back();
571       createCtxVectorFromStr(ContextStr, Context);
572       setContext(Context, CState);
573     }
574   }
575 
576   /// Create a context vector from a given context string and save it in
577   /// `Context`.
578   static void createCtxVectorFromStr(StringRef ContextStr,
579                                      SampleContextFrameVector &Context) {
580     // Remove encapsulating '[' and ']' if any
581     ContextStr = ContextStr.substr(1, ContextStr.size() - 2);
582     StringRef ContextRemain = ContextStr;
583     StringRef ChildContext;
584     FunctionId Callee;
585     while (!ContextRemain.empty()) {
586       auto ContextSplit = ContextRemain.split(" @ ");
587       ChildContext = ContextSplit.first;
588       ContextRemain = ContextSplit.second;
589       LineLocation CallSiteLoc(0, 0);
590       decodeContextString(ChildContext, Callee, CallSiteLoc);
591       Context.emplace_back(Callee, CallSiteLoc);
592     }
593   }
594 
595   // Decode context string for a frame to get function name and location.
596   // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
597   static void decodeContextString(StringRef ContextStr,
598                                   FunctionId &Func,
599                                   LineLocation &LineLoc) {
600     // Get function name
601     auto EntrySplit = ContextStr.split(':');
602     Func = FunctionId(EntrySplit.first);
603 
604     LineLoc = {0, 0};
605     if (!EntrySplit.second.empty()) {
606       // Get line offset, use signed int for getAsInteger so string will
607       // be parsed as signed.
608       int LineOffset = 0;
609       auto LocSplit = EntrySplit.second.split('.');
610       LocSplit.first.getAsInteger(10, LineOffset);
611       LineLoc.LineOffset = LineOffset;
612 
613       // Get discriminator
614       if (!LocSplit.second.empty())
615         LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
616     }
617   }
618 
619   operator SampleContextFrames() const { return FullContext; }
620   bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
621   void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
622   uint32_t getAllAttributes() { return Attributes; }
623   void setAllAttributes(uint32_t A) { Attributes = A; }
624   bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
625   void setState(ContextStateMask S) { State |= (uint32_t)S; }
626   void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
627   bool hasContext() const { return State != UnknownContext; }
628   bool isBaseContext() const { return FullContext.size() == 1; }
629   FunctionId getFunction() const { return Func; }
630   SampleContextFrames getContextFrames() const { return FullContext; }
631 
632   static std::string getContextString(SampleContextFrames Context,
633                                       bool IncludeLeafLineLocation = false) {
634     std::ostringstream OContextStr;
635     for (uint32_t I = 0; I < Context.size(); I++) {
636       if (OContextStr.str().size()) {
637         OContextStr << " @ ";
638       }
639       OContextStr << Context[I].toString(I != Context.size() - 1 ||
640                                          IncludeLeafLineLocation);
641     }
642     return OContextStr.str();
643   }
644 
645   std::string toString() const {
646     if (!hasContext())
647       return Func.str();
648     return getContextString(FullContext, false);
649   }
650 
651   uint64_t getHashCode() const {
652     if (hasContext())
653       return hash_value(getContextFrames());
654     return getFunction().getHashCode();
655   }
656 
657   /// Set the name of the function and clear the current context.
658   void setFunction(FunctionId NewFunctionID) {
659     Func = NewFunctionID;
660     FullContext = SampleContextFrames();
661     State = UnknownContext;
662   }
663 
664   void setContext(SampleContextFrames Context,
665                   ContextStateMask CState = RawContext) {
666     assert(CState != UnknownContext);
667     FullContext = Context;
668     Func = Context.back().Func;
669     State = CState;
670   }
671 
672   bool operator==(const SampleContext &That) const {
673     return State == That.State && Func == That.Func &&
674            FullContext == That.FullContext;
675   }
676 
677   bool operator!=(const SampleContext &That) const { return !(*this == That); }
678 
679   bool operator<(const SampleContext &That) const {
680     if (State != That.State)
681       return State < That.State;
682 
683     if (!hasContext()) {
684       return Func < That.Func;
685     }
686 
687     uint64_t I = 0;
688     while (I < std::min(FullContext.size(), That.FullContext.size())) {
689       auto &Context1 = FullContext[I];
690       auto &Context2 = That.FullContext[I];
691       auto V = Context1.Func.compare(Context2.Func);
692       if (V)
693         return V < 0;
694       if (Context1.Location != Context2.Location)
695         return Context1.Location < Context2.Location;
696       I++;
697     }
698 
699     return FullContext.size() < That.FullContext.size();
700   }
701 
702   struct Hash {
703     uint64_t operator()(const SampleContext &Context) const {
704       return Context.getHashCode();
705     }
706   };
707 
708   bool isPrefixOf(const SampleContext &That) const {
709     auto ThisContext = FullContext;
710     auto ThatContext = That.FullContext;
711     if (ThatContext.size() < ThisContext.size())
712       return false;
713     ThatContext = ThatContext.take_front(ThisContext.size());
714     // Compare Leaf frame first
715     if (ThisContext.back().Func != ThatContext.back().Func)
716       return false;
717     // Compare leading context
718     return ThisContext.drop_back() == ThatContext.drop_back();
719   }
720 
721 private:
722   // The function associated with this context. If CS profile, this is the leaf
723   // function.
724   FunctionId Func;
725   // Full context including calling context and leaf function name
726   SampleContextFrames FullContext;
727   // State of the associated sample profile
728   uint32_t State;
729   // Attribute of the associated sample profile
730   uint32_t Attributes;
731 };
732 
733 static inline hash_code hash_value(const SampleContext &Context) {
734   return Context.getHashCode();
735 }
736 
737 inline raw_ostream &operator<<(raw_ostream &OS, const SampleContext &Context) {
738   return OS << Context.toString();
739 }
740 
741 class FunctionSamples;
742 class SampleProfileReaderItaniumRemapper;
743 
744 using BodySampleMap = std::map<LineLocation, SampleRecord>;
745 // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
746 // memory, which is *very* significant for large profiles.
747 using FunctionSamplesMap = std::map<FunctionId, FunctionSamples>;
748 using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
749 using LocToLocMap =
750     std::unordered_map<LineLocation, LineLocation, LineLocationHash>;
751 
752 /// Representation of the samples collected for a function.
753 ///
754 /// This data structure contains all the collected samples for the body
755 /// of a function. Each sample corresponds to a LineLocation instance
756 /// within the body of the function.
757 class FunctionSamples {
758 public:
759   FunctionSamples() = default;
760 
761   LLVM_ABI void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
762   LLVM_ABI void dump() const;
763 
764   sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
765     bool Overflowed;
766     TotalSamples =
767         SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
768     return Overflowed ? sampleprof_error::counter_overflow
769                       : sampleprof_error::success;
770   }
771 
772   void removeTotalSamples(uint64_t Num) {
773     if (TotalSamples < Num)
774       TotalSamples = 0;
775     else
776       TotalSamples -= Num;
777   }
778 
779   void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
780 
781   void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; }
782 
783   sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
784     bool Overflowed;
785     TotalHeadSamples =
786         SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
787     return Overflowed ? sampleprof_error::counter_overflow
788                       : sampleprof_error::success;
789   }
790 
791   sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
792                                   uint64_t Num, uint64_t Weight = 1) {
793     return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
794         Num, Weight);
795   }
796 
797   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
798                                           uint32_t Discriminator,
799                                           FunctionId Func,
800                                           uint64_t Num,
801                                           uint64_t Weight = 1) {
802     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
803         Func, Num, Weight);
804   }
805 
806   sampleprof_error addSampleRecord(LineLocation Location,
807                                    const SampleRecord &SampleRecord,
808                                    uint64_t Weight = 1) {
809     return BodySamples[Location].merge(SampleRecord, Weight);
810   }
811 
812   // Remove a call target and decrease the body sample correspondingly. Return
813   // the number of body samples actually decreased.
814   uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset,
815                                            uint32_t Discriminator,
816                                            FunctionId Func) {
817     uint64_t Count = 0;
818     auto I = BodySamples.find(LineLocation(LineOffset, Discriminator));
819     if (I != BodySamples.end()) {
820       Count = I->second.removeCalledTarget(Func);
821       Count = I->second.removeSamples(Count);
822       if (!I->second.getSamples())
823         BodySamples.erase(I);
824     }
825     return Count;
826   }
827 
828   // Remove all call site samples for inlinees. This is needed when flattening
829   // a nested profile.
830   void removeAllCallsiteSamples() {
831     CallsiteSamples.clear();
832   }
833 
834   // Accumulate all call target samples to update the body samples.
835   void updateCallsiteSamples() {
836     for (auto &I : BodySamples) {
837       uint64_t TargetSamples = I.second.getCallTargetSum();
838       // It's possible that the body sample count can be greater than the call
839       // target sum. E.g, if some call targets are external targets, they won't
840       // be considered valid call targets, but the body sample count which is
841       // from lbr ranges can actually include them.
842       if (TargetSamples > I.second.getSamples())
843         I.second.addSamples(TargetSamples - I.second.getSamples());
844     }
845   }
846 
847   // Accumulate all body samples to set total samples.
848   void updateTotalSamples() {
849     setTotalSamples(0);
850     for (const auto &I : BodySamples)
851       addTotalSamples(I.second.getSamples());
852 
853     for (auto &I : CallsiteSamples) {
854       for (auto &CS : I.second) {
855         CS.second.updateTotalSamples();
856         addTotalSamples(CS.second.getTotalSamples());
857       }
858     }
859   }
860 
861   // Set current context and all callee contexts to be synthetic.
862   void setContextSynthetic() {
863     Context.setState(SyntheticContext);
864     for (auto &I : CallsiteSamples) {
865       for (auto &CS : I.second) {
866         CS.second.setContextSynthetic();
867       }
868     }
869   }
870 
871   // Query the stale profile matching results and remap the location.
872   const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const {
873     // There is no remapping if the profile is not stale or the matching gives
874     // the same location.
875     if (!IRToProfileLocationMap)
876       return IRLoc;
877     const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
878     if (ProfileLoc != IRToProfileLocationMap->end())
879       return ProfileLoc->second;
880     return IRLoc;
881   }
882 
883   /// Return the number of samples collected at the given location.
884   /// Each location is specified by \p LineOffset and \p Discriminator.
885   /// If the location is not found in profile, return error.
886   ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
887                                   uint32_t Discriminator) const {
888     const auto &Ret = BodySamples.find(
889         mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
890     if (Ret == BodySamples.end())
891       return std::error_code();
892     return Ret->second.getSamples();
893   }
894 
895   /// Returns the call target map collected at a given location.
896   /// Each location is specified by \p LineOffset and \p Discriminator.
897   /// If the location is not found in profile, return error.
898   ErrorOr<const SampleRecord::CallTargetMap &>
899   findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
900     const auto &Ret = BodySamples.find(
901         mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
902     if (Ret == BodySamples.end())
903       return std::error_code();
904     return Ret->second.getCallTargets();
905   }
906 
907   /// Returns the call target map collected at a given location specified by \p
908   /// CallSite. If the location is not found in profile, return error.
909   ErrorOr<const SampleRecord::CallTargetMap &>
910   findCallTargetMapAt(const LineLocation &CallSite) const {
911     const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite));
912     if (Ret == BodySamples.end())
913       return std::error_code();
914     return Ret->second.getCallTargets();
915   }
916 
917   /// Return the function samples at the given callsite location.
918   FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
919     return CallsiteSamples[mapIRLocToProfileLoc(Loc)];
920   }
921 
922   /// Returns the FunctionSamplesMap at the given \p Loc.
923   const FunctionSamplesMap *
924   findFunctionSamplesMapAt(const LineLocation &Loc) const {
925     auto Iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
926     if (Iter == CallsiteSamples.end())
927       return nullptr;
928     return &Iter->second;
929   }
930 
931   /// Returns a pointer to FunctionSamples at the given callsite location
932   /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
933   /// the restriction to return the FunctionSamples at callsite location
934   /// \p Loc with the maximum total sample count. If \p Remapper or \p
935   /// FuncNameToProfNameMap is not nullptr, use them to find FunctionSamples
936   /// with equivalent name as \p CalleeName.
937   LLVM_ABI const FunctionSamples *findFunctionSamplesAt(
938       const LineLocation &Loc, StringRef CalleeName,
939       SampleProfileReaderItaniumRemapper *Remapper,
940       const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
941           *FuncNameToProfNameMap = nullptr) const;
942 
943   bool empty() const { return TotalSamples == 0; }
944 
945   /// Return the total number of samples collected inside the function.
946   uint64_t getTotalSamples() const { return TotalSamples; }
947 
948   /// For top-level functions, return the total number of branch samples that
949   /// have the function as the branch target (or 0 otherwise). This is the raw
950   /// data fetched from the profile. This should be equivalent to the sample of
951   /// the first instruction of the symbol. But as we directly get this info for
952   /// raw profile without referring to potentially inaccurate debug info, this
953   /// gives more accurate profile data and is preferred for standalone symbols.
954   uint64_t getHeadSamples() const { return TotalHeadSamples; }
955 
956   /// Return an estimate of the sample count of the function entry basic block.
957   /// The function can be either a standalone symbol or an inlined function.
958   /// For Context-Sensitive profiles, this will prefer returning the head
959   /// samples (i.e. getHeadSamples()), if non-zero. Otherwise it estimates from
960   /// the function body's samples or callsite samples.
961   uint64_t getHeadSamplesEstimate() const {
962     if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
963       // For CS profile, if we already have more accurate head samples
964       // counted by branch sample from caller, use them as entry samples.
965       return getHeadSamples();
966     }
967     uint64_t Count = 0;
968     // Use either BodySamples or CallsiteSamples which ever has the smaller
969     // lineno.
970     if (!BodySamples.empty() &&
971         (CallsiteSamples.empty() ||
972          BodySamples.begin()->first < CallsiteSamples.begin()->first))
973       Count = BodySamples.begin()->second.getSamples();
974     else if (!CallsiteSamples.empty()) {
975       // An indirect callsite may be promoted to several inlined direct calls.
976       // We need to get the sum of them.
977       for (const auto &FuncSamples : CallsiteSamples.begin()->second)
978         Count += FuncSamples.second.getHeadSamplesEstimate();
979     }
980     // Return at least 1 if total sample is not 0.
981     return Count ? Count : TotalSamples > 0;
982   }
983 
984   /// Return all the samples collected in the body of the function.
985   const BodySampleMap &getBodySamples() const { return BodySamples; }
986 
987   /// Return all the callsite samples collected in the body of the function.
988   const CallsiteSampleMap &getCallsiteSamples() const {
989     return CallsiteSamples;
990   }
991 
992   /// Return the maximum of sample counts in a function body. When SkipCallSite
993   /// is false, which is the default, the return count includes samples in the
994   /// inlined functions. When SkipCallSite is true, the return count only
995   /// considers the body samples.
996   uint64_t getMaxCountInside(bool SkipCallSite = false) const {
997     uint64_t MaxCount = 0;
998     for (const auto &L : getBodySamples())
999       MaxCount = std::max(MaxCount, L.second.getSamples());
1000     if (SkipCallSite)
1001       return MaxCount;
1002     for (const auto &C : getCallsiteSamples())
1003       for (const FunctionSamplesMap::value_type &F : C.second)
1004         MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
1005     return MaxCount;
1006   }
1007 
1008   /// Merge the samples in \p Other into this one.
1009   /// Optionally scale samples by \p Weight.
1010   sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
1011     sampleprof_error Result = sampleprof_error::success;
1012     if (!GUIDToFuncNameMap)
1013       GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
1014     if (Context.getFunction().empty())
1015       Context = Other.getContext();
1016     if (FunctionHash == 0) {
1017       // Set the function hash code for the target profile.
1018       FunctionHash = Other.getFunctionHash();
1019     } else if (FunctionHash != Other.getFunctionHash()) {
1020       // The two profiles coming with different valid hash codes indicates
1021       // either:
1022       // 1. They are same-named static functions from different compilation
1023       // units (without using -unique-internal-linkage-names), or
1024       // 2. They are really the same function but from different compilations.
1025       // Let's bail out in either case for now, which means one profile is
1026       // dropped.
1027       return sampleprof_error::hash_mismatch;
1028     }
1029 
1030     mergeSampleProfErrors(Result,
1031                           addTotalSamples(Other.getTotalSamples(), Weight));
1032     mergeSampleProfErrors(Result,
1033                           addHeadSamples(Other.getHeadSamples(), Weight));
1034     for (const auto &I : Other.getBodySamples()) {
1035       const LineLocation &Loc = I.first;
1036       const SampleRecord &Rec = I.second;
1037       mergeSampleProfErrors(Result, BodySamples[Loc].merge(Rec, Weight));
1038     }
1039     for (const auto &I : Other.getCallsiteSamples()) {
1040       const LineLocation &Loc = I.first;
1041       FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
1042       for (const auto &Rec : I.second)
1043         mergeSampleProfErrors(Result,
1044                               FSMap[Rec.first].merge(Rec.second, Weight));
1045     }
1046     return Result;
1047   }
1048 
1049   /// Recursively traverses all children, if the total sample count of the
1050   /// corresponding function is no less than \p Threshold, add its corresponding
1051   /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
1052   /// to \p S.
1053   void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S,
1054                             const HashKeyMap<std::unordered_map, FunctionId,
1055                                              Function *>  &SymbolMap,
1056                             uint64_t Threshold) const {
1057     if (TotalSamples <= Threshold)
1058       return;
1059     auto IsDeclaration = [](const Function *F) {
1060       return !F || F->isDeclaration();
1061     };
1062     if (IsDeclaration(SymbolMap.lookup(getFunction()))) {
1063       // Add to the import list only when it's defined out of module.
1064       S.insert(getGUID());
1065     }
1066     // Import hot CallTargets, which may not be available in IR because full
1067     // profile annotation cannot be done until backend compilation in ThinLTO.
1068     for (const auto &BS : BodySamples)
1069       for (const auto &TS : BS.second.getCallTargets())
1070         if (TS.second > Threshold) {
1071           const Function *Callee = SymbolMap.lookup(TS.first);
1072           if (IsDeclaration(Callee))
1073             S.insert(TS.first.getHashCode());
1074         }
1075     for (const auto &CS : CallsiteSamples)
1076       for (const auto &NameFS : CS.second)
1077         NameFS.second.findInlinedFunctions(S, SymbolMap, Threshold);
1078   }
1079 
1080   /// Set the name of the function.
1081   void setFunction(FunctionId NewFunctionID) {
1082     Context.setFunction(NewFunctionID);
1083   }
1084 
1085   /// Return the function name.
1086   FunctionId getFunction() const { return Context.getFunction(); }
1087 
1088   /// Return the original function name.
1089   StringRef getFuncName() const { return getFuncName(getFunction()); }
1090 
1091   void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
1092 
1093   uint64_t getFunctionHash() const { return FunctionHash; }
1094 
1095   void setIRToProfileLocationMap(const LocToLocMap *LTLM) {
1096     assert(IRToProfileLocationMap == nullptr && "this should be set only once");
1097     IRToProfileLocationMap = LTLM;
1098   }
1099 
1100   /// Return the canonical name for a function, taking into account
1101   /// suffix elision policy attributes.
1102   static StringRef getCanonicalFnName(const Function &F) {
1103     const char *AttrName = "sample-profile-suffix-elision-policy";
1104     auto Attr = F.getFnAttribute(AttrName).getValueAsString();
1105     return getCanonicalFnName(F.getName(), Attr);
1106   }
1107 
1108   /// Name suffixes which canonicalization should handle to avoid
1109   /// profile mismatch.
1110   static constexpr const char *LLVMSuffix = ".llvm.";
1111   static constexpr const char *PartSuffix = ".part.";
1112   static constexpr const char *UniqSuffix = ".__uniq.";
1113 
1114   static StringRef getCanonicalFnName(StringRef FnName,
1115                                       StringRef Attr = "selected") {
1116     // Note the sequence of the suffixes in the knownSuffixes array matters.
1117     // If suffix "A" is appended after the suffix "B", "A" should be in front
1118     // of "B" in knownSuffixes.
1119     const char *KnownSuffixes[] = {LLVMSuffix, PartSuffix, UniqSuffix};
1120     if (Attr == "" || Attr == "all")
1121       return FnName.split('.').first;
1122     if (Attr == "selected") {
1123       StringRef Cand(FnName);
1124       for (const auto &Suf : KnownSuffixes) {
1125         StringRef Suffix(Suf);
1126         // If the profile contains ".__uniq." suffix, don't strip the
1127         // suffix for names in the IR.
1128         if (Suffix == UniqSuffix && FunctionSamples::HasUniqSuffix)
1129           continue;
1130         auto It = Cand.rfind(Suffix);
1131         if (It == StringRef::npos)
1132           continue;
1133         auto Dit = Cand.rfind('.');
1134         if (Dit == It + Suffix.size() - 1)
1135           Cand = Cand.substr(0, It);
1136       }
1137       return Cand;
1138     }
1139     if (Attr == "none")
1140       return FnName;
1141     assert(false && "internal error: unknown suffix elision policy");
1142     return FnName;
1143   }
1144 
1145   /// Translate \p Func into its original name.
1146   /// When profile doesn't use MD5, \p Func needs no translation.
1147   /// When profile uses MD5, \p Func in current FunctionSamples
1148   /// is actually GUID of the original function name. getFuncName will
1149   /// translate \p Func in current FunctionSamples into its original name
1150   /// by looking up in the function map GUIDToFuncNameMap.
1151   /// If the original name doesn't exist in the map, return empty StringRef.
1152   StringRef getFuncName(FunctionId Func) const {
1153     if (!UseMD5)
1154       return Func.stringRef();
1155 
1156     assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
1157     return GUIDToFuncNameMap->lookup(Func.getHashCode());
1158   }
1159 
1160   /// Returns the line offset to the start line of the subprogram.
1161   /// We assume that a single function will not exceed 65535 LOC.
1162   LLVM_ABI static unsigned getOffset(const DILocation *DIL);
1163 
1164   /// Returns a unique call site identifier for a given debug location of a call
1165   /// instruction. This is wrapper of two scenarios, the probe-based profile and
1166   /// regular profile, to hide implementation details from the sample loader and
1167   /// the context tracker.
1168   LLVM_ABI static LineLocation getCallSiteIdentifier(const DILocation *DIL,
1169                                                      bool ProfileIsFS = false);
1170 
1171   /// Returns a unique hash code for a combination of a callsite location and
1172   /// the callee function name.
1173   /// Guarantee MD5 and non-MD5 representation of the same function results in
1174   /// the same hash.
1175   static uint64_t getCallSiteHash(FunctionId Callee,
1176                                   const LineLocation &Callsite) {
1177     return SampleContextFrame(Callee, Callsite).getHashCode();
1178   }
1179 
1180   /// Get the FunctionSamples of the inline instance where DIL originates
1181   /// from.
1182   ///
1183   /// The FunctionSamples of the instruction (Machine or IR) associated to
1184   /// \p DIL is the inlined instance in which that instruction is coming from.
1185   /// We traverse the inline stack of that instruction, and match it with the
1186   /// tree nodes in the profile.
1187   ///
1188   /// \returns the FunctionSamples pointer to the inlined instance.
1189   /// If \p Remapper or \p FuncNameToProfNameMap is not nullptr, it will be used
1190   /// to find matching FunctionSamples with not exactly the same but equivalent
1191   /// name.
1192   LLVM_ABI const FunctionSamples *findFunctionSamples(
1193       const DILocation *DIL,
1194       SampleProfileReaderItaniumRemapper *Remapper = nullptr,
1195       const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
1196           *FuncNameToProfNameMap = nullptr) const;
1197 
1198   LLVM_ABI static bool ProfileIsProbeBased;
1199 
1200   LLVM_ABI static bool ProfileIsCS;
1201 
1202   LLVM_ABI static bool ProfileIsPreInlined;
1203 
1204   SampleContext &getContext() const { return Context; }
1205 
1206   void setContext(const SampleContext &FContext) { Context = FContext; }
1207 
1208   /// Whether the profile uses MD5 to represent string.
1209   LLVM_ABI static bool UseMD5;
1210 
1211   /// Whether the profile contains any ".__uniq." suffix in a name.
1212   LLVM_ABI static bool HasUniqSuffix;
1213 
1214   /// If this profile uses flow sensitive discriminators.
1215   LLVM_ABI static bool ProfileIsFS;
1216 
1217   /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
1218   /// all the function symbols defined or declared in current module.
1219   DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
1220 
1221   /// Return the GUID of the context's name. If the context is already using
1222   /// MD5, don't hash it again.
1223   uint64_t getGUID() const {
1224     return getFunction().getHashCode();
1225   }
1226 
1227   // Find all the names in the current FunctionSamples including names in
1228   // all the inline instances and names of call targets.
1229   LLVM_ABI void findAllNames(DenseSet<FunctionId> &NameSet) const;
1230 
1231   bool operator==(const FunctionSamples &Other) const {
1232     return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap ||
1233             (GUIDToFuncNameMap && Other.GUIDToFuncNameMap &&
1234              *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) &&
1235            FunctionHash == Other.FunctionHash && Context == Other.Context &&
1236            TotalSamples == Other.TotalSamples &&
1237            TotalHeadSamples == Other.TotalHeadSamples &&
1238            BodySamples == Other.BodySamples &&
1239            CallsiteSamples == Other.CallsiteSamples;
1240   }
1241 
1242   bool operator!=(const FunctionSamples &Other) const {
1243     return !(*this == Other);
1244   }
1245 
1246 private:
1247   /// CFG hash value for the function.
1248   uint64_t FunctionHash = 0;
1249 
1250   /// Calling context for function profile
1251   mutable SampleContext Context;
1252 
1253   /// Total number of samples collected inside this function.
1254   ///
1255   /// Samples are cumulative, they include all the samples collected
1256   /// inside this function and all its inlined callees.
1257   uint64_t TotalSamples = 0;
1258 
1259   /// Total number of samples collected at the head of the function.
1260   /// This is an approximation of the number of calls made to this function
1261   /// at runtime.
1262   uint64_t TotalHeadSamples = 0;
1263 
1264   /// Map instruction locations to collected samples.
1265   ///
1266   /// Each entry in this map contains the number of samples
1267   /// collected at the corresponding line offset. All line locations
1268   /// are an offset from the start of the function.
1269   BodySampleMap BodySamples;
1270 
1271   /// Map call sites to collected samples for the called function.
1272   ///
1273   /// Each entry in this map corresponds to all the samples
1274   /// collected for the inlined function call at the given
1275   /// location. For example, given:
1276   ///
1277   ///     void foo() {
1278   ///  1    bar();
1279   ///  ...
1280   ///  8    baz();
1281   ///     }
1282   ///
1283   /// If the bar() and baz() calls were inlined inside foo(), this
1284   /// map will contain two entries.  One for all the samples collected
1285   /// in the call to bar() at line offset 1, the other for all the samples
1286   /// collected in the call to baz() at line offset 8.
1287   CallsiteSampleMap CallsiteSamples;
1288 
1289   /// IR to profile location map generated by stale profile matching.
1290   ///
1291   /// Each entry is a mapping from the location on current build to the matched
1292   /// location in the "stale" profile. For example:
1293   ///   Profiled source code:
1294   ///      void foo() {
1295   ///   1    bar();
1296   ///      }
1297   ///
1298   ///   Current source code:
1299   ///      void foo() {
1300   ///   1    // Code change
1301   ///   2    bar();
1302   ///      }
1303   /// Supposing the stale profile matching algorithm generated the mapping [2 ->
1304   /// 1], the profile query using the location of bar on the IR which is 2 will
1305   /// be remapped to 1 and find the location of bar in the profile.
1306   const LocToLocMap *IRToProfileLocationMap = nullptr;
1307 };
1308 
1309 /// Get the proper representation of a string according to whether the
1310 /// current Format uses MD5 to represent the string.
1311 static inline FunctionId getRepInFormat(StringRef Name) {
1312   if (Name.empty() || !FunctionSamples::UseMD5)
1313     return FunctionId(Name);
1314   return FunctionId(Function::getGUIDAssumingExternalLinkage(Name));
1315 }
1316 
1317 LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
1318 
1319 /// This class provides operator overloads to the map container using MD5 as the
1320 /// key type, so that existing code can still work in most cases using
1321 /// SampleContext as key.
1322 /// Note: when populating container, make sure to assign the SampleContext to
1323 /// the mapped value immediately because the key no longer holds it.
1324 class SampleProfileMap
1325     : public HashKeyMap<std::unordered_map, SampleContext, FunctionSamples> {
1326 public:
1327   // Convenience method because this is being used in many places. Set the
1328   // FunctionSamples' context if its newly inserted.
1329   mapped_type &create(const SampleContext &Ctx) {
1330     auto Ret = try_emplace(Ctx, FunctionSamples());
1331     if (Ret.second)
1332       Ret.first->second.setContext(Ctx);
1333     return Ret.first->second;
1334   }
1335 
1336   iterator find(const SampleContext &Ctx) {
1337     return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find(
1338         Ctx);
1339   }
1340 
1341   const_iterator find(const SampleContext &Ctx) const {
1342     return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::find(
1343         Ctx);
1344   }
1345 
1346   size_t erase(const SampleContext &Ctx) {
1347     return HashKeyMap<std::unordered_map, SampleContext, FunctionSamples>::
1348         erase(Ctx);
1349   }
1350 
1351   size_t erase(const key_type &Key) { return base_type::erase(Key); }
1352 
1353   iterator erase(iterator It) { return base_type::erase(It); }
1354 };
1355 
1356 using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
1357 
1358 LLVM_ABI void
1359 sortFuncProfiles(const SampleProfileMap &ProfileMap,
1360                  std::vector<NameFunctionSamples> &SortedProfiles);
1361 
1362 /// Sort a LocationT->SampleT map by LocationT.
1363 ///
1364 /// It produces a sorted list of <LocationT, SampleT> records by ascending
1365 /// order of LocationT.
1366 template <class LocationT, class SampleT> class SampleSorter {
1367 public:
1368   using SamplesWithLoc = std::pair<const LocationT, SampleT>;
1369   using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
1370 
1371   SampleSorter(const std::map<LocationT, SampleT> &Samples) {
1372     for (const auto &I : Samples)
1373       V.push_back(&I);
1374     llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
1375       return A->first < B->first;
1376     });
1377   }
1378 
1379   const SamplesWithLocList &get() const { return V; }
1380 
1381 private:
1382   SamplesWithLocList V;
1383 };
1384 
1385 /// SampleContextTrimmer impelements helper functions to trim, merge cold
1386 /// context profiles. It also supports context profile canonicalization to make
1387 /// sure ProfileMap's key is consistent with FunctionSample's name/context.
1388 class SampleContextTrimmer {
1389 public:
1390   SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){};
1391   // Trim and merge cold context profile when requested. TrimBaseProfileOnly
1392   // should only be effective when TrimColdContext is true. On top of
1393   // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all
1394   // cold profiles or only cold base profiles. Trimming base profiles only is
1395   // mainly to honor the preinliner decsion. Note that when MergeColdContext is
1396   // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will
1397   // be ignored.
1398   LLVM_ABI void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
1399                                                 bool TrimColdContext,
1400                                                 bool MergeColdContext,
1401                                                 uint32_t ColdContextFrameLength,
1402                                                 bool TrimBaseProfileOnly);
1403 
1404 private:
1405   SampleProfileMap &ProfileMap;
1406 };
1407 
1408 /// Helper class for profile conversion.
1409 ///
1410 /// It supports full context-sensitive profile to nested profile conversion,
1411 /// nested profile to flatten profile conversion, etc.
1412 class ProfileConverter {
1413 public:
1414   LLVM_ABI ProfileConverter(SampleProfileMap &Profiles);
1415   // Convert a full context-sensitive flat sample profile into a nested sample
1416   // profile.
1417   LLVM_ABI void convertCSProfiles();
1418   struct FrameNode {
1419     FrameNode(FunctionId FName = FunctionId(),
1420               FunctionSamples *FSamples = nullptr,
1421               LineLocation CallLoc = {0, 0})
1422         : FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){};
1423 
1424     // Map line+discriminator location to child frame
1425     std::map<uint64_t, FrameNode> AllChildFrames;
1426     // Function name for current frame
1427     FunctionId FuncName;
1428     // Function Samples for current frame
1429     FunctionSamples *FuncSamples;
1430     // Callsite location in parent context
1431     LineLocation CallSiteLoc;
1432 
1433     LLVM_ABI FrameNode *getOrCreateChildFrame(const LineLocation &CallSite,
1434                                               FunctionId CalleeName);
1435   };
1436 
1437   static void flattenProfile(SampleProfileMap &ProfileMap,
1438                              bool ProfileIsCS = false) {
1439     SampleProfileMap TmpProfiles;
1440     flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS);
1441     ProfileMap = std::move(TmpProfiles);
1442   }
1443 
1444   static void flattenProfile(const SampleProfileMap &InputProfiles,
1445                              SampleProfileMap &OutputProfiles,
1446                              bool ProfileIsCS = false) {
1447     if (ProfileIsCS) {
1448       for (const auto &I : InputProfiles) {
1449         // Retain the profile name and clear the full context for each function
1450         // profile.
1451         FunctionSamples &FS = OutputProfiles.create(I.second.getFunction());
1452         FS.merge(I.second);
1453       }
1454     } else {
1455       for (const auto &I : InputProfiles)
1456         flattenNestedProfile(OutputProfiles, I.second);
1457     }
1458   }
1459 
1460 private:
1461   static void flattenNestedProfile(SampleProfileMap &OutputProfiles,
1462                                    const FunctionSamples &FS) {
1463     // To retain the context, checksum, attributes of the original profile, make
1464     // a copy of it if no profile is found.
1465     SampleContext &Context = FS.getContext();
1466     auto Ret = OutputProfiles.try_emplace(Context, FS);
1467     FunctionSamples &Profile = Ret.first->second;
1468     if (Ret.second) {
1469       // Clear nested inlinees' samples for the flattened copy. These inlinees
1470       // will have their own top-level entries after flattening.
1471       Profile.removeAllCallsiteSamples();
1472       // We recompute TotalSamples later, so here set to zero.
1473       Profile.setTotalSamples(0);
1474     } else {
1475       for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) {
1476         Profile.addSampleRecord(LineLocation, SampleRecord);
1477       }
1478     }
1479 
1480     assert(Profile.getCallsiteSamples().empty() &&
1481            "There should be no inlinees' profiles after flattening.");
1482 
1483     // TotalSamples might not be equal to the sum of all samples from
1484     // BodySamples and CallsiteSamples. So here we use "TotalSamples =
1485     // Original_TotalSamples - All_of_Callsite_TotalSamples +
1486     // All_of_Callsite_HeadSamples" to compute the new TotalSamples.
1487     uint64_t TotalSamples = FS.getTotalSamples();
1488 
1489     for (const auto &I : FS.getCallsiteSamples()) {
1490       for (const auto &Callee : I.second) {
1491         const auto &CalleeProfile = Callee.second;
1492         // Add body sample.
1493         Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator,
1494                                CalleeProfile.getHeadSamplesEstimate());
1495         // Add callsite sample.
1496         Profile.addCalledTargetSamples(
1497             I.first.LineOffset, I.first.Discriminator,
1498             CalleeProfile.getFunction(),
1499             CalleeProfile.getHeadSamplesEstimate());
1500         // Update total samples.
1501         TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples()
1502                            ? TotalSamples - CalleeProfile.getTotalSamples()
1503                            : 0;
1504         TotalSamples += CalleeProfile.getHeadSamplesEstimate();
1505         // Recursively convert callee profile.
1506         flattenNestedProfile(OutputProfiles, CalleeProfile);
1507       }
1508     }
1509     Profile.addTotalSamples(TotalSamples);
1510 
1511     Profile.setHeadSamples(Profile.getHeadSamplesEstimate());
1512   }
1513 
1514   // Nest all children profiles into the profile of Node.
1515   void convertCSProfiles(FrameNode &Node);
1516   FrameNode *getOrCreateContextPath(const SampleContext &Context);
1517 
1518   SampleProfileMap &ProfileMap;
1519   FrameNode RootFrame;
1520 };
1521 
1522 /// ProfileSymbolList records the list of function symbols shown up
1523 /// in the binary used to generate the profile. It is useful to
1524 /// to discriminate a function being so cold as not to shown up
1525 /// in the profile and a function newly added.
1526 class ProfileSymbolList {
1527 public:
1528   /// copy indicates whether we need to copy the underlying memory
1529   /// for the input Name.
1530   void add(StringRef Name, bool Copy = false) {
1531     if (!Copy) {
1532       Syms.insert(Name);
1533       return;
1534     }
1535     Syms.insert(Name.copy(Allocator));
1536   }
1537 
1538   bool contains(StringRef Name) { return Syms.count(Name); }
1539 
1540   void merge(const ProfileSymbolList &List) {
1541     for (auto Sym : List.Syms)
1542       add(Sym, true);
1543   }
1544 
1545   unsigned size() { return Syms.size(); }
1546 
1547   void setToCompress(bool TC) { ToCompress = TC; }
1548   bool toCompress() { return ToCompress; }
1549 
1550   LLVM_ABI std::error_code read(const uint8_t *Data, uint64_t ListSize);
1551   LLVM_ABI std::error_code write(raw_ostream &OS);
1552   LLVM_ABI void dump(raw_ostream &OS = dbgs()) const;
1553 
1554 private:
1555   // Determine whether or not to compress the symbol list when
1556   // writing it into profile. The variable is unused when the symbol
1557   // list is read from an existing profile.
1558   bool ToCompress = false;
1559   DenseSet<StringRef> Syms;
1560   BumpPtrAllocator Allocator;
1561 };
1562 
1563 } // end namespace sampleprof
1564 
1565 using namespace sampleprof;
1566 // Provide DenseMapInfo for SampleContext.
1567 template <> struct DenseMapInfo<SampleContext> {
1568   static inline SampleContext getEmptyKey() { return SampleContext(); }
1569 
1570   static inline SampleContext getTombstoneKey() {
1571     return SampleContext(FunctionId(~1ULL));
1572   }
1573 
1574   static unsigned getHashValue(const SampleContext &Val) {
1575     return Val.getHashCode();
1576   }
1577 
1578   static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) {
1579     return LHS == RHS;
1580   }
1581 };
1582 
1583 // Prepend "__uniq" before the hash for tools like profilers to understand
1584 // that this symbol is of internal linkage type.  The "__uniq" is the
1585 // pre-determined prefix that is used to tell tools that this symbol was
1586 // created with -funique-internal-linkage-symbols and the tools can strip or
1587 // keep the prefix as needed.
1588 inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) {
1589   llvm::MD5 Md5;
1590   Md5.update(FName);
1591   llvm::MD5::MD5Result R;
1592   Md5.final(R);
1593   SmallString<32> Str;
1594   llvm::MD5::stringifyResult(R, Str);
1595   // Convert MD5hash to Decimal. Demangler suffixes can either contain
1596   // numbers or characters but not both.
1597   llvm::APInt IntHash(128, Str.str(), 16);
1598   return toString(IntHash, /* Radix = */ 10, /* Signed = */ false)
1599       .insert(0, FunctionSamples::UniqSuffix);
1600 }
1601 
1602 } // end namespace llvm
1603 
1604 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
1605