xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/DataAccessProf.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
//===- DataAccessProf.h - Data access profile format support ----*- C++ -*-===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support to construct and use data access profiles.
11 //
12 // For the original RFC of this pass please see
13 // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
18 #define LLVM_PROFILEDATA_DATAACCESSPROF_H_
19 
20 #include "llvm/ADT/DenseMapInfoVariant.h"
21 #include "llvm/ADT/MapVector.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ProfileData/InstrProf.h"
26 #include "llvm/Support/Allocator.h"
27 #include "llvm/Support/Compiler.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/StringSaver.h"
30 
31 #include <cstdint>
32 #include <optional>
33 #include <variant>
34 
35 namespace llvm {
36 
37 namespace memprof {
38 
39 /// The location of data in the source code. Used by profile lookup API.
40 struct SourceLocation {
SourceLocationSourceLocation41   SourceLocation(StringRef FileNameRef, uint32_t Line)
42       : FileName(FileNameRef.str()), Line(Line) {}
43 
44   // Empty constructor is used in yaml conversion.
SourceLocationSourceLocation45   SourceLocation() {}
46   /// The filename where the data is located.
47   std::string FileName;
48   /// The line number in the source code.
49   uint32_t Line;
50 };
51 
52 namespace internal {
53 
54 // Conceptually similar to SourceLocation except that FileNames are StringRef of
55 // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
56 // to represent data locations internally.
57 struct SourceLocationRef {
SourceLocationRefSourceLocationRef58   SourceLocationRef(StringRef FileNameRef, uint32_t Line)
59       : FileName(FileNameRef), Line(Line) {}
60   // The filename where the data is located.
61   StringRef FileName;
62   // The line number in the source code.
63   uint32_t Line;
64 };
65 
66 // The data access profiles for a symbol. Used by `DataAccessProfData`
67 // to represent records internally.
68 struct DataAccessProfRecordRef {
DataAccessProfRecordRefDataAccessProfRecordRef69   DataAccessProfRecordRef(uint64_t SymbolID, uint64_t AccessCount,
70                           bool IsStringLiteral)
71       : SymbolID(SymbolID), AccessCount(AccessCount),
72         IsStringLiteral(IsStringLiteral) {}
73 
74   // Represents a data symbol. The semantic comes in two forms: a symbol index
75   // for symbol name if `IsStringLiteral` is false, or the hash of a string
76   // content if `IsStringLiteral` is true. For most of the symbolizable static
77   // data, the mangled symbol names remain stable relative to the source code
78   // and therefore used to identify symbols across binary releases. String
79   // literals have unstable name patterns like `.str.N[.llvm.hash]`, so we use
80   // the content hash instead. This is a required field.
81   uint64_t SymbolID;
82 
83   // The access count of symbol. Required.
84   uint64_t AccessCount;
85 
86   // True iff this is a record for string literal (symbols with name pattern
87   // `.str.*` in the symbol table). Required.
88   bool IsStringLiteral;
89 
90   // The locations of data in the source code. Optional.
91   llvm::SmallVector<SourceLocationRef, 0> Locations;
92 };
93 } // namespace internal
94 
95 // SymbolID is either a string representing symbol name if the symbol has
96 // stable mangled name relative to source code, or a uint64_t representing the
97 // content hash of a string literal (with unstable name patterns like
98 // `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
99 using SymbolHandleRef = std::variant<StringRef, uint64_t>;
100 
101 // The senamtic is the same as `SymbolHandleRef` above. The strings are owned.
102 using SymbolHandle = std::variant<std::string, uint64_t>;
103 
104 /// The data access profiles for a symbol.
105 struct DataAccessProfRecord {
106 public:
DataAccessProfRecordDataAccessProfRecord107   DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
108                        ArrayRef<internal::SourceLocationRef> LocRefs)
109       : AccessCount(AccessCount) {
110     if (std::holds_alternative<StringRef>(SymHandleRef)) {
111       SymHandle = std::get<StringRef>(SymHandleRef).str();
112     } else
113       SymHandle = std::get<uint64_t>(SymHandleRef);
114 
115     for (auto Loc : LocRefs)
116       Locations.emplace_back(Loc.FileName, Loc.Line);
117   }
118   // Empty constructor is used in yaml conversion.
DataAccessProfRecordDataAccessProfRecord119   DataAccessProfRecord() : AccessCount(0) {}
120   SymbolHandle SymHandle;
121   uint64_t AccessCount;
122   // The locations of data in the source code. Optional.
123   SmallVector<SourceLocation> Locations;
124 };
125 
126 /// Encapsulates the data access profile data and the methods to operate on
127 /// it. This class provides profile look-up, serialization and
128 /// deserialization.
129 class DataAccessProfData {
130 public:
131   // Use MapVector to keep input order of strings for serialization and
132   // deserialization.
133   using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
134 
DataAccessProfData()135   DataAccessProfData() : Saver(Allocator) {}
136 
137   /// Serialize profile data to the output stream.
138   /// Storage layout:
139   /// - Serialized strings.
140   /// - The encoded hashes.
141   /// - Records.
142   LLVM_ABI Error serialize(ProfOStream &OS) const;
143 
144   /// Deserialize this class from the given buffer.
145   LLVM_ABI Error deserialize(const unsigned char *&Ptr);
146 
147   /// Returns a profile record for \p SymbolID, or std::nullopt if there
148   /// isn't a record. Internally, this function will canonicalize the symbol
149   /// name before the lookup.
150   LLVM_ABI std::optional<DataAccessProfRecord>
151   getProfileRecord(const SymbolHandleRef SymID) const;
152 
153   /// Returns true if \p SymID is seen in profiled binaries and cold.
154   LLVM_ABI bool isKnownColdSymbol(const SymbolHandleRef SymID) const;
155 
156   /// Methods to set symbolized data access profile. Returns error if
157   /// duplicated symbol names or content hashes are seen. The user of this
158   /// class should aggregate counters that correspond to the same symbol name
159   /// or with the same string literal hash before calling 'set*' methods.
160   LLVM_ABI Error setDataAccessProfile(SymbolHandleRef SymbolID,
161                                       uint64_t AccessCount);
162   /// Similar to the method above, for records with \p Locations representing
163   /// the `filename:line` where this symbol shows up. Note because of linker's
164   /// merge of identical symbols (e.g., unnamed_addr string literals), one
165   /// symbol is likely to have multiple locations.
166   LLVM_ABI Error setDataAccessProfile(SymbolHandleRef SymbolID,
167                                       uint64_t AccessCount,
168                                       ArrayRef<SourceLocation> Locations);
169   /// Add a symbol that's seen in the profiled binary without samples.
170   LLVM_ABI Error addKnownSymbolWithoutSamples(SymbolHandleRef SymbolID);
171 
172   /// The following methods return array reference for various internal data
173   /// structures.
getStrToIndexMapRef()174   ArrayRef<StringToIndexMap::value_type> getStrToIndexMapRef() const {
175     return StrToIndexMap.getArrayRef();
176   }
177   ArrayRef<
178       MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef>::value_type>
getRecords()179   getRecords() const {
180     return Records.getArrayRef();
181   }
getKnownColdSymbols()182   ArrayRef<StringRef> getKnownColdSymbols() const {
183     return KnownColdSymbols.getArrayRef();
184   }
getKnownColdHashes()185   ArrayRef<uint64_t> getKnownColdHashes() const {
186     return KnownColdHashes.getArrayRef();
187   }
188 
189 private:
190   /// Serialize the symbol strings into the output stream.
191   Error serializeSymbolsAndFilenames(ProfOStream &OS) const;
192 
193   /// Deserialize the symbol strings from \p Ptr and increment \p Ptr to the
194   /// start of the next payload.
195   Error deserializeSymbolsAndFilenames(const unsigned char *&Ptr,
196                                        const uint64_t NumSampledSymbols,
197                                        const uint64_t NumColdKnownSymbols);
198 
199   /// Decode the records and increment \p Ptr to the start of the next
200   /// payload.
201   Error deserializeRecords(const unsigned char *&Ptr);
202 
203   /// A helper function to compute a storage index for \p SymbolID.
204   uint64_t getEncodedIndex(const SymbolHandleRef SymbolID) const;
205 
206   // Keeps owned copies of the input strings.
207   // NOTE: Keep `Saver` initialized before other class members that reference
208   // its string copies and destructed after they are destructed.
209   llvm::BumpPtrAllocator Allocator;
210   llvm::UniqueStringSaver Saver;
211 
212   // `Records` stores the records.
213   MapVector<SymbolHandleRef, internal::DataAccessProfRecordRef> Records;
214 
215   StringToIndexMap StrToIndexMap;
216   llvm::SetVector<uint64_t> KnownColdHashes;
217   llvm::SetVector<StringRef> KnownColdSymbols;
218 };
219 
220 } // namespace memprof
221 } // namespace llvm
222 
223 #endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_
224