xref: /freebsd/contrib/llvm-project/llvm/include/llvm/ProfileData/MemProfRadixTree.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- MemProfRadixTree.h - MemProf format support ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A custom Radix Tree builder for memprof data to optimize for space.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
14 #define LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
15 
16 #include "llvm/ProfileData/IndexedMemProfData.h"
17 #include "llvm/ProfileData/MemProf.h"
18 #include "llvm/Support/Compiler.h"
19 
20 namespace llvm {
21 namespace memprof {
22 namespace detail {
23 // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable.  We have
24 // to do so in one of two different ways depending on the type of the hash
25 // table.
26 template <typename value_type, typename IterTy>
DerefIterator(IterTy Iter)27 value_type DerefIterator(IterTy Iter) {
28   using deref_type = llvm::remove_cvref_t<decltype(*Iter)>;
29   if constexpr (std::is_same_v<deref_type, value_type>)
30     return *Iter;
31   else
32     return Iter->second;
33 }
34 } // namespace detail
35 
36 // A function object that returns a frame for a given FrameId.
37 template <typename MapTy> struct FrameIdConverter {
38   std::optional<FrameId> LastUnmappedId;
39   MapTy &Map;
40 
41   FrameIdConverter() = delete;
FrameIdConverterFrameIdConverter42   FrameIdConverter(MapTy &Map) : Map(Map) {}
43 
44   // Delete the copy constructor and copy assignment operator to avoid a
45   // situation where a copy of FrameIdConverter gets an error in LastUnmappedId
46   // while the original instance doesn't.
47   FrameIdConverter(const FrameIdConverter &) = delete;
48   FrameIdConverter &operator=(const FrameIdConverter &) = delete;
49 
operatorFrameIdConverter50   Frame operator()(FrameId Id) {
51     auto Iter = Map.find(Id);
52     if (Iter == Map.end()) {
53       LastUnmappedId = Id;
54       return Frame();
55     }
56     return detail::DerefIterator<Frame>(Iter);
57   }
58 };
59 
60 // A function object that returns a call stack for a given CallStackId.
61 template <typename MapTy> struct CallStackIdConverter {
62   std::optional<CallStackId> LastUnmappedId;
63   MapTy &Map;
64   llvm::function_ref<Frame(FrameId)> FrameIdToFrame;
65 
66   CallStackIdConverter() = delete;
CallStackIdConverterCallStackIdConverter67   CallStackIdConverter(MapTy &Map,
68                        llvm::function_ref<Frame(FrameId)> FrameIdToFrame)
69       : Map(Map), FrameIdToFrame(FrameIdToFrame) {}
70 
71   // Delete the copy constructor and copy assignment operator to avoid a
72   // situation where a copy of CallStackIdConverter gets an error in
73   // LastUnmappedId while the original instance doesn't.
74   CallStackIdConverter(const CallStackIdConverter &) = delete;
75   CallStackIdConverter &operator=(const CallStackIdConverter &) = delete;
76 
operatorCallStackIdConverter77   std::vector<Frame> operator()(CallStackId CSId) {
78     std::vector<Frame> Frames;
79     auto CSIter = Map.find(CSId);
80     if (CSIter == Map.end()) {
81       LastUnmappedId = CSId;
82     } else {
83       llvm::SmallVector<FrameId> CS =
84           detail::DerefIterator<llvm::SmallVector<FrameId>>(CSIter);
85       Frames.reserve(CS.size());
86       for (FrameId Id : CS)
87         Frames.push_back(FrameIdToFrame(Id));
88     }
89     return Frames;
90   }
91 };
92 
93 // A function object that returns a Frame stored at a given index into the Frame
94 // array in the profile.
95 struct LinearFrameIdConverter {
96   const unsigned char *FrameBase;
97 
98   LinearFrameIdConverter() = delete;
LinearFrameIdConverterLinearFrameIdConverter99   LinearFrameIdConverter(const unsigned char *FrameBase)
100       : FrameBase(FrameBase) {}
101 
operatorLinearFrameIdConverter102   Frame operator()(LinearFrameId LinearId) {
103     uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
104     return Frame::deserialize(FrameBase + Offset);
105   }
106 };
107 
108 // A function object that returns a call stack stored at a given index into the
109 // call stack array in the profile.
110 struct LinearCallStackIdConverter {
111   const unsigned char *CallStackBase;
112   llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame;
113 
114   LinearCallStackIdConverter() = delete;
LinearCallStackIdConverterLinearCallStackIdConverter115   LinearCallStackIdConverter(
116       const unsigned char *CallStackBase,
117       llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame)
118       : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
119 
operatorLinearCallStackIdConverter120   std::vector<Frame> operator()(LinearCallStackId LinearCSId) {
121     std::vector<Frame> Frames;
122 
123     const unsigned char *Ptr =
124         CallStackBase +
125         static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
126     uint32_t NumFrames =
127         support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
128     Frames.reserve(NumFrames);
129     for (; NumFrames; --NumFrames) {
130       LinearFrameId Elem =
131           support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
132       // Follow a pointer to the parent, if any.  See comments below on
133       // CallStackRadixTreeBuilder for the description of the radix tree format.
134       if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
135         Ptr += (-Elem) * sizeof(LinearFrameId);
136         Elem =
137             support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
138       }
139       // We shouldn't encounter another pointer.
140       assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
141       Frames.push_back(FrameIdToFrame(Elem));
142       Ptr += sizeof(LinearFrameId);
143     }
144 
145     return Frames;
146   }
147 };
148 
149 // Used to extract caller-callee pairs from the call stack array.  The leaf
150 // frame is assumed to call a heap allocation function with GUID 0.  The
151 // resulting pairs are accumulated in CallerCalleePairs.  Users can take it
152 // with:
153 //
154 //   auto Pairs = std::move(Extractor.CallerCalleePairs);
155 struct CallerCalleePairExtractor {
156   // The base address of the radix tree array.
157   const unsigned char *CallStackBase;
158   // A functor to convert a linear FrameId to a Frame.
159   llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame;
160   // A map from caller GUIDs to lists of call sites in respective callers.
161   DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs;
162 
163   // The set of linear call stack IDs that we've visited.
164   BitVector Visited;
165 
166   CallerCalleePairExtractor() = delete;
CallerCalleePairExtractorCallerCalleePairExtractor167   CallerCalleePairExtractor(
168       const unsigned char *CallStackBase,
169       llvm::function_ref<Frame(LinearFrameId)> FrameIdToFrame,
170       unsigned RadixTreeSize)
171       : CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame),
172         Visited(RadixTreeSize) {}
173 
operatorCallerCalleePairExtractor174   void operator()(LinearCallStackId LinearCSId) {
175     const unsigned char *Ptr =
176         CallStackBase +
177         static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
178     uint32_t NumFrames =
179         support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
180     // The leaf frame calls a function with GUID 0.
181     uint64_t CalleeGUID = 0;
182     for (; NumFrames; --NumFrames) {
183       LinearFrameId Elem =
184           support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
185       // Follow a pointer to the parent, if any.  See comments below on
186       // CallStackRadixTreeBuilder for the description of the radix tree format.
187       if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
188         Ptr += (-Elem) * sizeof(LinearFrameId);
189         Elem =
190             support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
191       }
192       // We shouldn't encounter another pointer.
193       assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
194 
195       // Add a new caller-callee pair.
196       Frame F = FrameIdToFrame(Elem);
197       uint64_t CallerGUID = F.Function;
198       LineLocation Loc(F.LineOffset, F.Column);
199       CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID);
200 
201       // Keep track of the indices we've visited.  If we've already visited the
202       // current one, terminate the traversal.  We will not discover any new
203       // caller-callee pair by continuing the traversal.
204       unsigned Offset =
205           std::distance(CallStackBase, Ptr) / sizeof(LinearFrameId);
206       if (Visited.test(Offset))
207         break;
208       Visited.set(Offset);
209 
210       Ptr += sizeof(LinearFrameId);
211       CalleeGUID = CallerGUID;
212     }
213   }
214 };
215 
216 // A convenience wrapper around FrameIdConverter and CallStackIdConverter for
217 // tests.
218 struct IndexedCallstackIdConverter {
219   IndexedCallstackIdConverter() = delete;
IndexedCallstackIdConverterIndexedCallstackIdConverter220   IndexedCallstackIdConverter(IndexedMemProfData &MemProfData)
221       : FrameIdConv(MemProfData.Frames),
222         CSIdConv(MemProfData.CallStacks, FrameIdConv) {}
223 
224   // Delete the copy constructor and copy assignment operator to avoid a
225   // situation where a copy of IndexedCallstackIdConverter gets an error in
226   // LastUnmappedId while the original instance doesn't.
227   IndexedCallstackIdConverter(const IndexedCallstackIdConverter &) = delete;
228   IndexedCallstackIdConverter &
229   operator=(const IndexedCallstackIdConverter &) = delete;
230 
operatorIndexedCallstackIdConverter231   std::vector<Frame> operator()(CallStackId CSId) { return CSIdConv(CSId); }
232 
233   FrameIdConverter<decltype(IndexedMemProfData::Frames)> FrameIdConv;
234   CallStackIdConverter<decltype(IndexedMemProfData::CallStacks)> CSIdConv;
235 };
236 
237 struct FrameStat {
238   // The number of occurrences of a given FrameId.
239   uint64_t Count = 0;
240   // The sum of indexes where a given FrameId shows up.
241   uint64_t PositionSum = 0;
242 };
243 
244 // Compute a histogram of Frames in call stacks.
245 template <typename FrameIdTy>
246 llvm::DenseMap<FrameIdTy, FrameStat>
247 computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
248                           &MemProfCallStackData);
249 
250 // Construct a radix tree of call stacks.
251 //
252 // A set of call stacks might look like:
253 //
254 // CallStackId 1:  f1 -> f2 -> f3
255 // CallStackId 2:  f1 -> f2 -> f4 -> f5
256 // CallStackId 3:  f1 -> f2 -> f4 -> f6
257 // CallStackId 4:  f7 -> f8 -> f9
258 //
259 // where each fn refers to a stack frame.
260 //
261 // Since we expect a lot of common prefixes, we can compress the call stacks
262 // into a radix tree like:
263 //
264 // CallStackId 1:  f1 -> f2 -> f3
265 //                       |
266 // CallStackId 2:        +---> f4 -> f5
267 //                             |
268 // CallStackId 3:              +---> f6
269 //
270 // CallStackId 4:  f7 -> f8 -> f9
271 //
272 // Now, we are interested in retrieving call stacks for a given CallStackId, so
273 // we just need a pointer from a given call stack to its parent.  For example,
274 // CallStackId 2 would point to CallStackId 1 as a parent.
275 //
276 // We serialize the radix tree above into a single array along with the length
277 // of each call stack and pointers to the parent call stacks.
278 //
279 // Index:              0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
280 // Array:             L3 f9 f8 f7 L4 f6 J3 L4 f5 f4 J3 L3 f3 f2 f1
281 //                     ^           ^        ^           ^
282 //                     |           |        |           |
283 // CallStackId 4:  0 --+           |        |           |
284 // CallStackId 3:  4 --------------+        |           |
285 // CallStackId 2:  7 -----------------------+           |
286 // CallStackId 1: 11 -----------------------------------+
287 //
288 // - LN indicates the length of a call stack, encoded as ordinary integer N.
289 //
290 // - JN indicates a pointer to the parent, encoded as -N.
291 //
292 // The radix tree allows us to reconstruct call stacks in the leaf-to-root
293 // order as we scan the array from left ro right while following pointers to
294 // parents along the way.
295 //
296 // For example, if we are decoding CallStackId 2, we start a forward traversal
297 // at Index 7, noting the call stack length of 4 and obtaining f5 and f4.  When
298 // we see J3 at Index 10, we resume a forward traversal at Index 13 = 10 + 3,
299 // picking up f2 and f1.  We are done after collecting 4 frames as indicated at
300 // the beginning of the traversal.
301 //
302 // On-disk IndexedMemProfRecord will refer to call stacks by their indexes into
303 // the radix tree array, so we do not explicitly encode mappings like:
304 // "CallStackId 1 -> 11".
305 template <typename FrameIdTy> class CallStackRadixTreeBuilder {
306   // The radix tree array.
307   std::vector<LinearFrameId> RadixArray;
308 
309   // Mapping from CallStackIds to indexes into RadixArray.
310   llvm::DenseMap<CallStackId, LinearCallStackId> CallStackPos;
311 
312   // In build, we partition a given call stack into two parts -- the prefix
313   // that's common with the previously encoded call stack and the frames beyond
314   // the common prefix -- the unique portion.  Then we want to find out where
315   // the common prefix is stored in RadixArray so that we can link the unique
316   // portion to the common prefix.  Indexes, declared below, helps with our
317   // needs.  Intuitively, Indexes tells us where each of the previously encoded
318   // call stack is stored in RadixArray.  More formally, Indexes satisfies:
319   //
320   //   RadixArray[Indexes[I]] == Prev[I]
321   //
322   // for every I, where Prev is the the call stack in the root-to-leaf order
323   // previously encoded by build.  (Note that Prev, as passed to
324   // encodeCallStack, is in the leaf-to-root order.)
325   //
326   // For example, if the call stack being encoded shares 5 frames at the root of
327   // the call stack with the previously encoded call stack,
328   // RadixArray[Indexes[0]] is the root frame of the common prefix.
329   // RadixArray[Indexes[5 - 1]] is the last frame of the common prefix.
330   std::vector<LinearCallStackId> Indexes;
331 
332   using CSIdPair = std::pair<CallStackId, llvm::SmallVector<FrameIdTy>>;
333 
334   // Encode a call stack into RadixArray.  Return the starting index within
335   // RadixArray.
336   LinearCallStackId encodeCallStack(
337       const llvm::SmallVector<FrameIdTy> *CallStack,
338       const llvm::SmallVector<FrameIdTy> *Prev,
339       const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes);
340 
341 public:
342   CallStackRadixTreeBuilder() = default;
343 
344   // Build a radix tree array.
345   void
346   build(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
347             &&MemProfCallStackData,
348         const llvm::DenseMap<FrameIdTy, LinearFrameId> *MemProfFrameIndexes,
349         llvm::DenseMap<FrameIdTy, FrameStat> &FrameHistogram);
350 
getRadixArray()351   ArrayRef<LinearFrameId> getRadixArray() const { return RadixArray; }
352 
takeCallStackPos()353   llvm::DenseMap<CallStackId, LinearCallStackId> takeCallStackPos() {
354     return std::move(CallStackPos);
355   }
356 };
357 
358 // Defined in MemProfRadixTree.cpp
359 extern template class LLVM_TEMPLATE_ABI CallStackRadixTreeBuilder<FrameId>;
360 extern template class LLVM_TEMPLATE_ABI
361     CallStackRadixTreeBuilder<LinearFrameId>;
362 
363 } // namespace memprof
364 } // namespace llvm
365 #endif // LLVM_PROFILEDATA_MEMPROFRADIXTREE_H
366