//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function and
/// the path from the root to the node represents its stack trace. Each node in
/// the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented using
/// an Array<NodeIdPair> -- each NodeIdPair instance will contain the function
/// ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach because computing the hash every time we need
/// to update a function's call information, while handling the entry and exit
/// events, takes a significant amount of time.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact and
/// quick to update) to enable us to keep the overheads of this implementation
/// to a minimum.
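///
/// As a minimal usage sketch (the function IDs FId_f and FId_g, the TSC
/// values, and the CPU are made up for illustration; A is an initialized
/// Allocators instance), recording the example above might look like:
///
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(FId_f, /*TSC=*/100, /*CPU=*/0);
///   Trie.enterFunction(FId_g, /*TSC=*/110, /*CPU=*/0);
///   Trie.exitFunction(FId_g, /*TSC=*/150, /*CPU=*/0);  // g's local time: 40.
///   Trie.exitFunction(FId_f, /*TSC=*/200, /*CPU=*/0);  // f's total: 100.
///
/// After this sequence, getRoots() yields a single root for f, whose Callees
/// array contains the node for the f -> g path.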
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of time
  // for this particular node including its children call times, and just the
  // local time spent on this node. Each Node will have the ID of the XRay
  // instrumented function with which it is associated.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience to
  // allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }

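  // A minimal setup sketch (hypothetical values; the flag-driven overload
  // above is what the profiling mode actually uses):
  //
  //   auto A = FunctionCallTrie::InitAllocatorsCustom(1 << 20); // 1 MiB cap.
  //   FunctionCallTrie Trie(A);
  //   Trie.enterFunction(/*FId=*/1, /*TSC=*/..., /*CPU=*/0);
  //   Trie.exitFunction(/*FId=*/1, /*TSC=*/..., /*CPU=*/0);
  //
  // The Allocators instance must outlive every FunctionCallTrie constructed
  // from it, since the trie's arrays allocate from it on every append.
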
private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, we access that node and place it on
    // top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before, so we create a new node
    // here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. don't fit into
    // the stack due to allocator constraints, we decrement that count first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether we've
    // entered this function before. We do as little processing here as we can,
    // since most of the hard work would have already been done at function
    // entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get an accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC), account for the difference
      // between the entry TSC and the max for the TSC counter (max of
      // uint64_t), then add the value of TSC. We can prove that the maximum
      // delta we will get is at most the 64-bit unsigned value, since the
      // difference between a TSC of 0 and a Top.EntryTSC of 1 is
      // (numeric_limits<uint64_t>::max() - 1) + 1.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
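      //
      // As a concrete, made-up illustration: with Top.EntryTSC = max - 10 and
      // a wrapped TSC = 5, the expression below computes a delta of
      // (max - (max - 10)) + 5 = 15 cycles elapsed across the wrap-around.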
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

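  // A traversal sketch (hypothetical consumer code; Trie and Report are
  // assumed names). The roots plus each node's Callees array are enough to
  // walk the trie; this sketch only visits roots and their immediate callees.
  //
  //   for (const auto Root : Trie.getRoots())
  //     for (const auto &Callee : Root->Callees)
  //       Report(Root->FId, Callee.FId, Callee.NodePtr->CallCount,
  //              Callee.NodePtr->CumulativeLocalTime);
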
  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a depth-first traversal from the
  // roots, re-creating each node it visits in the provided FunctionCallTrie.
  //
  // This operation will *not* destroy the state in |O|, and thus would cause
  // duplicate entries in |O| if it were not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We push the roots into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }

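  // A copy sketch (hypothetical caller; LiveTrie is an assumed name). A live
  // trie can be snapshotted into freshly-initialized storage along these
  // lines:
  //
  //   FunctionCallTrie::Allocators CopyAllocators =
  //       FunctionCallTrie::InitAllocators();
  //   FunctionCallTrie Copy(CopyAllocators); // must start out empty
  //   LiveTrie.deepCopyInto(Copy);
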
  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data in
  // the nodes with the data in the target's nodes. If the node doesn't exist in
  // the provided trie, we add a new one in the right position, and inherit the
  // data from the original (current) trie, along with all its callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
            // Guard against allocation failure when appending the callee
            // entry; bail out rather than dereference a null pointer below.
            if (UNLIKELY(TargetCallee == nullptr))
              return;
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};
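
// A minimal aggregation sketch (hypothetical driver code; PerThreadTries is
// an assumed container). A collector can merge each per-thread trie into one
// aggregate trie for reporting along these lines:
//
//   FunctionCallTrie::Allocators AggAllocators =
//       FunctionCallTrie::InitAllocators();
//   FunctionCallTrie Aggregate(AggAllocators);
//   for (FunctionCallTrie &ThreadTrie : PerThreadTries)
//     ThreadTrie.mergeInto(Aggregate);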

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H