//===-- xray_function_call_trie.h ------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This file defines the interface for a function call trie.
//
//===----------------------------------------------------------------------===//
#ifndef XRAY_FUNCTION_CALL_TRIE_H
#define XRAY_FUNCTION_CALL_TRIE_H

#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_profiling_flags.h"
#include "xray_segmented_array.h"
#include <cstddef> // For std::byte.
#include <limits>
#include <memory> // For placement new.
#include <utility>

namespace __xray {

/// A FunctionCallTrie represents the stack traces of XRay instrumented
/// functions that we've encountered, where a node corresponds to a function
/// and the path from the root to the node represents its stack trace. Each
/// node in the trie will contain some useful values, including:
///
///   * The cumulative amount of time spent in this particular node/stack.
///   * The number of times this stack has appeared.
///   * A histogram of latencies for that particular node.
///
/// Each node in the trie will also contain a list of callees, represented
/// using an Array<NodeIdPair> -- each NodeIdPair instance will contain the
/// function ID of the callee, and a pointer to the node.
///
/// If we visualise this data structure, we'll find the following potential
/// representation:
///
///   [function id node] -> [callees] [cumulative time]
///                         [call counter] [latency histogram]
///
/// As an example, when we have a function in this pseudocode:
///
///   func f(N) {
///     g()
///     h()
///     for i := 1..N { j() }
///   }
///
/// We may end up with a trie of the following form:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ ... ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///
/// If for instance the function g() called j() like so:
///
///   func g() {
///     for i := 1..10 { j() }
///   }
///
/// We'll find the following updated trie:
///
///   f -> [ g, h, j ] [...] [1] [...]
///   g -> [ j' ] [...] [1] [...]
///   h -> [ ... ] [...] [1] [...]
///   j -> [ ... ] [...] [N] [...]
///   j' -> [ ... ] [...] [10] [...]
///
/// Note that we'll have a new node representing the path `f -> g -> j'` with
/// isolated data. This isolation gives us a means of representing the stack
/// traces as a path, as opposed to a key in a table. The alternative
/// implementation here would be to use a separate table for the path, and use
/// hashes of the path as an identifier to accumulate the information. We've
/// moved away from this approach as it takes a lot of time to compute the
/// hash every time we need to update a function's call information while
/// handling the entry and exit events.
///
/// This approach allows us to maintain a shadow stack, which represents the
/// currently executing path, and on function exits quickly compute the amount
/// of time elapsed from the entry, then update the counters for the node
/// already represented in the trie. This necessitates an efficient
/// representation of the various data structures (the list of callees must be
/// cache-aware and efficient to look up, and the histogram must be compact
/// and quick to update) to enable us to keep the overheads of this
/// implementation to the minimum.
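///
/// A rough usage sketch (not normative; the function IDs, TSC values, and CPU
/// numbers below are made up purely for illustration):
///
///   auto A = FunctionCallTrie::InitAllocators();
///   FunctionCallTrie Trie(A);
///   Trie.enterFunction(/*FId=*/1, /*TSC=*/100, /*CPU=*/0); // enter f
///   Trie.enterFunction(/*FId=*/2, /*TSC=*/110, /*CPU=*/0); // f calls g
///   Trie.exitFunction(/*FId=*/2, /*TSC=*/150, /*CPU=*/0);  // g returns
///   Trie.exitFunction(/*FId=*/1, /*TSC=*/200, /*CPU=*/0);  // f returns
///   // Trie.getRoots() now holds a single root node for f, with g as its
///   // sole callee.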
class FunctionCallTrie {
public:
  struct Node;

  // We use a NodeIdPair type instead of a std::pair<...> to not rely on the
  // standard library types in this header.
  struct NodeIdPair {
    Node *NodePtr;
    int32_t FId;
  };

  using NodeIdPairArray = Array<NodeIdPair>;
  using NodeIdPairAllocatorType = NodeIdPairArray::AllocatorType;

  // A Node in the FunctionCallTrie gives us a list of callees, the cumulative
  // number of times this node actually appeared, the cumulative amount of
  // time for this particular node including its children's call times, and
  // just the local time spent on this node. Each Node will have the ID of the
  // XRay instrumented function that it is associated with.
  struct Node {
    Node *Parent;
    NodeIdPairArray Callees;
    uint64_t CallCount;
    uint64_t CumulativeLocalTime; // Typically in TSC deltas, not wall-time.
    int32_t FId;

    // TODO: Include the compact histogram.
  };
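
  // Illustrative note: callee lookup on a Node is a predicate-based search
  // over `Callees`, as done in enterFunction() below. For a hypothetical
  // `Node &N` and function ID `FId`:
  //
  //   auto *Callee = N.Callees.find_element(
  //       [FId](const NodeIdPair &P) { return P.FId == FId; });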

private:
  struct ShadowStackEntry {
    uint64_t EntryTSC;
    Node *NodePtr;
    uint16_t EntryCPU;
  };

  using NodeArray = Array<Node>;
  using RootArray = Array<Node *>;
  using ShadowStackArray = Array<ShadowStackEntry>;

public:
  // We collate the allocators we need into a single struct, as a convenience
  // to allow us to initialize these as a group.
  struct Allocators {
    using NodeAllocatorType = NodeArray::AllocatorType;
    using RootAllocatorType = RootArray::AllocatorType;
    using ShadowStackAllocatorType = ShadowStackArray::AllocatorType;

    // Use hosted aligned storage members to allow for trivial move and init.
    // This also allows us to sidestep the potential-failing allocation issue.
    alignas(NodeAllocatorType) std::byte
        NodeAllocatorStorage[sizeof(NodeAllocatorType)];
    alignas(RootAllocatorType) std::byte
        RootAllocatorStorage[sizeof(RootAllocatorType)];
    alignas(ShadowStackAllocatorType) std::byte
        ShadowStackAllocatorStorage[sizeof(ShadowStackAllocatorType)];
    alignas(NodeIdPairAllocatorType) std::byte
        NodeIdPairAllocatorStorage[sizeof(NodeIdPairAllocatorType)];

    NodeAllocatorType *NodeAllocator = nullptr;
    RootAllocatorType *RootAllocator = nullptr;
    ShadowStackAllocatorType *ShadowStackAllocator = nullptr;
    NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

    Allocators() = default;
    Allocators(const Allocators &) = delete;
    Allocators &operator=(const Allocators &) = delete;

    struct Buffers {
      BufferQueue::Buffer NodeBuffer;
      BufferQueue::Buffer RootsBuffer;
      BufferQueue::Buffer ShadowStackBuffer;
      BufferQueue::Buffer NodeIdPairBuffer;
    };

    explicit Allocators(Buffers &B) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage)
          NodeAllocatorType(B.NodeBuffer.Data, B.NodeBuffer.Size);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage)
          RootAllocatorType(B.RootsBuffer.Data, B.RootsBuffer.Size);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(
          B.ShadowStackBuffer.Data, B.ShadowStackBuffer.Size);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(
          B.NodeIdPairBuffer.Data, B.NodeIdPairBuffer.Size);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    explicit Allocators(uptr Max) XRAY_NEVER_INSTRUMENT {
      new (&NodeAllocatorStorage) NodeAllocatorType(Max);
      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);

      new (&RootAllocatorStorage) RootAllocatorType(Max);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);

      new (&ShadowStackAllocatorStorage) ShadowStackAllocatorType(Max);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);

      new (&NodeIdPairAllocatorStorage) NodeIdPairAllocatorType(Max);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);
    }

    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // Here we rely on the safety of memcpy'ing contents of the storage
      // members, and then pointing the source pointers to nullptr.
      internal_memcpy(&NodeAllocatorStorage, &O.NodeAllocatorStorage,
                      sizeof(NodeAllocatorType));
      internal_memcpy(&RootAllocatorStorage, &O.RootAllocatorStorage,
                      sizeof(RootAllocatorType));
      internal_memcpy(&ShadowStackAllocatorStorage,
                      &O.ShadowStackAllocatorStorage,
                      sizeof(ShadowStackAllocatorType));
      internal_memcpy(&NodeIdPairAllocatorStorage,
                      &O.NodeIdPairAllocatorStorage,
                      sizeof(NodeIdPairAllocatorType));

      NodeAllocator =
          reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
      RootAllocator =
          reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
      ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
          &ShadowStackAllocatorStorage);
      NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
          &NodeIdPairAllocatorStorage);

      O.NodeAllocator = nullptr;
      O.RootAllocator = nullptr;
      O.ShadowStackAllocator = nullptr;
      O.NodeIdPairAllocator = nullptr;
    }

    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
      // When moving into an existing instance, we ensure that we clean up the
      // current allocators.
      if (NodeAllocator)
        NodeAllocator->~NodeAllocatorType();
      if (O.NodeAllocator) {
        new (&NodeAllocatorStorage)
            NodeAllocatorType(std::move(*O.NodeAllocator));
        NodeAllocator =
            reinterpret_cast<NodeAllocatorType *>(&NodeAllocatorStorage);
        O.NodeAllocator = nullptr;
      } else {
        NodeAllocator = nullptr;
      }

      if (RootAllocator)
        RootAllocator->~RootAllocatorType();
      if (O.RootAllocator) {
        new (&RootAllocatorStorage)
            RootAllocatorType(std::move(*O.RootAllocator));
        RootAllocator =
            reinterpret_cast<RootAllocatorType *>(&RootAllocatorStorage);
        O.RootAllocator = nullptr;
      } else {
        RootAllocator = nullptr;
      }

      if (ShadowStackAllocator)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (O.ShadowStackAllocator) {
        new (&ShadowStackAllocatorStorage)
            ShadowStackAllocatorType(std::move(*O.ShadowStackAllocator));
        ShadowStackAllocator = reinterpret_cast<ShadowStackAllocatorType *>(
            &ShadowStackAllocatorStorage);
        O.ShadowStackAllocator = nullptr;
      } else {
        ShadowStackAllocator = nullptr;
      }

      if (NodeIdPairAllocator)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
      if (O.NodeIdPairAllocator) {
        new (&NodeIdPairAllocatorStorage)
            NodeIdPairAllocatorType(std::move(*O.NodeIdPairAllocator));
        NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
            &NodeIdPairAllocatorStorage);
        O.NodeIdPairAllocator = nullptr;
      } else {
        NodeIdPairAllocator = nullptr;
      }

      return *this;
    }

    ~Allocators() XRAY_NEVER_INSTRUMENT {
      if (NodeAllocator != nullptr)
        NodeAllocator->~NodeAllocatorType();
      if (RootAllocator != nullptr)
        RootAllocator->~RootAllocatorType();
      if (ShadowStackAllocator != nullptr)
        ShadowStackAllocator->~ShadowStackAllocatorType();
      if (NodeIdPairAllocator != nullptr)
        NodeIdPairAllocator->~NodeIdPairAllocatorType();
    }
  };

  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
    return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
  }

  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
    Allocators A(Max);
    return A;
  }

  static Allocators
  InitAllocatorsFromBuffers(Allocators::Buffers &Bufs) XRAY_NEVER_INSTRUMENT {
    Allocators A(Bufs);
    return A;
  }
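
  // A hedged setup sketch covering the three factory functions above. The
  // buffer contents are placeholders; how the BufferQueue::Buffer instances
  // are obtained (e.g. handed out by a BufferQueue) is outside this header.
  //
  //   // Flag-controlled sizing (per_thread_allocator_max):
  //   auto A1 = FunctionCallTrie::InitAllocators();
  //
  //   // Explicit cap passed to each allocator:
  //   auto A2 = FunctionCallTrie::InitAllocatorsCustom(1 << 20);
  //
  //   // Pre-acquired buffers:
  //   FunctionCallTrie::Allocators::Buffers Bufs;
  //   // ... populate Bufs.NodeBuffer, Bufs.RootsBuffer,
  //   //     Bufs.ShadowStackBuffer, Bufs.NodeIdPairBuffer ...
  //   auto A3 = FunctionCallTrie::InitAllocatorsFromBuffers(Bufs);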

private:
  NodeArray Nodes;
  RootArray Roots;
  ShadowStackArray ShadowStack;
  NodeIdPairAllocatorType *NodeIdPairAllocator;
  uint32_t OverflowedFunctions;

public:
  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
      : Nodes(*A.NodeAllocator),
        Roots(*A.RootAllocator),
        ShadowStack(*A.ShadowStackAllocator),
        NodeIdPairAllocator(A.NodeIdPairAllocator),
        OverflowedFunctions(0) {}

  FunctionCallTrie() = delete;
  FunctionCallTrie(const FunctionCallTrie &) = delete;
  FunctionCallTrie &operator=(const FunctionCallTrie &) = delete;

  FunctionCallTrie(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT
      : Nodes(std::move(O.Nodes)),
        Roots(std::move(O.Roots)),
        ShadowStack(std::move(O.ShadowStack)),
        NodeIdPairAllocator(O.NodeIdPairAllocator),
        OverflowedFunctions(O.OverflowedFunctions) {}

  FunctionCallTrie &operator=(FunctionCallTrie &&O) XRAY_NEVER_INSTRUMENT {
    Nodes = std::move(O.Nodes);
    Roots = std::move(O.Roots);
    ShadowStack = std::move(O.ShadowStack);
    NodeIdPairAllocator = O.NodeIdPairAllocator;
    OverflowedFunctions = O.OverflowedFunctions;
    return *this;
  }

  ~FunctionCallTrie() XRAY_NEVER_INSTRUMENT {}

  void enterFunction(const int32_t FId, uint64_t TSC,
                     uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    DCHECK_NE(FId, 0);

    // If we've already overflowed the function call stack, do not bother
    // attempting to record any more function entries.
    if (UNLIKELY(OverflowedFunctions)) {
      ++OverflowedFunctions;
      return;
    }

    // If this is the first function we've encountered, we want to set up the
    // node(s) and treat it as a root.
    if (UNLIKELY(ShadowStack.empty())) {
      auto *NewRoot = Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
      if (UNLIKELY(NewRoot == nullptr))
        return;
      if (Roots.AppendEmplace(NewRoot) == nullptr) {
        Nodes.trim(1);
        return;
      }
      if (ShadowStack.AppendEmplace(TSC, NewRoot, CPU) == nullptr) {
        Nodes.trim(1);
        Roots.trim(1);
        ++OverflowedFunctions;
        return;
      }
      return;
    }

    // From this point on, we require that the stack is not empty.
    DCHECK(!ShadowStack.empty());
    auto TopNode = ShadowStack.back().NodePtr;
    DCHECK_NE(TopNode, nullptr);

    // If we've seen this callee before, then we access that node and place it
    // on top of the stack.
    auto *Callee = TopNode->Callees.find_element(
        [FId](const NodeIdPair &NR) { return NR.FId == FId; });
    if (Callee != nullptr) {
      CHECK_NE(Callee->NodePtr, nullptr);
      if (ShadowStack.AppendEmplace(TSC, Callee->NodePtr, CPU) == nullptr)
        ++OverflowedFunctions;
      return;
    }

    // This means we've never seen this stack before; create a new node here.
    auto *NewNode = Nodes.AppendEmplace(
        TopNode, NodeIdPairArray(*NodeIdPairAllocator), 0u, 0u, FId);
    if (UNLIKELY(NewNode == nullptr))
      return;
    DCHECK_NE(NewNode, nullptr);
    TopNode->Callees.AppendEmplace(NewNode, FId);
    if (ShadowStack.AppendEmplace(TSC, NewNode, CPU) == nullptr)
      ++OverflowedFunctions;
    return;
  }
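
  // Illustrative trace (hypothetical IDs, following the f/g/j example in the
  // class comment, with f=1, g=2, j=3): after enterFunction(1, ...),
  // enterFunction(2, ...), and enterFunction(3, ...), the shadow stack holds
  // entries for f, g, and j (bottom to top), while Nodes holds the paths f,
  // f -> g, and f -> g -> j' as distinct nodes.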

  void exitFunction(int32_t FId, uint64_t TSC,
                    uint16_t CPU) XRAY_NEVER_INSTRUMENT {
    // If we're exiting functions that have "overflowed", i.e. did not fit
    // into the stack due to allocator constraints, we decrement that count
    // first.
    if (OverflowedFunctions) {
      --OverflowedFunctions;
      return;
    }

    // When we exit a function, we look up the ShadowStack to see whether
    // we've entered this function before. We do as little processing here as
    // we can, since most of the hard work would have already been done at
    // function entry.
    uint64_t CumulativeTreeTime = 0;

    while (!ShadowStack.empty()) {
      const auto &Top = ShadowStack.back();
      auto TopNode = Top.NodePtr;
      DCHECK_NE(TopNode, nullptr);

      // We may encounter overflow on the TSC we're provided, which may end up
      // being less than the TSC when we first entered the function.
      //
      // To get an accurate measurement of cycles, we need to check whether
      // we've overflowed (TSC < Top.EntryTSC), and in that case account for
      // the difference between the entry TSC and the maximum value of the TSC
      // counter (max of uint64_t), then add the value of TSC. We can prove
      // that the maximum delta we will get is at most the 64-bit unsigned
      // value, since the difference between a TSC of 0 and a Top.EntryTSC of
      // 1 is (numeric_limits<uint64_t>::max() - 1) + 1. A worked evaluation
      // of this expression follows this function.
      //
      // NOTE: This assumes that TSCs are synchronised across CPUs.
      // TODO: Count the number of times we've seen CPU migrations.
      uint64_t LocalTime =
          Top.EntryTSC > TSC
              ? (std::numeric_limits<uint64_t>::max() - Top.EntryTSC) + TSC
              : TSC - Top.EntryTSC;
      TopNode->CallCount++;
      TopNode->CumulativeLocalTime += LocalTime - CumulativeTreeTime;
      CumulativeTreeTime += LocalTime;
      ShadowStack.trim(1);

      // TODO: Update the histogram for the node.
      if (TopNode->FId == FId)
        break;
    }
  }
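
  // Worked evaluation of the wrap-around branch above, with made-up values:
  // if Top.EntryTSC == numeric_limits<uint64_t>::max() - 9 and TSC == 20
  // (i.e. the counter wrapped between entry and exit), then
  // Top.EntryTSC > TSC, and
  //
  //   LocalTime = (max - (max - 9)) + 20 = 9 + 20 = 29.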

  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

  // The deepCopyInto operation will update the provided FunctionCallTrie by
  // re-creating the contents of this particular FunctionCallTrie in the other
  // FunctionCallTrie. It will do this using a Depth First Traversal from the
  // roots, and while doing so recreating the traversal in the provided
  // FunctionCallTrie.
  //
  // This operation will *not* destroy the state in `O`, and thus may cause
  // some duplicate entries in `O` if it is not empty.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  //
  // This function must *not* be called with a non-empty FunctionCallTrie |O|.
  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    DCHECK(O.getRoots().empty());

    // We then push the root into a stack, to use as the parent marker for new
    // nodes we push in as we're traversing depth-first down the call tree.
    struct NodeAndParent {
      FunctionCallTrie::Node *Node;
      FunctionCallTrie::Node *NewNode;
    };
    using Stack = Array<NodeAndParent>;

    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      // Add a node in O for this root.
      auto NewRoot = O.Nodes.AppendEmplace(
          nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), Root->CallCount,
          Root->CumulativeLocalTime, Root->FId);

      // If we cannot allocate more memory, we should bail out right away.
      if (UNLIKELY(NewRoot == nullptr))
        return;

      if (UNLIKELY(O.Roots.Append(NewRoot) == nullptr))
        return;

      // TODO: Figure out what to do if we fail to allocate any more stack
      // space. Maybe warn or report once?
      if (DFSStack.AppendEmplace(Root, NewRoot) == nullptr)
        return;
      while (!DFSStack.empty()) {
        NodeAndParent NP = DFSStack.back();
        DCHECK_NE(NP.Node, nullptr);
        DCHECK_NE(NP.NewNode, nullptr);
        DFSStack.trim(1);
        for (const auto Callee : NP.Node->Callees) {
          auto NewNode = O.Nodes.AppendEmplace(
              NP.NewNode, NodeIdPairArray(*O.NodeIdPairAllocator),
              Callee.NodePtr->CallCount, Callee.NodePtr->CumulativeLocalTime,
              Callee.FId);
          if (UNLIKELY(NewNode == nullptr))
            return;
          if (UNLIKELY(NP.NewNode->Callees.AppendEmplace(NewNode, Callee.FId) ==
                       nullptr))
            return;
          if (UNLIKELY(DFSStack.AppendEmplace(Callee.NodePtr, NewNode) ==
                       nullptr))
            return;
        }
      }
    }
  }
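
  // A hedged sketch of how the copy/merge operations compose in a profiling
  // collation pass (the names `ThreadTrie`, `Snapshot`, and `Global` are
  // hypothetical; allocators for the destination tries are set up as shown
  // earlier):
  //
  //   FunctionCallTrie Snapshot(SnapshotAllocators);
  //   ThreadTrie.deepCopyInto(Snapshot);   // Snapshot must start empty.
  //   Snapshot.mergeInto(Global);          // accumulate into a shared trie
  //                                        // (mergeInto is defined below).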

  // The mergeInto operation will update the provided FunctionCallTrie by
  // traversing the current trie's roots and updating (i.e. merging) the data
  // in the nodes with the data in the target's nodes. If the node doesn't
  // exist in the provided trie, we add a new one in the right position, and
  // inherit the data from the original (current) trie, along with all its
  // callees.
  //
  // This function is *not* thread-safe, and may require external
  // synchronisation of both "this" and |O|.
  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
    struct NodeAndTarget {
      FunctionCallTrie::Node *OrigNode;
      FunctionCallTrie::Node *TargetNode;
    };
    using Stack = Array<NodeAndTarget>;
    typename Stack::AllocatorType StackAllocator(
        profilingFlags()->stack_allocator_max);
    Stack DFSStack(StackAllocator);

    for (const auto Root : getRoots()) {
      Node *TargetRoot = nullptr;
      auto R = O.Roots.find_element(
          [&](const Node *Node) { return Node->FId == Root->FId; });
      if (R == nullptr) {
        TargetRoot = O.Nodes.AppendEmplace(
            nullptr, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
            Root->FId);
        if (UNLIKELY(TargetRoot == nullptr))
          return;

        O.Roots.Append(TargetRoot);
      } else {
        TargetRoot = *R;
      }

      DFSStack.AppendEmplace(Root, TargetRoot);
      while (!DFSStack.empty()) {
        NodeAndTarget NT = DFSStack.back();
        DCHECK_NE(NT.OrigNode, nullptr);
        DCHECK_NE(NT.TargetNode, nullptr);
        DFSStack.trim(1);
        // TODO: Update the histogram as well when we have it ready.
        NT.TargetNode->CallCount += NT.OrigNode->CallCount;
        NT.TargetNode->CumulativeLocalTime += NT.OrigNode->CumulativeLocalTime;
        for (const auto Callee : NT.OrigNode->Callees) {
          auto TargetCallee = NT.TargetNode->Callees.find_element(
              [&](const FunctionCallTrie::NodeIdPair &C) {
                return C.FId == Callee.FId;
              });
          if (TargetCallee == nullptr) {
            auto NewTargetNode = O.Nodes.AppendEmplace(
                NT.TargetNode, NodeIdPairArray(*O.NodeIdPairAllocator), 0u, 0u,
                Callee.FId);

            if (UNLIKELY(NewTargetNode == nullptr))
              return;

            TargetCallee =
                NT.TargetNode->Callees.AppendEmplace(NewTargetNode, Callee.FId);
          }
          DFSStack.AppendEmplace(Callee.NodePtr, TargetCallee->NodePtr);
        }
      }
    }
  }
};

} // namespace __xray

#endif // XRAY_FUNCTION_CALL_TRIE_H