#include "llvm/ProfileData/MemProf.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/HashBuilder.h"

namespace llvm {
namespace memprof {
MemProfSchema getFullSchema() {
  MemProfSchema List;
#define MIBEntryDef(NameTag, Name, Type) List.push_back(Meta::Name);
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
  return List;
}

MemProfSchema getHotColdSchema() {
  return {Meta::AllocCount, Meta::TotalSize, Meta::TotalLifetime,
          Meta::TotalLifetimeAccessDensity};
}

static size_t serializedSizeV0(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The number of frames to serialize.
  Size += sizeof(uint64_t);
  // The callstack frame ids.
  Size += sizeof(FrameId) * IAI.CallStack.size();
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV2(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The CallStackId
  Size += sizeof(CallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

static size_t serializedSizeV3(const IndexedAllocationInfo &IAI,
                               const MemProfSchema &Schema) {
  size_t Size = 0;
  // The linear call stack ID.
  Size += sizeof(LinearCallStackId);
  // The size of the payload.
  Size += PortableMemInfoBlock::serializedSize(Schema);
  return Size;
}

size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
                                             IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static size_t serializedSizeV0(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version0);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  for (const auto &Frames : Record.CallSites) {
    // The number of frame ids to serialize.
    Result += sizeof(uint64_t);
    Result += Frames.size() * sizeof(FrameId);
  }
  return Result;
}

static size_t serializedSizeV2(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version2);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The CallStackId
  Result += Record.CallSiteIds.size() * sizeof(CallStackId);
  return Result;
}

static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
                               const MemProfSchema &Schema) {
  // The number of alloc sites to serialize.
  size_t Result = sizeof(uint64_t);
  for (const IndexedAllocationInfo &N : Record.AllocSites)
    Result += N.serializedSize(Schema, Version3);

  // The number of callsites we have information for.
  Result += sizeof(uint64_t);
  // The linear call stack ID.
  Result += Record.CallSiteIds.size() * sizeof(LinearCallStackId);
  return Result;
}

size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
                                            IndexedVersion Version) const {
  switch (Version) {
  case Version0:
  case Version1:
    return serializedSizeV0(*this, Schema);
  case Version2:
    return serializedSizeV2(*this, Schema);
  case Version3:
    return serializedSizeV3(*this, Schema);
  }
  llvm_unreachable("unsupported MemProf version");
}

static void serializeV0(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<uint64_t>(N.CallStack.size());
    for (const FrameId &Id : N.CallStack)
      LE.write<FrameId>(Id);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSites.size());
  for (const auto &Frames : Record.CallSites) {
    LE.write<uint64_t>(Frames.size());
    for (const FrameId &Id : Frames)
      LE.write<FrameId>(Id);
  }
}

static void serializeV2(const IndexedMemProfRecord &Record,
                        const MemProfSchema &Schema, raw_ostream &OS) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    LE.write<CallStackId>(N.CSId);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds)
    LE.write<CallStackId>(CSId);
}

static void serializeV3(
    const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
    raw_ostream &OS,
    llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
  using namespace support;

  endian::Writer LE(OS, llvm::endianness::little);

  LE.write<uint64_t>(Record.AllocSites.size());
  for (const IndexedAllocationInfo &N : Record.AllocSites) {
    assert(MemProfCallStackIndexes.contains(N.CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
    N.Info.serialize(Schema, OS);
  }

  // Related contexts.
  LE.write<uint64_t>(Record.CallSiteIds.size());
  for (const auto &CSId : Record.CallSiteIds) {
    assert(MemProfCallStackIndexes.contains(CSId));
    LE.write<LinearCallStackId>(MemProfCallStackIndexes[CSId]);
  }
}
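
// For reference, serializeV3 above emits a record in the following layout,
// with all integer fields written in little-endian byte order:
//
//   uint64_t           number of allocation sites
//   for each allocation site:
//     LinearCallStackId  position of the call stack in the radix array
//     payload            the PortableMemInfoBlock fields selected by Schema
//   uint64_t           number of call sites
//   for each call site:
//     LinearCallStackId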

void IndexedMemProfRecord::serialize(
    const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
    llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
    const {
  switch (Version) {
  case Version0:
  case Version1:
    serializeV0(*this, Schema, OS);
    return;
  case Version2:
    serializeV2(*this, Schema, OS);
    return;
  case Version3:
    serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
    return;
  }
  llvm_unreachable("unsupported MemProf version");
}

static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    for (uint64_t J = 0; J < NumFrames; J++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Node.CallStack.push_back(Id);
    }
    Node.CSId = hashCallStack(Node.CallStack);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    const uint64_t NumFrames =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    llvm::SmallVector<FrameId> Frames;
    Frames.reserve(NumFrames);
    for (uint64_t K = 0; K < NumFrames; K++) {
      const FrameId Id =
          endian::readNext<FrameId, llvm::endianness::little>(Ptr);
      Frames.push_back(Id);
    }
    Record.CallSites.push_back(Frames);
    Record.CallSiteIds.push_back(hashCallStack(Frames));
  }

  return Record;
}

static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId = endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    CallStackId CSId =
        endian::readNext<CallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
                                          const unsigned char *Ptr) {
  using namespace support;

  IndexedMemProfRecord Record;

  // Read the meminfo nodes.
  const uint64_t NumNodes =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.AllocSites.reserve(NumNodes);
  for (uint64_t I = 0; I < NumNodes; I++) {
    IndexedAllocationInfo Node;
    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
    Record.AllocSites.push_back(Node);
  }

  // Read the callsite information.
  const uint64_t NumCtxs =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  Record.CallSiteIds.reserve(NumCtxs);
  for (uint64_t J = 0; J < NumCtxs; J++) {
    // We are storing LinearCallStackId in CallSiteIds, which is a vector of
    // CallStackId. Assert that CallStackId is no smaller than
    // LinearCallStackId.
    static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
    LinearCallStackId CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Record.CallSiteIds.push_back(CSId);
  }

  return Record;
}

IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
                                  const unsigned char *Ptr,
                                  IndexedVersion Version) {
  switch (Version) {
  case Version0:
  case Version1:
    return deserializeV0(Schema, Ptr);
  case Version2:
    return deserializeV2(Schema, Ptr);
  case Version3:
    return deserializeV3(Schema, Ptr);
  }
  llvm_unreachable("unsupported MemProf version");
}

MemProfRecord IndexedMemProfRecord::toMemProfRecord(
    llvm::function_ref<std::vector<Frame>(const CallStackId)> Callback) const {
  MemProfRecord Record;

  Record.AllocSites.reserve(AllocSites.size());
  for (const IndexedAllocationInfo &IndexedAI : AllocSites) {
    AllocationInfo AI;
    AI.Info = IndexedAI.Info;
    AI.CallStack = Callback(IndexedAI.CSId);
    Record.AllocSites.push_back(std::move(AI));
  }

  Record.CallSites.reserve(CallSiteIds.size());
  for (CallStackId CSId : CallSiteIds)
    Record.CallSites.push_back(Callback(CSId));

  return Record;
}
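
// Illustrative only (not part of this file's API surface): a caller that
// keeps fully expanded call stacks in an in-memory table could convert an
// indexed record roughly as follows, where CSTable is a hypothetical
// DenseMap<CallStackId, std::vector<Frame>>:
//
//   MemProfRecord R = IndexedRecord.toMemProfRecord(
//       [&](CallStackId CSId) { return CSTable.lookup(CSId); });
//
// Real readers typically resolve CallStackIds against the call stack tables
// stored in the indexed profile instead of an in-memory map.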

GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
  // Canonicalize the function name to drop suffixes such as ".llvm.". Note
  // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not
  // drop those by default. This is by design to differentiate internal
  // linkage functions during matching. By dropping the other suffixes we can
  // then match functions in the profile use phase prior to their addition.
  // Note that this applies to both instrumented and sampled function names.
  StringRef CanonicalName =
      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);

  // We use the function guid which we expect to be a uint64_t. At
  // this time, it is the lower 64 bits of the md5 of the canonical
  // function name.
  return Function::getGUID(CanonicalName);
}

Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
  using namespace support;

  const unsigned char *Ptr = Buffer;
  const uint64_t NumSchemaIds =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  if (NumSchemaIds > static_cast<uint64_t>(Meta::Size)) {
    return make_error<InstrProfError>(instrprof_error::malformed,
                                      "memprof schema invalid");
  }

  MemProfSchema Result;
  for (size_t I = 0; I < NumSchemaIds; I++) {
    const uint64_t Tag =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (Tag >= static_cast<uint64_t>(Meta::Size)) {
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "memprof schema invalid");
    }
    Result.push_back(static_cast<Meta>(Tag));
  }
  // Advance the buffer to one past the schema if we succeeded.
  Buffer = Ptr;
  return Result;
}

CallStackId hashCallStack(ArrayRef<FrameId> CS) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (FrameId F : CS)
    HashBuilder.add(F);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  CallStackId CSId;
  std::memcpy(&CSId, Hash.data(), sizeof(Hash));
  return CSId;
}

// Encode a call stack into RadixArray. Return the starting index within
// RadixArray. For each call stack we encode, we emit two or three components
// into RadixArray. If a given call stack doesn't have a common prefix relative
// to the previous one, we emit:
//
// - the frames in the given call stack in the root-to-leaf order
//
// - the length of the given call stack
//
// If a given call stack has a non-empty common prefix relative to the previous
// one, we emit:
//
// - the relative location of the common prefix, encoded as a negative number.
//
// - a portion of the given call stack that's beyond the common prefix
//
// - the length of the given call stack, including the length of the common
//   prefix.
//
// The resulting RadixArray requires a somewhat unintuitive backward traversal
// to reconstruct a call stack -- read the call stack length and scan backward
// while collecting frames in the leaf to root order. build, the caller of this
// function, reverses RadixArray in place so that we can reconstruct a call
// stack as if we were deserializing an array in a typical way -- the call
// stack length followed by the frames in the leaf-to-root order except that
// we need to handle pointers to parents along the way.
//
// To quickly determine the location of the common prefix within RadixArray,
// Indexes caches the indexes of the previous call stack's frames within
// RadixArray.
LinearCallStackId CallStackRadixTreeBuilder::encodeCallStack(
    const llvm::SmallVector<FrameId> *CallStack,
    const llvm::SmallVector<FrameId> *Prev,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes) {
  // Compute the length of the common root prefix between Prev and CallStack.
  uint32_t CommonLen = 0;
  if (Prev) {
    auto Pos = std::mismatch(Prev->rbegin(), Prev->rend(), CallStack->rbegin(),
                             CallStack->rend());
    CommonLen = std::distance(CallStack->rbegin(), Pos.second);
  }

  // Drop the portion beyond CommonLen.
  assert(CommonLen <= Indexes.size());
  Indexes.resize(CommonLen);

  // Append a pointer to the parent.
  if (CommonLen) {
    uint32_t CurrentIndex = RadixArray.size();
    uint32_t ParentIndex = Indexes.back();
    // The offset to the parent must be negative because we are pointing to an
    // element we've already added to RadixArray.
    assert(ParentIndex < CurrentIndex);
    RadixArray.push_back(ParentIndex - CurrentIndex);
  }

  // Copy the part of the call stack beyond the common prefix to RadixArray.
  assert(CommonLen <= CallStack->size());
  for (FrameId F : llvm::drop_begin(llvm::reverse(*CallStack), CommonLen)) {
    // Remember the index of F in RadixArray.
    Indexes.push_back(RadixArray.size());
    RadixArray.push_back(MemProfFrameIndexes.find(F)->second);
  }
  assert(CallStack->size() == Indexes.size());

  // End with the call stack length.
  RadixArray.push_back(CallStack->size());

  // Return the index within RadixArray where we can start reconstructing a
  // given call stack from.
  return RadixArray.size() - 1;
}

void CallStackRadixTreeBuilder::build(
    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
        &&MemProfCallStackData,
    const llvm::DenseMap<FrameId, LinearFrameId> &MemProfFrameIndexes,
    llvm::DenseMap<FrameId, FrameStat> &FrameHistogram) {
  // Take the vector portion of MemProfCallStackData. The vector is exactly
  // what we need to sort. Also, we no longer need its lookup capability.
  llvm::SmallVector<CSIdPair, 0> CallStacks = MemProfCallStackData.takeVector();

  // Return early if we have no work to do.
  if (CallStacks.empty()) {
    RadixArray.clear();
    CallStackPos.clear();
    return;
  }

  // Sorting the list of call stacks in the dictionary order is sufficient to
  // maximize the length of the common prefix between two adjacent call stacks
  // and thus minimize the length of RadixArray. However, we go one step
  // further and try to reduce the number of times we follow pointers to
  // parents during deserialization. Consider a poorly encoded radix tree:
  //
  // CallStackId 1:  f1 -> f2 -> f3
  //                  |
  // CallStackId 2:   +--- f4 -> f5
  //                        |
  // CallStackId 3:         +--> f6
  //
  // Here, f2 and f4 appear once and twice, respectively, in the call stacks.
  // Once we encode CallStackId 1 into RadixArray, every other call stack with
  // common prefix f1 ends up pointing to CallStackId 1. Since CallStackId 3
  // shares "f1 f4" with CallStackId 2, CallStackId 3 needs to follow pointers
  // to parents twice.
  //
  // We try to alleviate the situation by sorting the list of call stacks by
  // comparing the popularity of frames rather than the integer values of
  // FrameIds. In the example above, f4 is more popular than f2, so we sort
  // the call stacks and encode them as:
  //
  // CallStackId 2:  f1 -- f4 -> f5
  //                  |     |
  // CallStackId 3:   |     +--> f6
  //                  |
  // CallStackId 1:   +--> f2 -> f3
  //
  // Notice that CallStackId 3 follows a pointer to a parent only once.
  //
  // All this is a quick-n-dirty trick to reduce the number of jumps. The
  // proper way would be to compute the weight of each radix tree node -- how
  // many call stacks use a given radix tree node, and encode a radix tree
  // from the heaviest node first. We do not do so because that's a lot of
  // work.
  llvm::sort(CallStacks, [&](const CSIdPair &L, const CSIdPair &R) {
    // Call stacks are stored from leaf to root. Perform comparisons from the
    // root.
    return std::lexicographical_compare(
        L.second.rbegin(), L.second.rend(), R.second.rbegin(), R.second.rend(),
        [&](FrameId F1, FrameId F2) {
          uint64_t H1 = FrameHistogram[F1].Count;
          uint64_t H2 = FrameHistogram[F2].Count;
          // Popular frames should come later because we encode call stacks
          // from the last one in the list.
          if (H1 != H2)
            return H1 < H2;
          // For sort stability.
          return F1 < F2;
        });
  });

  // Reserve some reasonable amount of storage.
  RadixArray.clear();
  RadixArray.reserve(CallStacks.size() * 8);

  // Indexes will grow as long as the longest call stack.
  Indexes.clear();
  Indexes.reserve(512);

  // CallStackPos will grow to exactly CallStacks.size() entries.
  CallStackPos.clear();
  CallStackPos.reserve(CallStacks.size());

  // Compute the radix array. We encode one call stack at a time, computing
  // the longest prefix that's shared with the previous call stack we encode.
  // For each call stack we encode, we remember a mapping from CallStackId to
  // its position within RadixArray.
  //
  // As an optimization, we encode from the last call stack in CallStacks to
  // reduce the number of times we follow pointers to the parents. Consider
  // the list of call stacks that has been sorted in the dictionary order:
  //
  // Call Stack 1: F1
  // Call Stack 2: F1 -> F2
  // Call Stack 3: F1 -> F2 -> F3
  //
  // If we traversed CallStacks in the forward order, we would end up with a
  // radix tree like:
  //
  // Call Stack 1: F1
  //               |
  // Call Stack 2: +---> F2
  //                     |
  // Call Stack 3:       +---> F3
  //
  // Notice that each call stack jumps to the previous one. However, if we
  // traverse CallStacks in the reverse order, then Call Stack 3 has the
  // complete call stack encoded without any pointers. Call Stack 1 and 2
  // point to appropriate prefixes of Call Stack 3.
  const llvm::SmallVector<FrameId> *Prev = nullptr;
  for (const auto &[CSId, CallStack] : llvm::reverse(CallStacks)) {
    LinearCallStackId Pos =
        encodeCallStack(&CallStack, Prev, MemProfFrameIndexes);
    CallStackPos.insert({CSId, Pos});
    Prev = &CallStack;
  }

  // "RadixArray.size() - 1" below is problematic if RadixArray is empty.
  assert(!RadixArray.empty());

  // Reverse the radix array in place. We do so mostly for intuitive
  // deserialization where we would read the length field and then the call
  // stack frames proper just like any other array deserialization, except
  // that we have occasional jumps to take advantage of prefixes.
  for (size_t I = 0, J = RadixArray.size() - 1; I < J; ++I, --J)
    std::swap(RadixArray[I], RadixArray[J]);

  // "Reverse" the indexes stored in CallStackPos.
  for (auto &[K, V] : CallStackPos)
    V = RadixArray.size() - 1 - V;
}
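
// Worked example (illustrative only; the frame IDs and linear frame indexes
// below are made up). Suppose we have two call stacks, stored leaf to root:
//
//   CS A: F3 -> F2 -> F1        CS B: F4 -> F2 -> F1
//
// and MemProfFrameIndexes maps F1 -> 1, F2 -> 2, F3 -> 3, F4 -> 4. Encoding B
// first (it has no previous call stack) and then A (which shares the root
// prefix F1 F2 with B) produces, before the in-place reversal:
//
//   RadixArray = [1, 2, 4, 3, -3, 3, 3]
//                 F1 F2 F4 len ptr F3 len
//
// where "ptr" is the relative offset to the parent (the element at index 1,
// F2) and each "len" is a call stack length. After the reversal, RadixArray
// becomes [3, 3, -3, 3, 4, 2, 1], and CallStackPos maps A to position 0 and B
// to position 3. To reconstruct A, read the length 3 at position 0, collect
// F3, follow the -3 entry three elements forward to F2, and finish with F1.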

llvm::DenseMap<FrameId, FrameStat>
computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
                          &MemProfCallStackData) {
  llvm::DenseMap<FrameId, FrameStat> Histogram;

  for (const auto &KV : MemProfCallStackData) {
    const auto &CS = KV.second;
    for (unsigned I = 0, E = CS.size(); I != E; ++I) {
      auto &S = Histogram[CS[I]];
      ++S.Count;
      S.PositionSum += I;
    }
  }
  return Histogram;
}

void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
  for (const auto &AS : Record.AllocSites) {
    assert(AS.CSId == hashCallStack(AS.CallStack));
    (void)AS;
  }
}

void verifyFunctionProfileData(
    const llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>
        &FunctionProfileData) {
  for (const auto &[GUID, Record] : FunctionProfileData) {
    (void)GUID;
    verifyIndexedMemProfRecord(Record);
  }
}

} // namespace memprof
} // namespace llvm