//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// MemProf data is serialized in writeMemProf provided in this file.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfRadixTree.h"
#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"

namespace llvm {

// Serialize Schema: a uint64_t entry count followed by one uint64_t per
// schema member. The reader side reconstructs this via
// memprof::readMemProfSchema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}

// Serialize MemProfRecordData as an on-disk chained hash table keyed by
// function GUID. Return RecordTableOffset (the value returned by
// OnDiskChainedHashTableGenerator::Emit, i.e. the table's bucket offset).
//
// MemProfRecordData is consumed: it is cleared once its contents have been
// copied into the table generator.
//
// MemProfCallStackIndexes is only used by the radix-tree-based formats
// (V3/V4) to translate CallStackIds into linear call stack IDs; it is
// nullptr for V2.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData as an on-disk chained hash table keyed by
// FrameId (used by the V2 format only). Return FrameTableOffset.
// MemProfFrameData is consumed (cleared) by this function.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData as a flat array of Frames (used by the
// radix-tree-based formats, V3+). Return the mapping from FrameIds to their
// indexes within the frame array. MemProfFrameData is consumed (cleared) by
// this function.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break a tie by serializing the one that
  // tends to appear earlier in call stacks. We implement the tie-breaking
  // mechanism by computing the sum of indexes within call stacks for each
  // Frame. If we still have a tie, then we just resort to compare two
  // FrameIds, which is just for stability of output.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to
               // appear earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from linear IDs to FrameIds.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

// Serialize MemProfCallStackData as an on-disk chained hash table keyed by
// CallStackId (used by the V2 format only). Return CallStackTableOffset.
// MemProfCallStackData is consumed (cleared) by this function.
static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

// Serialize MemProfCallStackData as a radix tree of 32-bit elements (used by
// the radix-tree-based formats, V3+). Return the mapping from CallStackIds to
// the position of each call stack within the radix array. On return,
// NumElements holds the number of 32-bit elements written.
// MemProfCallStackData is consumed (moved into the builder, then cleared).
static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
    unsigned &NumElements) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
  Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  NumElements = Builder.getRadixArray().size();
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  // The five header offsets are not known yet; write placeholder zeros now
  // and patch the real values in at HeaderUpdatePos once all sections have
  // been emitted.
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  // Back-patch the header with the now-known offsets, in the same order as
  // the placeholders above.
  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out the radix-tree-based MemProf formats (V3 and V4). Layout:
// uint64_t Version
// uint64_t CallStackPayloadOffset
// uint64_t RecordPayloadOffset
// uint64_t RecordTableOffset
// uint64_t DataAccessProfOffset (V4 only)
// MemProfSummary (V4 only)
// Schema
// Frame array (ordered for cache locality)
// Call stack radix tree
// OnDiskChainedHashTable MemProfRecordData
// Data access profile payload (V4 only, optional)
static Error writeMemProfRadixTreeBased(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    memprof::IndexedVersion Version, bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
        nullptr,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum = nullptr) {
  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
         "Unsupported version for radix tree format");

  OS.write(Version); // Write the specific version (V3 or V4)
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  if (Version >= memprof::Version4) {
    OS.write(0ULL); // Reserve space for the data access profile offset.

    // NOTE(review): MemProfSum is dereferenced unconditionally for V4;
    // callers are presumably required to pass a non-null summary — confirm.
    MemProfSum->write(OS);
  }

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  // The histogram drives the cache-friendly frame ordering in
  // writeMemProfFrameArray and the radix tree construction below.
  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  // The number of elements in the call stack array.
  unsigned NumElements = 0;
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes =
          writeMemProfCallStackArray(OS, MemProfData.CallStacks,
                                     MemProfFrameIndexes, FrameHistogram,
                                     NumElements);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset = writeMemProfRecords(
      OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);

  // DataAccessProfOffset of zero means "no data access profile present".
  uint64_t DataAccessProfOffset = 0;
  if (DataAccessProfileData != nullptr) {
    assert(Version >= memprof::Version4 &&
           "Data access profiles are added starting from v4");
    DataAccessProfOffset = OS.tell();
    if (Error E = DataAccessProfileData->serialize(OS))
      return E;
  }

  // Verify that the computation for the number of elements in the call stack
  // array works.
  assert(CallStackPayloadOffset +
             NumElements * sizeof(memprof::LinearFrameId) ==
         RecordPayloadOffset);

  // Back-patch the header, mirroring the placeholder order above.
  SmallVector<uint64_t, 4> Header = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  if (Version >= memprof::Version4)
    Header.push_back(DataAccessProfOffset);

  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
                                    MemProfFullSchema);
}

// Write out MemProf Version4
static Error writeMemProfV4(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
  return writeMemProfRadixTreeBased(
      OS, MemProfData, memprof::Version4, MemProfFullSchema,
      std::move(DataAccessProfileData), std::move(MemProfSum));
}

// Write out the MemProf data in a requested version.
// Dispatch to the writer for the requested version. MemProfData is consumed
// by the writer; DataAccessProfileData and MemProfSum are only used for V4.
Error writeMemProf(
    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
    std::unique_ptr<memprof::MemProfSummary> MemProfSum) {
  switch (MemProfVersionRequested) {
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  case memprof::Version4:
    return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
                          std::move(DataAccessProfileData),
                          std::move(MemProfSum));
  }

  // Reached only if MemProfVersionRequested holds a value outside the
  // enumerators handled above.
  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

// Deserialize the V2 header and set up the three on-disk hash table readers
// (records, frames, call stacks). Start points at the beginning of the
// MemProf section; Ptr points just past the version word. The read order
// below must mirror the header layout written by writeMemProfV2.
Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
                                          const unsigned char *Ptr) {
  // The value returned from RecordTableGenerator.Emit.
  const uint64_t RecordTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The offset in the stream right before invoking
  // FrameTableGenerator.Emit.
  const uint64_t FramePayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The value returned from FrameTableGenerator.Emit.
  const uint64_t FrameTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // The offset in the stream right before invoking
  // CallStackTableGenerator.Emit.
  uint64_t CallStackPayloadOffset = 0;
  // The value returned from CallStackTableGenerator.Emit.
  uint64_t CallStackTableOffset = 0;
  // NOTE(review): with Version2 as the minimum supported version this guard
  // looks always-true here — possibly retained from when older versions were
  // still readable; confirm before simplifying.
  if (Version >= memprof::Version2) {
    CallStackPayloadOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    CallStackTableOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  }

  // Read the schema.
  auto SchemaOr = memprof::readMemProfSchema(Ptr);
  if (!SchemaOr)
    return SchemaOr.takeError();
  Schema = SchemaOr.get();

  // Now initialize the table reader with a pointer into data buffer.
  // After readMemProfSchema, Ptr sits at the start of the record payload.
  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
      /*Buckets=*/Start + RecordTableOffset,
      /*Payload=*/Ptr,
      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));

  // Initialize the frame table reader with the payload and bucket offsets.
  MemProfFrameTable.reset(MemProfFrameHashTable::Create(
      /*Buckets=*/Start + FrameTableOffset,
      /*Payload=*/Start + FramePayloadOffset,
      /*Base=*/Start));

  if (Version >= memprof::Version2)
    MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
        /*Buckets=*/Start + CallStackTableOffset,
        /*Payload=*/Start + CallStackPayloadOffset,
        /*Base=*/Start));

  return Error::success();
}

// Deserialize the V3/V4 (radix-tree-based) header. Start points at the
// beginning of the MemProf section; Ptr points just past the version word.
// The read order below must mirror the header layout written by
// writeMemProfRadixTreeBased.
Error IndexedMemProfReader::deserializeRadixTreeBased(
    const unsigned char *Start, const unsigned char *Ptr,
    memprof::IndexedVersion Version) {
  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
         "Unsupported version for radix tree format");
  // The offset in the stream right before invoking
  // CallStackTableGenerator.Emit.
  const uint64_t CallStackPayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The offset in the stream right before invoking RecordTableGenerator.Emit.
  const uint64_t RecordPayloadOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  // The value returned from RecordTableGenerator.Emit.
  const uint64_t RecordTableOffset =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // Zero means the writer emitted no data access profile section.
  uint64_t DataAccessProfOffset = 0;
  if (Version >= memprof::Version4) {
    DataAccessProfOffset =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
  }

  // Read the schema.
  auto SchemaOr = memprof::readMemProfSchema(Ptr);
  if (!SchemaOr)
    return SchemaOr.takeError();
  Schema = SchemaOr.get();

  // After the schema, Ptr sits at the start of the frame array; the call
  // stack radix tree follows at its recorded offset.
  FrameBase = Ptr;
  CallStackBase = Start + CallStackPayloadOffset;

  // Compute the number of elements in the radix tree array. Since we use this
  // to reserve enough bits in a BitVector, it's totally OK if we overestimate
  // this number a little bit because of padding just before the next section.
  RadixTreeSize = (RecordPayloadOffset - CallStackPayloadOffset) /
                  sizeof(memprof::LinearFrameId);

  // Now initialize the table reader with a pointer into data buffer.
  MemProfRecordTable.reset(MemProfRecordHashTable::Create(
      /*Buckets=*/Start + RecordTableOffset,
      /*Payload=*/Start + RecordPayloadOffset,
      /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));

  assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
         "Data access profile is either empty or after the record table");
  if (DataAccessProfOffset > RecordTableOffset) {
    DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
    const unsigned char *DAPPtr = Start + DataAccessProfOffset;
    if (Error E = DataAccessProfileData->deserialize(DAPPtr))
      return E;
  }

  return Error::success();
}

// Entry point: read the version word at Start + MemProfOffset, validate it
// against the supported range, and dispatch to the matching deserializer.
Error IndexedMemProfReader::deserialize(const unsigned char *Start,
                                        uint64_t MemProfOffset) {
  const unsigned char *Ptr = Start + MemProfOffset;

  // Read the MemProf version number.
  const uint64_t FirstWord =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // Check if the version is supported
  if (FirstWord >= memprof::MinimumSupportedVersion &&
      FirstWord <= memprof::MaximumSupportedVersion) {
    // Everything is good. We can proceed to deserialize the rest.
    Version = static_cast<memprof::IndexedVersion>(FirstWord);
  } else {
    return make_error<InstrProfError>(
        instrprof_error::unsupported_version,
        formatv("MemProf version {} not supported; "
                "requires version between {} and {}, inclusive",
                FirstWord, memprof::MinimumSupportedVersion,
                memprof::MaximumSupportedVersion));
  }

  switch (Version) {
  case memprof::Version2:
    if (Error E = deserializeV2(Start, Ptr))
      return E;
    break;
  case memprof::Version3:
  case memprof::Version4:
    // V3 and V4 share the same high-level structure (radix tree, linear IDs).
    if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
      return E;
    break;
  }

  return Error::success();
}
} // namespace llvm