//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for writing profiling data for clang's
// instrumentation based PGO and coverage.
//
//===----------------------------------------------------------------------===//

#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

// A struct to define how the data stream should be patched. For Indexed
// profiling, only uint64_t data type is needed.
struct PatchItem {
  uint64_t Pos;         // Where to patch.
  ArrayRef<uint64_t> D; // An array of source data.
};

namespace llvm {

// A wrapper class to abstract writer stream with support of bytes
// back patching.
class ProfOStream {
public:
  ProfOStream(raw_fd_ostream &FD)
      : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
  ProfOStream(raw_string_ostream &STR)
      : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}

  [[nodiscard]] uint64_t tell() const { return OS.tell(); }
  void write(uint64_t V) { LE.write<uint64_t>(V); }
  void write32(uint32_t V) { LE.write<uint32_t>(V); }
  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }

  // \c patch can only be called when all data is written and flushed.
  // For raw_string_ostream, the patch is done on the target string
  // directly and it won't be reflected in the stream's internal buffer.
  void patch(ArrayRef<PatchItem> P) {
    using namespace support;

    if (IsFDOStream) {
      raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
      const uint64_t LastPos = FDOStream.tell();
      for (const auto &K : P) {
        FDOStream.seek(K.Pos);
        for (uint64_t Elem : K.D)
          write(Elem);
      }
      // Reset the stream to the last position after patching so that users
      // don't accidentally overwrite data. This makes it consistent with
      // the string stream below which replaces the data directly.
      FDOStream.seek(LastPos);
    } else {
      raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
      std::string &Data = SOStream.str(); // with flush
      for (const auto &K : P) {
        for (int I = 0, E = K.D.size(); I != E; I++) {
          uint64_t Bytes =
              endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
          Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
                       (const char *)&Bytes, sizeof(uint64_t));
        }
      }
    }
  }

  // If \c OS is an instance of \c raw_fd_ostream, this field will be
  // true. Otherwise, \c OS will be a raw_string_ostream.
  bool IsFDOStream;
  raw_ostream &OS;
  support::endian::Writer LE;
};

class InstrProfRecordWriterTrait {
public:
  using key_type = StringRef;
  using key_type_ref = StringRef;

  using data_type = const InstrProfWriter::ProfilingData *const;
  using data_type_ref = const InstrProfWriter::ProfilingData *const;

  using hash_value_type = uint64_t;
  using offset_type = uint64_t;

  llvm::endianness ValueProfDataEndianness = llvm::endianness::little;
  InstrProfSummaryBuilder *SummaryBuilder;
  InstrProfSummaryBuilder *CSSummaryBuilder;

  InstrProfRecordWriterTrait() = default;

  static hash_value_type ComputeHash(key_type_ref K) {
    return IndexedInstrProf::ComputeHash(K);
  }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);

    offset_type N = K.size();
    LE.write<offset_type>(N);

    offset_type M = 0;
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      M += sizeof(uint64_t); // The function hash
      M += sizeof(uint64_t); // The size of the Counts vector
      M += ProfRecord.Counts.size() * sizeof(uint64_t);
      M += sizeof(uint64_t); // The size of the Bitmap vector
      M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t);

      // Value data
      M += ValueProfData::getSize(ProfileData.second);
    }
    LE.write<offset_type>(M);

    return std::make_pair(N, M);
  }

  void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) {
    Out.write(K.data(), N);
  }

  void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
    using namespace support;

    endian::Writer LE(Out, llvm::endianness::little);
    for (const auto &ProfileData : *V) {
      const InstrProfRecord &ProfRecord = ProfileData.second;
      if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first))
        CSSummaryBuilder->addRecord(ProfRecord);
      else
        SummaryBuilder->addRecord(ProfRecord);

      LE.write<uint64_t>(ProfileData.first); // Function hash
      LE.write<uint64_t>(ProfRecord.Counts.size());
      for (uint64_t I : ProfRecord.Counts)
        LE.write<uint64_t>(I);

      LE.write<uint64_t>(ProfRecord.BitmapBytes.size());
      for (uint64_t I : ProfRecord.BitmapBytes)
        LE.write<uint64_t>(I);

      // Write value data
      std::unique_ptr<ValueProfData> VDataPtr =
          ValueProfData::serializeFrom(ProfileData.second);
      uint32_t S = VDataPtr->getSize();
      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
      Out.write((const char *)VDataPtr.get(), S);
    }
  }
};

} // end namespace llvm

InstrProfWriter::InstrProfWriter(
    bool Sparse, uint64_t TemporalProfTraceReservoirSize,
    uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
    : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
      TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
      InfoObj(new InstrProfRecordWriterTrait()),
      WritePrevVersion(WritePrevVersion),
      MemProfVersionRequested(MemProfVersionRequested),
      MemProfFullSchema(MemProfFullSchema) {}

InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
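
// Illustrative sketch (not part of the writer itself): the sections emitted
// below follow a reserve-then-patch pattern. An offset is captured with
// \c tell(), zero placeholders are written, and the real values are
// back-patched once they are known. Names such as FDStream and SectionStart
// are placeholders for illustration only.
//
//   ProfOStream OS(FDStream);
//   uint64_t PatchPos = OS.tell();
//   OS.write(0);                       // Placeholder for a section offset.
//   // ... emit variable-length payload; remember its start as SectionStart ...
//   uint64_t Offsets[] = {SectionStart};
//   OS.patch({{PatchPos, Offsets}});   // Fill in the real offset.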

// Internal interface for testing purposes only.
void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
  InfoObj->ValueProfDataEndianness = Endianness;
}

void InstrProfWriter::setOutputSparse(bool Sparse) {
  this->Sparse = Sparse;
}

void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto Name = I.Name;
  auto Hash = I.Hash;
  addRecord(Name, Hash, std::move(I), Weight, Warn);
}

void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
                                    OverlapStats &Overlap,
                                    OverlapStats &FuncLevelOverlap,
                                    const OverlapFuncFilters &FuncFilter) {
  auto Name = Other.Name;
  auto Hash = Other.Hash;
  Other.accumulateCounts(FuncLevelOverlap.Test);
  if (!FunctionData.contains(Name)) {
    Overlap.addOneUnique(FuncLevelOverlap.Test);
    return;
  }
  if (FuncLevelOverlap.Test.CountSum < 1.0f) {
    Overlap.Overlap.NumEntries += 1;
    return;
  }
  auto &ProfileDataMap = FunctionData[Name];
  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  if (NewFunc) {
    Overlap.addOneMismatch(FuncLevelOverlap.Test);
    return;
  }
  InstrProfRecord &Dest = Where->second;

  uint64_t ValueCutoff = FuncFilter.ValueCutoff;
  if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
    ValueCutoff = 0;

  Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
}

void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
                                InstrProfRecord &&I, uint64_t Weight,
                                function_ref<void(Error)> Warn) {
  auto &ProfileDataMap = FunctionData[Name];

  bool NewFunc;
  ProfilingData::iterator Where;
  std::tie(Where, NewFunc) =
      ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord()));
  InstrProfRecord &Dest = Where->second;

  auto MapWarn = [&](instrprof_error E) {
    Warn(make_error<InstrProfError>(E));
  };

  if (NewFunc) {
    // We've never seen a function with this name and hash, add it.
    Dest = std::move(I);
    if (Weight > 1)
      Dest.scale(Weight, 1, MapWarn);
  } else {
    // We're updating a function we've seen before.
    Dest.merge(I, Weight, MapWarn);
  }

  Dest.sortValueData();
}

void InstrProfWriter::addMemProfRecord(
    const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
  auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record});
  // If we inserted a new record then we are done.
  if (Inserted) {
    return;
  }
  memprof::IndexedMemProfRecord &Existing = Iter->second;
  Existing.merge(Record);
}

bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
                                      const memprof::Frame &Frame,
                                      function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.Frames.insert({Id, Frame});
  // If a mapping already exists for the current frame id and it does not
  // match the new mapping provided, then warn and bail out. We don't support
  // merging memprof data whose Frame -> Id mapping is inconsistent across
  // profiles.
  if (!Inserted && Iter->second != Frame) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "frame to id mapping mismatch"));
    return false;
  }
  return true;
}

bool InstrProfWriter::addMemProfCallStack(
    const memprof::CallStackId CSId,
    const llvm::SmallVector<memprof::FrameId> &CallStack,
    function_ref<void(Error)> Warn) {
  auto [Iter, Inserted] = MemProfData.CallStacks.insert({CSId, CallStack});
  // If a mapping already exists for the current call stack id and it does not
  // match the new mapping provided, then warn and bail out. We don't support
  // merging memprof data whose CallStack -> Id mapping is inconsistent across
  // profiles.
  if (!Inserted && Iter->second != CallStack) {
    Warn(make_error<InstrProfError>(instrprof_error::malformed,
                                    "call stack to id mapping mismatch"));
    return false;
  }
  return true;
}

void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
  llvm::append_range(BinaryIds, BIs);
}

void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
  assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
  assert(!Trace.FunctionNameRefs.empty());
  if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
    // Simply append the trace if we have not yet hit our reservoir size limit.
    TemporalProfTraces.push_back(std::move(Trace));
  } else {
    // Otherwise, replace a random trace in the stream.
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      TemporalProfTraces[RandomIndex] = std::move(Trace);
  }
  ++TemporalProfTraceStreamSize;
}

void InstrProfWriter::addTemporalProfileTraces(
    SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
  for (auto &Trace : SrcTraces)
    if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
      Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
  llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); });
  // Assume that the source has the same reservoir size as the destination to
  // avoid needing to record it in the indexed profile format.
  bool IsDestSampled =
      (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
  bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
  if (!IsDestSampled && IsSrcSampled) {
    // If only one of the two streams is sampled, ensure that it is Dest.
    std::swap(TemporalProfTraces, SrcTraces);
    std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
    std::swap(IsDestSampled, IsSrcSampled);
  }
  if (!IsSrcSampled) {
    // If the source stream is not sampled, we add each source trace normally.
    for (auto &Trace : SrcTraces)
      addTemporalProfileTrace(std::move(Trace));
    return;
  }
  // Otherwise, we find the traces that would have been removed if we added
  // the whole source stream.
  SmallSetVector<uint64_t, 8> IndicesToReplace;
  for (uint64_t I = 0; I < SrcStreamSize; I++) {
    std::uniform_int_distribution<uint64_t> Distribution(
        0, TemporalProfTraceStreamSize);
    uint64_t RandomIndex = Distribution(RNG);
    if (RandomIndex < TemporalProfTraces.size())
      IndicesToReplace.insert(RandomIndex);
    ++TemporalProfTraceStreamSize;
  }
  // Then we insert a random sample of the source traces.
  llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
  for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
    TemporalProfTraces[Index] = std::move(Trace);
}

void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
                                             function_ref<void(Error)> Warn) {
  for (auto &I : IPW.FunctionData)
    for (auto &Func : I.getValue())
      addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);

  BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
  for (auto &I : IPW.BinaryIds)
    addBinaryIds(I);

  addTemporalProfileTraces(IPW.TemporalProfTraces,
                           IPW.TemporalProfTraceStreamSize);

  MemProfData.Frames.reserve(IPW.MemProfData.Frames.size());
  for (auto &[FrameId, Frame] : IPW.MemProfData.Frames) {
    // If we weren't able to add the frame mappings then it doesn't make sense
    // to try to merge the records from this profile.
    if (!addMemProfFrame(FrameId, Frame, Warn))
      return;
  }

  MemProfData.CallStacks.reserve(IPW.MemProfData.CallStacks.size());
  for (auto &[CSId, CallStack] : IPW.MemProfData.CallStacks) {
    if (!addMemProfCallStack(CSId, CallStack, Warn))
      return;
  }

  MemProfData.Records.reserve(IPW.MemProfData.Records.size());
  for (auto &[GUID, Record] : IPW.MemProfData.Records) {
    addMemProfRecord(GUID, Record);
  }
}

bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
  if (!Sparse)
    return true;
  for (const auto &Func : PD) {
    const InstrProfRecord &IPR = Func.second;
    if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
      return true;
    if (llvm::any_of(IPR.BitmapBytes, [](uint8_t Byte) { return Byte > 0; }))
      return true;
  }
  return false;
}

static void setSummary(IndexedInstrProf::Summary *TheSummary,
                       ProfileSummary &PS) {
  using namespace IndexedInstrProf;

  const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
  TheSummary->NumSummaryFields = Summary::NumKinds;
  TheSummary->NumCutoffEntries = Res.size();
  TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
  TheSummary->set(Summary::MaxBlockCount, PS.getMaxCount());
  TheSummary->set(Summary::MaxInternalBlockCount, PS.getMaxInternalCount());
  TheSummary->set(Summary::TotalBlockCount, PS.getTotalCount());
  TheSummary->set(Summary::TotalNumBlocks, PS.getNumCounts());
  TheSummary->set(Summary::TotalNumFunctions, PS.getNumFunctions());
  for (unsigned I = 0; I < Res.size(); I++)
    TheSummary->setEntry(I, Res[I]);
}

// Serialize Schema.
static void writeMemProfSchema(ProfOStream &OS,
                               const memprof::MemProfSchema &Schema) {
  OS.write(static_cast<uint64_t>(Schema.size()));
  for (const auto Id : Schema)
    OS.write(static_cast<uint64_t>(Id));
}
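
// For instance (illustrative only): a schema with four fields, such as the
// hot/cold schema used by the V2/V3 writers below, is serialized as five
// little-endian uint64_t words -- the count 4 followed by the numeric id of
// each field.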

// Serialize MemProfRecordData. Return RecordTableOffset.
static uint64_t writeMemProfRecords(
    ProfOStream &OS,
    llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
        &MemProfRecordData,
    memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
    llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
        *MemProfCallStackIndexes = nullptr) {
  memprof::RecordWriterTrait RecordWriter(Schema, Version,
                                          MemProfCallStackIndexes);
  OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
      RecordTableGenerator;
  for (auto &[GUID, Record] : MemProfRecordData) {
    // Insert the key (func hash) and value (memprof record).
    RecordTableGenerator.insert(GUID, Record, RecordWriter);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfRecordData.clear();

  // The call to Emit invokes RecordWriterTrait::EmitData which destructs
  // the memprof record copies owned by the RecordTableGenerator. This works
  // because the RecordTableGenerator is not used after this point.
  return RecordTableGenerator.Emit(OS.OS, RecordWriter);
}

// Serialize MemProfFrameData. Return FrameTableOffset.
static uint64_t writeMemProfFrames(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
  OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
      FrameTableGenerator;
  for (auto &[FrameId, Frame] : MemProfFrameData) {
    // Insert the key (frame id) and value (frame contents).
    FrameTableGenerator.insert(FrameId, Frame);
  }
  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return FrameTableGenerator.Emit(OS.OS);
}

// Serialize MemProfFrameData. Return the mapping from FrameIds to their
// indexes within the frame array.
static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
writeMemProfFrameArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  // Mappings from FrameIds to array indexes.
  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;

  // Compute the order in which we serialize Frames. The order does not matter
  // in terms of correctness, but we still compute it for deserialization
  // performance. Specifically, if we serialize frequently used Frames one
  // after another, we have better cache utilization. For two Frames that
  // appear equally frequently, we break the tie by serializing the one that
  // tends to appear earlier in call stacks; we implement this by comparing
  // the sum of indexes within call stacks for each Frame. If we still have a
  // tie, we fall back to comparing the FrameIds themselves, purely for
  // stability of the output.
  std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
  FrameIdOrder.reserve(MemProfFrameData.size());
  for (const auto &[Id, Frame] : MemProfFrameData)
    FrameIdOrder.emplace_back(Id, &Frame);
  assert(MemProfFrameData.size() == FrameIdOrder.size());
  llvm::sort(FrameIdOrder,
             [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
                 const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
               const auto &SL = FrameHistogram[L.first];
               const auto &SR = FrameHistogram[R.first];
               // Popular FrameIds should come first.
               if (SL.Count != SR.Count)
                 return SL.Count > SR.Count;
               // If they are equally popular, then the one that tends to appear
               // earlier in call stacks should come first.
               if (SL.PositionSum != SR.PositionSum)
                 return SL.PositionSum < SR.PositionSum;
               // Compare their FrameIds for sort stability.
               return L.first < R.first;
             });

  // Serialize all frames while creating mappings from FrameIds to their
  // linear indexes within the frame array.
  uint64_t Index = 0;
  MemProfFrameIndexes.reserve(FrameIdOrder.size());
  for (const auto &[Id, F] : FrameIdOrder) {
    F->serialize(OS.OS);
    MemProfFrameIndexes.insert({Id, Index});
    ++Index;
  }
  assert(MemProfFrameData.size() == Index);
  assert(MemProfFrameData.size() == MemProfFrameIndexes.size());

  // Release the memory of this MapVector as it is no longer needed.
  MemProfFrameData.clear();

  return MemProfFrameIndexes;
}

static uint64_t writeMemProfCallStacks(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData) {
  OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
      CallStackTableGenerator;
  for (auto &[CSId, CallStack] : MemProfCallStackData)
    CallStackTableGenerator.insert(CSId, CallStack);
  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return CallStackTableGenerator.Emit(OS.OS);
}

static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
        &MemProfFrameIndexes,
    llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes;

  memprof::CallStackRadixTreeBuilder Builder;
  Builder.build(std::move(MemProfCallStackData), MemProfFrameIndexes,
                FrameHistogram);
  for (auto I : Builder.getRadixArray())
    OS.write32(I);
  MemProfCallStackIndexes = Builder.takeCallStackPos();

  // Release the memory of this vector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}

// Write out MemProf Version0 as follows:
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV0(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version0);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version1 as follows:
// uint64_t Version (NEW in V1)
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
static Error writeMemProfV1(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData) {
  OS.write(memprof::Version1);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.

  auto Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version1);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset};
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version2 as follows:
// uint64_t Version
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t FramePayloadOffset = Offset for the frame payload
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// OnDiskChainedHashTable MemProfRecordData
// OnDiskChainedHashTable MemProfFrameData
// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
static Error writeMemProfV2(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version2);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof record table offset.
  OS.write(0ULL); // Reserve space for the memprof frame payload offset.
  OS.write(0ULL); // Reserve space for the memprof frame table offset.
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof call stack table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);

  uint64_t FramePayloadOffset = OS.tell();
  uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);

  uint64_t CallStackPayloadOffset = OS.tell();
  uint64_t CallStackTableOffset =
      writeMemProfCallStacks(OS, MemProfData.CallStacks);

  uint64_t Header[] = {
      RecordTableOffset,      FramePayloadOffset,   FrameTableOffset,
      CallStackPayloadOffset, CallStackTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out MemProf Version3 as follows:
// uint64_t Version
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
// uint64_t RecordPayloadOffset = Offset for the record payload
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
// uint64_t Num schema entries
// uint64_t Schema entry 0
// uint64_t Schema entry 1
// ....
// uint64_t Schema entry N - 1
// Frames serialized one after another
// Call stacks encoded as a radix tree
// OnDiskChainedHashTable MemProfRecordData
static Error writeMemProfV3(ProfOStream &OS,
                            memprof::IndexedMemProfData &MemProfData,
                            bool MemProfFullSchema) {
  OS.write(memprof::Version3);
  uint64_t HeaderUpdatePos = OS.tell();
  OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
  OS.write(0ULL); // Reserve space for the memprof record payload offset.
  OS.write(0ULL); // Reserve space for the memprof record table offset.

  auto Schema = memprof::getHotColdSchema();
  if (MemProfFullSchema)
    Schema = memprof::getFullSchema();
  writeMemProfSchema(OS, Schema);

  llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
      memprof::computeFrameHistogram(MemProfData.CallStacks);
  assert(MemProfData.Frames.size() == FrameHistogram.size());

  llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
      writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);

  uint64_t CallStackPayloadOffset = OS.tell();
  llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
      MemProfCallStackIndexes = writeMemProfCallStackArray(
          OS, MemProfData.CallStacks, MemProfFrameIndexes, FrameHistogram);

  uint64_t RecordPayloadOffset = OS.tell();
  uint64_t RecordTableOffset =
      writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
                          &MemProfCallStackIndexes);

  uint64_t Header[] = {
      CallStackPayloadOffset,
      RecordPayloadOffset,
      RecordTableOffset,
  };
  OS.patch({{HeaderUpdatePos, Header}});

  return Error::success();
}

// Write out the MemProf data in a requested version.
static Error writeMemProf(ProfOStream &OS,
                          memprof::IndexedMemProfData &MemProfData,
                          memprof::IndexedVersion MemProfVersionRequested,
                          bool MemProfFullSchema) {
  switch (MemProfVersionRequested) {
  case memprof::Version0:
    return writeMemProfV0(OS, MemProfData);
  case memprof::Version1:
    return writeMemProfV1(OS, MemProfData);
  case memprof::Version2:
    return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
  case memprof::Version3:
    return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
  }

  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              MemProfVersionRequested, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
                                      const bool WritePrevVersion,
                                      ProfOStream &OS) {
  // Only write out the first four fields.
  for (int I = 0; I < 4; I++)
    OS.write(reinterpret_cast<const uint64_t *>(&Header)[I]);

  // Remember the offset of the remaining fields to allow back patching later.
  auto BackPatchStartOffset = OS.tell();

  // Reserve the space for back patching later.
  OS.write(0); // HashOffset
  OS.write(0); // MemProfOffset
  OS.write(0); // BinaryIdOffset
  OS.write(0); // TemporalProfTracesOffset
  if (!WritePrevVersion)
    OS.write(0); // VTableNamesOffset

  return BackPatchStartOffset;
}

Error InstrProfWriter::writeVTableNames(ProfOStream &OS) {
  std::vector<std::string> VTableNameStrs;
  for (StringRef VTableName : VTableNames.keys())
    VTableNameStrs.push_back(VTableName.str());

  std::string CompressedVTableNames;
  if (!VTableNameStrs.empty())
    if (Error E = collectGlobalObjectNameStrings(
            VTableNameStrs, compression::zlib::isAvailable(),
            CompressedVTableNames))
      return E;

  const uint64_t CompressedStringLen = CompressedVTableNames.length();

  // Record the length of compressed string.
  OS.write(CompressedStringLen);

  // Write the chars in compressed strings.
  for (auto &c : CompressedVTableNames)
    OS.writeByte(static_cast<uint8_t>(c));

  // Pad up to a multiple of 8.
  // InstrProfReader could read bytes according to 'CompressedStringLen'.
  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);

  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
    OS.writeByte(0);

  return Error::success();
}

Error InstrProfWriter::writeImpl(ProfOStream &OS) {
  using namespace IndexedInstrProf;
  using namespace support;

  OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;

  InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->SummaryBuilder = &ISB;
  InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs);
  InfoObj->CSSummaryBuilder = &CSISB;

  // Populate the hash table generator.
  SmallVector<std::pair<StringRef, const ProfilingData *>> OrderedData;
  for (const auto &I : FunctionData)
    if (shouldEncodeData(I.getValue()))
      OrderedData.emplace_back((I.getKey()), &I.getValue());
  llvm::sort(OrderedData, less_first());
  for (const auto &I : OrderedData)
    Generator.insert(I.first, I.second);

  // Write the header.
  IndexedInstrProf::Header Header;
  Header.Version = WritePrevVersion
                       ? IndexedInstrProf::ProfVersion::Version11
                       : IndexedInstrProf::ProfVersion::CurrentVersion;
  // The WritePrevVersion handling will either need to be removed or updated
  // if the version is advanced beyond 12.
  static_assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
                IndexedInstrProf::ProfVersion::Version12);
  if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    Header.Version |= VARIANT_MASK_IR_PROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    Header.Version |= VARIANT_MASK_CSIR_PROF;
  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    Header.Version |= VARIANT_MASK_INSTR_ENTRY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
  if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
    Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
    Header.Version |= VARIANT_MASK_MEMPROF;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    Header.Version |= VARIANT_MASK_TEMPORAL_PROF;

  const uint64_t BackPatchStartOffset =
      writeHeader(Header, WritePrevVersion, OS);

  // Reserve space to write profile summary data.
  uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
  uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
  // Remember the summary offset.
  uint64_t SummaryOffset = OS.tell();
  for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
    OS.write(0);
  uint64_t CSSummaryOffset = 0;
  uint64_t CSSummarySize = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    CSSummaryOffset = OS.tell();
    CSSummarySize = SummarySize / sizeof(uint64_t);
    for (unsigned I = 0; I < CSSummarySize; I++)
      OS.write(0);
  }

  // Write the hash table.
  uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj);

  // Write the MemProf profile data if we have it.
  uint64_t MemProfSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
    MemProfSectionStart = OS.tell();
    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
                              MemProfFullSchema))
      return E;
  }

  // BinaryIdSection has two parts:
  // 1. uint64_t BinaryIdsSectionSize
  // 2. list of binary ids that consist of:
  //    a. uint64_t BinaryIdLength
  //    b. uint8_t  BinaryIdData
  //    c. uint8_t  Padding (if necessary)
  uint64_t BinaryIdSectionStart = OS.tell();
  // Calculate size of binary section.
  uint64_t BinaryIdsSectionSize = 0;

  // Remove duplicate binary ids.
  llvm::sort(BinaryIds);
  BinaryIds.erase(llvm::unique(BinaryIds), BinaryIds.end());

  for (const auto &BI : BinaryIds) {
    // Increment by binary id length data type size.
    BinaryIdsSectionSize += sizeof(uint64_t);
    // Increment by binary id data length, aligned to 8 bytes.
    BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
  }
  // Write binary ids section size.
  OS.write(BinaryIdsSectionSize);

  for (const auto &BI : BinaryIds) {
    uint64_t BILen = BI.size();
    // Write binary id length.
    OS.write(BILen);
    // Write binary id data.
    for (unsigned K = 0; K < BILen; K++)
      OS.writeByte(BI[K]);
    // Write padding if necessary.
    uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
    for (unsigned K = 0; K < PaddingSize; K++)
      OS.writeByte(0);
  }

  uint64_t VTableNamesSectionStart = OS.tell();

  if (!WritePrevVersion)
    if (Error E = writeVTableNames(OS))
      return E;

  uint64_t TemporalProfTracesSectionStart = 0;
  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
    TemporalProfTracesSectionStart = OS.tell();
    OS.write(TemporalProfTraces.size());
    OS.write(TemporalProfTraceStreamSize);
    for (auto &Trace : TemporalProfTraces) {
      OS.write(Trace.Weight);
      OS.write(Trace.FunctionNameRefs.size());
      for (auto &NameRef : Trace.FunctionNameRefs)
        OS.write(NameRef);
    }
  }

  // Allocate space for data to be serialized out.
  std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
      IndexedInstrProf::allocSummary(SummarySize);
  // Compute the Summary and copy the data to the data
  // structure to be serialized out (to disk or buffer).
  std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
  setSummary(TheSummary.get(), *PS);
  InfoObj->SummaryBuilder = nullptr;

  // For Context Sensitive summary.
  std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr;
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) {
    TheCSSummary = IndexedInstrProf::allocSummary(SummarySize);
    std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary();
    setSummary(TheCSSummary.get(), *CSPS);
  }
  InfoObj->CSSummaryBuilder = nullptr;

  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
                                            BinaryIdSectionStart,
                                            TemporalProfTracesSectionStart};
  if (!WritePrevVersion)
    HeaderOffsets.push_back(VTableNamesSectionStart);

  PatchItem PatchItems[] = {
      // Patch the Header fields
      {BackPatchStartOffset, HeaderOffsets},
      // Patch the summary data.
      {SummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheSummary.get()),
                          SummarySize / sizeof(uint64_t))},
      {CSSummaryOffset,
       ArrayRef<uint64_t>(reinterpret_cast<uint64_t *>(TheCSSummary.get()),
                          CSSummarySize)}};

  OS.patch(PatchItems);

  for (const auto &I : FunctionData)
    for (const auto &F : I.getValue())
      if (Error E = validateRecord(F.second))
        return E;

  return Error::success();
}

Error InstrProfWriter::write(raw_fd_ostream &OS) {
  // Write the hash table.
  ProfOStream POS(OS);
  return writeImpl(POS);
}

Error InstrProfWriter::write(raw_string_ostream &OS) {
  ProfOStream POS(OS);
  return writeImpl(POS);
}
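
// Example usage (illustrative only; assumes the default constructor arguments
// declared in InstrProfWriter.h and a NamedInstrProfRecord named Record):
//
//   InstrProfWriter Writer;
//   Writer.addRecord(std::move(Record), /*Weight=*/1,
//                    [](Error E) { consumeError(std::move(E)); });
//   std::unique_ptr<MemoryBuffer> Profile = Writer.writeBuffer();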

std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
  std::string Data;
  raw_string_ostream OS(Data);
  // Write the hash table.
  if (Error E = write(OS))
    return nullptr;
  // Return this in an aligned memory buffer.
  return MemoryBuffer::getMemBufferCopy(Data);
}

static const char *ValueProfKindStr[] = {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};

Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) {
  for (uint32_t VK = 0; VK <= IPVK_Last; VK++) {
    if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
      continue;
    uint32_t NS = Func.getNumValueSites(VK);
    for (uint32_t S = 0; S < NS; S++) {
      DenseSet<uint64_t> SeenValues;
      for (const auto &V : Func.getValueArrayForSite(VK, S))
        if (!SeenValues.insert(V.Value).second)
          return make_error<InstrProfError>(instrprof_error::invalid_prof);
    }
  }

  return Error::success();
}

void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash,
                                        const InstrProfRecord &Func,
                                        InstrProfSymtab &Symtab,
                                        raw_fd_ostream &OS) {
  OS << Name << "\n";
  OS << "# Func Hash:\n" << Hash << "\n";
  OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
  OS << "# Counter Values:\n";
  for (uint64_t Count : Func.Counts)
    OS << Count << "\n";

  if (Func.BitmapBytes.size() > 0) {
    OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n";
    OS << "# Bitmap Byte Values:\n";
    for (uint8_t Byte : Func.BitmapBytes) {
      OS << "0x";
      OS.write_hex(Byte);
      OS << "\n";
    }
    OS << "\n";
  }

  uint32_t NumValueKinds = Func.getNumValueKinds();
  if (!NumValueKinds) {
    OS << "\n";
    return;
  }

  OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n";
  for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
    uint32_t NS = Func.getNumValueSites(VK);
    if (!NS)
      continue;
    OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
    OS << "# NumValueSites:\n" << NS << "\n";
    for (uint32_t S = 0; S < NS; S++) {
      auto VD = Func.getValueArrayForSite(VK, S);
      OS << VD.size() << "\n";
      for (const auto &V : VD) {
        if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget)
          OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count
             << "\n";
        else
          OS << V.Value << ":" << V.Count << "\n";
      }
    }
  }

  OS << "\n";
}
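
// Illustrative sample of the text emitted above for a hypothetical function
// "foo" with hash 0x1234, two counters {5, 0}, and no bitmap or value data:
//
//   foo
//   # Func Hash:
//   4660
//   # Num Counters:
//   2
//   # Counter Values:
//   5
//   0
//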

Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
  // Check CS first since it implies an IR level profile.
  if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
    OS << "# CSIR level Instrumentation Flag\n:csir\n";
  else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
    OS << "# IR level Instrumentation Flag\n:ir\n";

  if (static_cast<bool>(ProfileKind &
                        InstrProfKind::FunctionEntryInstrumentation))
    OS << "# Always instrument the function entry block\n:entry_first\n";
  if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
    OS << "# Instrument block coverage\n:single_byte_coverage\n";
  InstrProfSymtab Symtab;

  using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>;
  using RecordType = std::pair<StringRef, FuncPair>;
  SmallVector<RecordType, 4> OrderedFuncData;

  for (const auto &I : FunctionData) {
    if (shouldEncodeData(I.getValue())) {
      if (Error E = Symtab.addFuncName(I.getKey()))
        return E;
      for (const auto &Func : I.getValue())
        OrderedFuncData.push_back(std::make_pair(I.getKey(), Func));
    }
  }

  for (const auto &VTableName : VTableNames)
    if (Error E = Symtab.addVTableName(VTableName.getKey()))
      return E;

  if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
    writeTextTemporalProfTraceData(OS, Symtab);

  llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
    return std::tie(A.first, A.second.first) <
           std::tie(B.first, B.second.first);
  });

  for (const auto &record : OrderedFuncData) {
    const StringRef &Name = record.first;
    const FuncPair &Func = record.second;
    writeRecordInText(Name, Func.first, Func.second, Symtab, OS);
  }

  for (const auto &record : OrderedFuncData) {
    const FuncPair &Func = record.second;
    if (Error E = validateRecord(Func.second))
      return E;
  }

  return Error::success();
}

void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
                                                     InstrProfSymtab &Symtab) {
  OS << ":temporal_prof_traces\n";
  OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
  OS << "# Temporal Profile Trace Stream Size:\n"
     << TemporalProfTraceStreamSize << "\n";
  for (auto &Trace : TemporalProfTraces) {
    OS << "# Weight:\n" << Trace.Weight << "\n";
    for (auto &NameRef : Trace.FunctionNameRefs)
      OS << Symtab.getFuncOrVarName(NameRef) << ",";
    OS << "\n";
  }
  OS << "\n";
}