1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 // #include "llvm/ProfileData/MemProf.h" 22 #include "llvm/ProfileData/MemProfRadixTree.h" 23 #include "llvm/ProfileData/ProfileCommon.h" 24 #include "llvm/ProfileData/SymbolRemappingReader.h" 25 #include "llvm/Support/Endian.h" 26 #include "llvm/Support/Error.h" 27 #include "llvm/Support/ErrorOr.h" 28 #include "llvm/Support/FormatVariadic.h" 29 #include "llvm/Support/MemoryBuffer.h" 30 #include "llvm/Support/VirtualFileSystem.h" 31 #include <algorithm> 32 #include <cstddef> 33 #include <cstdint> 34 #include <limits> 35 #include <memory> 36 #include <optional> 37 #include <system_error> 38 #include <utility> 39 #include <vector> 40 41 using namespace llvm; 42 43 // Extracts the variant information from the top 32 bits in the version and 44 // returns an enum specifying the variants present. 
45 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 46 InstrProfKind ProfileKind = InstrProfKind::Unknown; 47 if (Version & VARIANT_MASK_IR_PROF) { 48 ProfileKind |= InstrProfKind::IRInstrumentation; 49 } 50 if (Version & VARIANT_MASK_CSIR_PROF) { 51 ProfileKind |= InstrProfKind::ContextSensitive; 52 } 53 if (Version & VARIANT_MASK_INSTR_ENTRY) { 54 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 55 } 56 if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) { 57 ProfileKind |= InstrProfKind::LoopEntriesInstrumentation; 58 } 59 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 60 ProfileKind |= InstrProfKind::SingleByteCoverage; 61 } 62 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 63 ProfileKind |= InstrProfKind::FunctionEntryOnly; 64 } 65 if (Version & VARIANT_MASK_MEMPROF) { 66 ProfileKind |= InstrProfKind::MemProf; 67 } 68 if (Version & VARIANT_MASK_TEMPORAL_PROF) { 69 ProfileKind |= InstrProfKind::TemporalProfile; 70 } 71 return ProfileKind; 72 } 73 74 static Expected<std::unique_ptr<MemoryBuffer>> 75 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 76 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 77 : FS.getBufferForFile(Filename); 78 if (std::error_code EC = BufferOrErr.getError()) 79 return errorCodeToError(EC); 80 return std::move(BufferOrErr.get()); 81 } 82 83 static Error initializeReader(InstrProfReader &Reader) { 84 return Reader.readHeader(); 85 } 86 87 /// Read a list of binary ids from a profile that consist of 88 /// a. uint64_t binary id length 89 /// b. uint8_t binary id data 90 /// c. uint8_t padding (if necessary) 91 /// This function is shared between raw and indexed profiles. 92 /// Raw profiles are in host-endian format, and indexed profiles are in 93 /// little-endian format. So, this function takes an argument indicating the 94 /// associated endian format to read the binary ids correctly. 
95 static Error 96 readBinaryIdsInternal(const MemoryBuffer &DataBuffer, 97 ArrayRef<uint8_t> BinaryIdsBuffer, 98 std::vector<llvm::object::BuildID> &BinaryIds, 99 const llvm::endianness Endian) { 100 using namespace support; 101 102 const uint64_t BinaryIdsSize = BinaryIdsBuffer.size(); 103 const uint8_t *BinaryIdsStart = BinaryIdsBuffer.data(); 104 105 if (BinaryIdsSize == 0) 106 return Error::success(); 107 108 const uint8_t *BI = BinaryIdsStart; 109 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 110 const uint8_t *End = 111 reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd()); 112 113 while (BI < BIEnd) { 114 size_t Remaining = BIEnd - BI; 115 // There should be enough left to read the binary id length. 116 if (Remaining < sizeof(uint64_t)) 117 return make_error<InstrProfError>( 118 instrprof_error::malformed, 119 "not enough data to read binary id length"); 120 121 uint64_t BILen = endian::readNext<uint64_t>(BI, Endian); 122 if (BILen == 0) 123 return make_error<InstrProfError>(instrprof_error::malformed, 124 "binary id length is 0"); 125 126 Remaining = BIEnd - BI; 127 // There should be enough left to read the binary id data. 128 if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t))) 129 return make_error<InstrProfError>( 130 instrprof_error::malformed, "not enough data to read binary id data"); 131 132 // Add binary id to the binary ids list. 133 BinaryIds.push_back(object::BuildID(BI, BI + BILen)); 134 135 // Increment by binary id data length, which aligned to the size of uint64. 
136 BI += alignToPowerOf2(BILen, sizeof(uint64_t)); 137 if (BI > End) 138 return make_error<InstrProfError>( 139 instrprof_error::malformed, 140 "binary id section is greater than buffer size"); 141 } 142 143 return Error::success(); 144 } 145 146 static void printBinaryIdsInternal(raw_ostream &OS, 147 ArrayRef<llvm::object::BuildID> BinaryIds) { 148 OS << "Binary IDs: \n"; 149 for (const auto &BI : BinaryIds) { 150 for (auto I : BI) 151 OS << format("%02x", I); 152 OS << "\n"; 153 } 154 } 155 156 Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create( 157 const Twine &Path, vfs::FileSystem &FS, 158 const InstrProfCorrelator *Correlator, 159 const object::BuildIDFetcher *BIDFetcher, 160 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind, 161 std::function<void(Error)> Warn) { 162 // Set up the buffer to read. 163 auto BufferOrError = setupMemoryBuffer(Path, FS); 164 if (Error E = BufferOrError.takeError()) 165 return std::move(E); 166 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator, 167 BIDFetcher, BIDFetcherCorrelatorKind, Warn); 168 } 169 170 Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create( 171 std::unique_ptr<MemoryBuffer> Buffer, const InstrProfCorrelator *Correlator, 172 const object::BuildIDFetcher *BIDFetcher, 173 const InstrProfCorrelator::ProfCorrelatorKind BIDFetcherCorrelatorKind, 174 std::function<void(Error)> Warn) { 175 if (Buffer->getBufferSize() == 0) 176 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 177 178 std::unique_ptr<InstrProfReader> Result; 179 // Create the reader. 
180 if (IndexedInstrProfReader::hasFormat(*Buffer)) 181 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 182 else if (RawInstrProfReader64::hasFormat(*Buffer)) 183 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, 184 BIDFetcher, BIDFetcherCorrelatorKind, 185 Warn)); 186 else if (RawInstrProfReader32::hasFormat(*Buffer)) 187 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, 188 BIDFetcher, BIDFetcherCorrelatorKind, 189 Warn)); 190 else if (TextInstrProfReader::hasFormat(*Buffer)) 191 Result.reset(new TextInstrProfReader(std::move(Buffer))); 192 else 193 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 194 195 // Initialize the reader and return the result. 196 if (Error E = initializeReader(*Result)) 197 return std::move(E); 198 199 return std::move(Result); 200 } 201 202 Expected<std::unique_ptr<IndexedInstrProfReader>> 203 IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, 204 const Twine &RemappingPath) { 205 // Set up the buffer to read. 206 auto BufferOrError = setupMemoryBuffer(Path, FS); 207 if (Error E = BufferOrError.takeError()) 208 return std::move(E); 209 210 // Set up the remapping buffer if requested. 211 std::unique_ptr<MemoryBuffer> RemappingBuffer; 212 std::string RemappingPathStr = RemappingPath.str(); 213 if (!RemappingPathStr.empty()) { 214 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS); 215 if (Error E = RemappingBufferOrError.takeError()) 216 return std::move(E); 217 RemappingBuffer = std::move(RemappingBufferOrError.get()); 218 } 219 220 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 221 std::move(RemappingBuffer)); 222 } 223 224 Expected<std::unique_ptr<IndexedInstrProfReader>> 225 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 226 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 227 // Create the reader. 
228 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 229 return make_error<InstrProfError>(instrprof_error::bad_magic); 230 auto Result = std::make_unique<IndexedInstrProfReader>( 231 std::move(Buffer), std::move(RemappingBuffer)); 232 233 // Initialize the reader and return the result. 234 if (Error E = initializeReader(*Result)) 235 return std::move(E); 236 237 return std::move(Result); 238 } 239 240 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 241 // Verify that this really looks like plain ASCII text by checking a 242 // 'reasonable' number of characters (up to profile magic size). 243 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 244 StringRef buffer = Buffer.getBufferStart(); 245 return count == 0 || 246 std::all_of(buffer.begin(), buffer.begin() + count, 247 [](char c) { return isPrint(c) || isSpace(c); }); 248 } 249 250 // Read the profile variant flag from the header: ":FE" means this is a FE 251 // generated profile. ":IR" means this is an IR level profile. Other strings 252 // with a leading ':' will be reported an error format. 
253 Error TextInstrProfReader::readHeader() { 254 Symtab.reset(new InstrProfSymtab()); 255 256 while (Line->starts_with(":")) { 257 StringRef Str = Line->substr(1); 258 if (Str.equals_insensitive("ir")) 259 ProfileKind |= InstrProfKind::IRInstrumentation; 260 else if (Str.equals_insensitive("fe")) 261 ProfileKind |= InstrProfKind::FrontendInstrumentation; 262 else if (Str.equals_insensitive("csir")) { 263 ProfileKind |= InstrProfKind::IRInstrumentation; 264 ProfileKind |= InstrProfKind::ContextSensitive; 265 } else if (Str.equals_insensitive("entry_first")) 266 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 267 else if (Str.equals_insensitive("not_entry_first")) 268 ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; 269 else if (Str.equals_insensitive("instrument_loop_entries")) 270 ProfileKind |= InstrProfKind::LoopEntriesInstrumentation; 271 else if (Str.equals_insensitive("single_byte_coverage")) 272 ProfileKind |= InstrProfKind::SingleByteCoverage; 273 else if (Str.equals_insensitive("temporal_prof_traces")) { 274 ProfileKind |= InstrProfKind::TemporalProfile; 275 if (auto Err = readTemporalProfTraceData()) 276 return error(std::move(Err)); 277 } else 278 return error(instrprof_error::bad_header); 279 ++Line; 280 } 281 return success(); 282 } 283 284 /// Temporal profile trace data is stored in the header immediately after 285 /// ":temporal_prof_traces". The first integer is the number of traces, the 286 /// second integer is the stream size, then the following lines are the actual 287 /// traces which consist of a weight and a comma separated list of function 288 /// names. 
289 Error TextInstrProfReader::readTemporalProfTraceData() { 290 if ((++Line).is_at_end()) 291 return error(instrprof_error::eof); 292 293 uint32_t NumTraces; 294 if (Line->getAsInteger(0, NumTraces)) 295 return error(instrprof_error::malformed); 296 297 if ((++Line).is_at_end()) 298 return error(instrprof_error::eof); 299 300 if (Line->getAsInteger(0, TemporalProfTraceStreamSize)) 301 return error(instrprof_error::malformed); 302 303 for (uint32_t i = 0; i < NumTraces; i++) { 304 if ((++Line).is_at_end()) 305 return error(instrprof_error::eof); 306 307 TemporalProfTraceTy Trace; 308 if (Line->getAsInteger(0, Trace.Weight)) 309 return error(instrprof_error::malformed); 310 311 if ((++Line).is_at_end()) 312 return error(instrprof_error::eof); 313 314 SmallVector<StringRef> FuncNames; 315 Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 316 for (auto &FuncName : FuncNames) 317 Trace.FunctionNameRefs.push_back( 318 IndexedInstrProf::ComputeHash(FuncName.trim())); 319 TemporalProfTraces.push_back(std::move(Trace)); 320 } 321 return success(); 322 } 323 324 Error 325 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 326 327 #define CHECK_LINE_END(Line) \ 328 if (Line.is_at_end()) \ 329 return error(instrprof_error::truncated); 330 #define READ_NUM(Str, Dst) \ 331 if ((Str).getAsInteger(10, (Dst))) \ 332 return error(instrprof_error::malformed); 333 #define VP_READ_ADVANCE(Val) \ 334 CHECK_LINE_END(Line); \ 335 uint32_t Val; \ 336 READ_NUM((*Line), (Val)); \ 337 Line++; 338 339 if (Line.is_at_end()) 340 return success(); 341 342 uint32_t NumValueKinds; 343 if (Line->getAsInteger(10, NumValueKinds)) { 344 // No value profile data 345 return success(); 346 } 347 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 348 return error(instrprof_error::malformed, 349 "number of value kinds is invalid"); 350 Line++; 351 352 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 353 VP_READ_ADVANCE(ValueKind); 354 if (ValueKind > IPVK_Last) 355 
return error(instrprof_error::malformed, "value kind is invalid"); 356 ; 357 VP_READ_ADVANCE(NumValueSites); 358 if (!NumValueSites) 359 continue; 360 361 Record.reserveSites(VK, NumValueSites); 362 for (uint32_t S = 0; S < NumValueSites; S++) { 363 VP_READ_ADVANCE(NumValueData); 364 365 std::vector<InstrProfValueData> CurrentValues; 366 for (uint32_t V = 0; V < NumValueData; V++) { 367 CHECK_LINE_END(Line); 368 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 369 uint64_t TakenCount, Value; 370 if (ValueKind == IPVK_IndirectCallTarget) { 371 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 372 Value = 0; 373 } else { 374 if (Error E = Symtab->addFuncName(VD.first)) 375 return E; 376 Value = IndexedInstrProf::ComputeHash(VD.first); 377 } 378 } else if (ValueKind == IPVK_VTableTarget) { 379 if (InstrProfSymtab::isExternalSymbol(VD.first)) 380 Value = 0; 381 else { 382 if (Error E = Symtab->addVTableName(VD.first)) 383 return E; 384 Value = IndexedInstrProf::ComputeHash(VD.first); 385 } 386 } else { 387 READ_NUM(VD.first, Value); 388 } 389 READ_NUM(VD.second, TakenCount); 390 CurrentValues.push_back({Value, TakenCount}); 391 Line++; 392 } 393 assert(CurrentValues.size() == NumValueData); 394 Record.addValueData(ValueKind, S, CurrentValues, nullptr); 395 } 396 } 397 return success(); 398 399 #undef CHECK_LINE_END 400 #undef READ_NUM 401 #undef VP_READ_ADVANCE 402 } 403 404 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 405 // Skip empty lines and comments. 406 while (!Line.is_at_end() && (Line->empty() || Line->starts_with("#"))) 407 ++Line; 408 // If we hit EOF while looking for a name, we're done. 409 if (Line.is_at_end()) { 410 return error(instrprof_error::eof); 411 } 412 413 // Read the function name. 414 Record.Name = *Line++; 415 if (Error E = Symtab->addFuncName(Record.Name)) 416 return error(std::move(E)); 417 418 // Read the function hash. 
419 if (Line.is_at_end()) 420 return error(instrprof_error::truncated); 421 if ((Line++)->getAsInteger(0, Record.Hash)) 422 return error(instrprof_error::malformed, 423 "function hash is not a valid integer"); 424 425 // Read the number of counters. 426 uint64_t NumCounters; 427 if (Line.is_at_end()) 428 return error(instrprof_error::truncated); 429 if ((Line++)->getAsInteger(10, NumCounters)) 430 return error(instrprof_error::malformed, 431 "number of counters is not a valid integer"); 432 if (NumCounters == 0) 433 return error(instrprof_error::malformed, "number of counters is zero"); 434 435 // Read each counter and fill our internal storage with the values. 436 Record.Clear(); 437 Record.Counts.reserve(NumCounters); 438 for (uint64_t I = 0; I < NumCounters; ++I) { 439 if (Line.is_at_end()) 440 return error(instrprof_error::truncated); 441 uint64_t Count; 442 if ((Line++)->getAsInteger(10, Count)) 443 return error(instrprof_error::malformed, "count is invalid"); 444 Record.Counts.push_back(Count); 445 } 446 447 // Bitmap byte information is indicated with special character. 448 if (Line->starts_with("$")) { 449 Record.BitmapBytes.clear(); 450 // Read the number of bitmap bytes. 451 uint64_t NumBitmapBytes; 452 if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes)) 453 return error(instrprof_error::malformed, 454 "number of bitmap bytes is not a valid integer"); 455 if (NumBitmapBytes != 0) { 456 // Read each bitmap and fill our internal storage with the values. 457 Record.BitmapBytes.reserve(NumBitmapBytes); 458 for (uint8_t I = 0; I < NumBitmapBytes; ++I) { 459 if (Line.is_at_end()) 460 return error(instrprof_error::truncated); 461 uint8_t BitmapByte; 462 if ((Line++)->getAsInteger(0, BitmapByte)) 463 return error(instrprof_error::malformed, 464 "bitmap byte is not a valid integer"); 465 Record.BitmapBytes.push_back(BitmapByte); 466 } 467 } 468 } 469 470 // Check if value profile data exists and read it if so. 
471 if (Error E = readValueProfileData(Record)) 472 return error(std::move(E)); 473 474 return success(); 475 } 476 477 template <class IntPtrT> 478 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 479 return getProfileKindFromVersion(Version); 480 } 481 482 template <class IntPtrT> 483 SmallVector<TemporalProfTraceTy> & 484 RawInstrProfReader<IntPtrT>::getTemporalProfTraces( 485 std::optional<uint64_t> Weight) { 486 if (TemporalProfTimestamps.empty()) { 487 assert(TemporalProfTraces.empty()); 488 return TemporalProfTraces; 489 } 490 // Sort functions by their timestamps to build the trace. 491 std::sort(TemporalProfTimestamps.begin(), TemporalProfTimestamps.end()); 492 TemporalProfTraceTy Trace; 493 if (Weight) 494 Trace.Weight = *Weight; 495 for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps) 496 Trace.FunctionNameRefs.push_back(NameRef); 497 TemporalProfTraces = {std::move(Trace)}; 498 return TemporalProfTraces; 499 } 500 501 template <class IntPtrT> 502 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 503 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 504 return false; 505 uint64_t Magic = 506 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 507 return RawInstrProf::getMagic<IntPtrT>() == Magic || 508 llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic; 509 } 510 511 template <class IntPtrT> 512 Error RawInstrProfReader<IntPtrT>::readHeader() { 513 if (!hasFormat(*DataBuffer)) 514 return error(instrprof_error::bad_magic); 515 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 516 return error(instrprof_error::bad_header); 517 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 518 DataBuffer->getBufferStart()); 519 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 520 return readHeader(*Header); 521 } 522 523 template <class IntPtrT> 524 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 525 const char *End 
= DataBuffer->getBufferEnd(); 526 // Skip zero padding between profiles. 527 while (CurrentPos != End && *CurrentPos == 0) 528 ++CurrentPos; 529 // If there's nothing left, we're done. 530 if (CurrentPos == End) 531 return make_error<InstrProfError>(instrprof_error::eof); 532 // If there isn't enough space for another header, this is probably just 533 // garbage at the end of the file. 534 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 535 return make_error<InstrProfError>(instrprof_error::malformed, 536 "not enough space for another header"); 537 // The writer ensures each profile is padded to start at an aligned address. 538 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 539 return make_error<InstrProfError>(instrprof_error::malformed, 540 "insufficient padding"); 541 // The magic should have the same byte order as in the previous header. 542 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 543 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 544 return make_error<InstrProfError>(instrprof_error::bad_magic); 545 546 // There's another profile to read, so we need to process the header. 
547 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 548 return readHeader(*Header); 549 } 550 551 template <class IntPtrT> 552 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 553 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart), 554 StringRef(VNamesStart, VNamesEnd - VNamesStart))) 555 return error(std::move(E)); 556 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 557 const IntPtrT FPtr = swap(I->FunctionPointer); 558 if (!FPtr) 559 continue; 560 Symtab.mapAddress(FPtr, swap(I->NameRef)); 561 } 562 563 if (VTableBegin != nullptr && VTableEnd != nullptr) { 564 for (const RawInstrProf::VTableProfileData<IntPtrT> *I = VTableBegin; 565 I != VTableEnd; ++I) { 566 const IntPtrT VPtr = swap(I->VTablePointer); 567 if (!VPtr) 568 continue; 569 // Map both begin and end address to the name hash, since the instrumented 570 // address could be somewhere in the middle. 571 // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks 572 // the end of vtable address. 573 Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize), 574 swap(I->VTableNameHash)); 575 } 576 } 577 return success(); 578 } 579 580 template <class IntPtrT> 581 Error RawInstrProfReader<IntPtrT>::readHeader( 582 const RawInstrProf::Header &Header) { 583 Version = swap(Header.Version); 584 if (GET_VERSION(Version) != RawInstrProf::Version) 585 return error(instrprof_error::raw_profile_version_mismatch, 586 ("Profile uses raw profile format version = " + 587 Twine(GET_VERSION(Version)) + 588 "; expected version = " + Twine(RawInstrProf::Version) + 589 "\nPLEASE update this tool to version in the raw profile, or " 590 "regenerate raw profile with expected version.") 591 .str()); 592 593 uint64_t BinaryIdSize = swap(Header.BinaryIdsSize); 594 // Binary id start just after the header if exists. 
595 const uint8_t *BinaryIdStart = 596 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 597 const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize; 598 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 599 if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd) 600 return error(instrprof_error::bad_header); 601 ArrayRef<uint8_t> BinaryIdsBuffer(BinaryIdStart, BinaryIdSize); 602 if (!BinaryIdsBuffer.empty()) { 603 if (Error Err = readBinaryIdsInternal(*DataBuffer, BinaryIdsBuffer, 604 BinaryIds, getDataEndianness())) 605 return Err; 606 } 607 608 CountersDelta = swap(Header.CountersDelta); 609 BitmapDelta = swap(Header.BitmapDelta); 610 NamesDelta = swap(Header.NamesDelta); 611 auto NumData = swap(Header.NumData); 612 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 613 auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize(); 614 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 615 auto NumBitmapBytes = swap(Header.NumBitmapBytes); 616 auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes); 617 auto NamesSize = swap(Header.NamesSize); 618 auto VTableNameSize = swap(Header.VNamesSize); 619 auto NumVTables = swap(Header.NumVTables); 620 ValueKindLast = swap(Header.ValueKindLast); 621 622 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 623 auto PaddingBytesAfterNames = getNumPaddingBytes(NamesSize); 624 auto PaddingBytesAfterVTableNames = getNumPaddingBytes(VTableNameSize); 625 626 auto VTableSectionSize = 627 NumVTables * sizeof(RawInstrProf::VTableProfileData<IntPtrT>); 628 auto PaddingBytesAfterVTableProfData = getNumPaddingBytes(VTableSectionSize); 629 630 // Profile data starts after profile header and binary ids if exist. 
631 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize; 632 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 633 ptrdiff_t BitmapOffset = 634 CountersOffset + CountersSize + PaddingBytesAfterCounters; 635 ptrdiff_t NamesOffset = 636 BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes; 637 ptrdiff_t VTableProfDataOffset = 638 NamesOffset + NamesSize + PaddingBytesAfterNames; 639 ptrdiff_t VTableNameOffset = VTableProfDataOffset + VTableSectionSize + 640 PaddingBytesAfterVTableProfData; 641 ptrdiff_t ValueDataOffset = 642 VTableNameOffset + VTableNameSize + PaddingBytesAfterVTableNames; 643 644 auto *Start = reinterpret_cast<const char *>(&Header); 645 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 646 return error(instrprof_error::bad_header); 647 648 if (BIDFetcher) { 649 std::vector<object::BuildID> BinaryIDs; 650 if (Error E = readBinaryIds(BinaryIDs)) 651 return E; 652 if (auto E = InstrProfCorrelator::get("", BIDFetcherCorrelatorKind, 653 BIDFetcher, BinaryIDs) 654 .moveInto(BIDFetcherCorrelator)) { 655 return E; 656 } 657 if (auto Err = BIDFetcherCorrelator->correlateProfileData(0)) 658 return Err; 659 } 660 661 if (Correlator) { 662 // These sizes in the raw file are zero because we constructed them in the 663 // Correlator. 
664 if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 && 665 NamesDelta == 0)) 666 return error(instrprof_error::unexpected_correlation_info); 667 Data = Correlator->getDataPointer(); 668 DataEnd = Data + Correlator->getDataSize(); 669 NamesStart = Correlator->getNamesPointer(); 670 NamesEnd = NamesStart + Correlator->getNamesSize(); 671 } else if (BIDFetcherCorrelator) { 672 InstrProfCorrelatorImpl<IntPtrT> *BIDFetcherCorrelatorImpl = 673 dyn_cast_or_null<InstrProfCorrelatorImpl<IntPtrT>>( 674 BIDFetcherCorrelator.get()); 675 Data = BIDFetcherCorrelatorImpl->getDataPointer(); 676 DataEnd = Data + BIDFetcherCorrelatorImpl->getDataSize(); 677 NamesStart = BIDFetcherCorrelatorImpl->getNamesPointer(); 678 NamesEnd = NamesStart + BIDFetcherCorrelatorImpl->getNamesSize(); 679 } else { 680 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 681 Start + DataOffset); 682 DataEnd = Data + NumData; 683 VTableBegin = 684 reinterpret_cast<const RawInstrProf::VTableProfileData<IntPtrT> *>( 685 Start + VTableProfDataOffset); 686 VTableEnd = VTableBegin + NumVTables; 687 NamesStart = Start + NamesOffset; 688 NamesEnd = NamesStart + NamesSize; 689 VNamesStart = Start + VTableNameOffset; 690 VNamesEnd = VNamesStart + VTableNameSize; 691 } 692 693 CountersStart = Start + CountersOffset; 694 CountersEnd = CountersStart + CountersSize; 695 BitmapStart = Start + BitmapOffset; 696 BitmapEnd = BitmapStart + NumBitmapBytes; 697 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 698 699 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 700 if (Error E = createSymtab(*NewSymtab)) 701 return E; 702 703 Symtab = std::move(NewSymtab); 704 return success(); 705 } 706 707 template <class IntPtrT> 708 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 709 Record.Name = getName(Data->NameRef); 710 return success(); 711 } 712 713 template <class IntPtrT> 714 Error 
RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 715 Record.Hash = swap(Data->FuncHash); 716 return success(); 717 } 718 719 template <class IntPtrT> 720 Error RawInstrProfReader<IntPtrT>::readRawCounts( 721 InstrProfRecord &Record) { 722 uint32_t NumCounters = swap(Data->NumCounters); 723 if (NumCounters == 0) 724 return error(instrprof_error::malformed, "number of counters is zero"); 725 726 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 727 if (CounterBaseOffset < 0) 728 return error( 729 instrprof_error::malformed, 730 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 731 732 if (CounterBaseOffset >= CountersEnd - CountersStart) 733 return error(instrprof_error::malformed, 734 ("counter offset " + Twine(CounterBaseOffset) + 735 " is greater than the maximum counter offset " + 736 Twine(CountersEnd - CountersStart - 1)) 737 .str()); 738 739 uint64_t MaxNumCounters = 740 (CountersEnd - (CountersStart + CounterBaseOffset)) / 741 getCounterTypeSize(); 742 if (NumCounters > MaxNumCounters) 743 return error(instrprof_error::malformed, 744 ("number of counters " + Twine(NumCounters) + 745 " is greater than the maximum number of counters " + 746 Twine(MaxNumCounters)) 747 .str()); 748 749 Record.Counts.clear(); 750 Record.Counts.reserve(NumCounters); 751 for (uint32_t I = 0; I < NumCounters; I++) { 752 const char *Ptr = 753 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 754 if (I == 0 && hasTemporalProfile()) { 755 uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr)); 756 if (TimestampValue != 0 && 757 TimestampValue != std::numeric_limits<uint64_t>::max()) { 758 TemporalProfTimestamps.emplace_back(TimestampValue, 759 swap(Data->NameRef)); 760 TemporalProfTraceStreamSize = 1; 761 } 762 if (hasSingleByteCoverage()) { 763 // In coverage mode, getCounterTypeSize() returns 1 byte but our 764 // timestamp field has size uint64_t. 
// Increment I so that the next
        // iteration of this for loop points to the byte after the timestamp
        // field, i.e., I += 8.
        I += 7;
      }
      continue;
    }
    if (hasSingleByteCoverage()) {
      // A value of zero signifies the block is covered.
      Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
    } else {
      uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
      // An oversized counter is only reported through the optional Warn
      // callback; the value is still recorded below.
      if (CounterValue > MaxCounterValue && Warn)
        Warn(make_error<InstrProfError>(
            instrprof_error::counter_value_too_large, Twine(CounterValue)));

      Record.Counts.push_back(CounterValue);
    }
  }

  return success();
}

/// Read the bitmap bytes of the current data record into \p Record.
///
/// The byte count is taken from Data->NumBitmapBytes and the start offset is
/// Data->BitmapPtr minus BitmapDelta. Returns instrprof_error::malformed when
/// the offset is negative, when it lies at or past the end of the bitmap
/// section [BitmapStart, BitmapEnd), or when more bytes are requested than
/// remain after the offset. A record with zero bitmap bytes succeeds with an
/// empty Record.BitmapBytes.
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) {
  uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes);

  Record.BitmapBytes.clear();
  Record.BitmapBytes.reserve(NumBitmapBytes);

  // It's possible MCDC is either not enabled or only used for some functions
  // and not others. So if we record 0 bytes, just move on.
  if (NumBitmapBytes == 0)
    return success();

  // BitmapDelta decreases as we advance to the next data record.
  ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta;
  if (BitmapOffset < 0)
    return error(
        instrprof_error::malformed,
        ("bitmap offset " + Twine(BitmapOffset) + " is negative").str());

  if (BitmapOffset >= BitmapEnd - BitmapStart)
    return error(instrprof_error::malformed,
                 ("bitmap offset " + Twine(BitmapOffset) +
                  " is greater than the maximum bitmap offset " +
                  Twine(BitmapEnd - BitmapStart - 1))
                     .str());

  // Number of bytes left in the bitmap section starting at BitmapOffset.
  uint64_t MaxNumBitmapBytes =
      (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t);
  if (NumBitmapBytes > MaxNumBitmapBytes)
    return error(instrprof_error::malformed,
                 ("number of bitmap bytes " + Twine(NumBitmapBytes) +
                  " is greater than the maximum number of bitmap bytes " +
                  Twine(MaxNumBitmapBytes))
                     .str());

  for (uint32_t I = 0; I < NumBitmapBytes; I++) {
    const char *Ptr = BitmapStart + BitmapOffset + I;
    Record.BitmapBytes.push_back(swap(*Ptr));
  }

  return success();
}

/// Read the value profiling data of the current record into \p Record.
///
/// Any value data already in \p Record is cleared and CurValueDataSize is
/// reset to 0. When no value kind of the current data record has a non-zero
/// site count, nothing more is read. Otherwise the ValueProfData found at
/// ValueDataStart is parsed, deserialized into \p Record, and its size is
/// stored in CurValueDataSize. A parse failure is returned to the caller.
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
    InstrProfRecord &Record) {
  Record.clearValueData();
  CurValueDataSize = 0;
  // Need to match the logic in value profile dumper code in compiler-rt:
  uint32_t NumValueKinds = 0;
  for (uint32_t I = 0; I < IPVK_Last + 1; I++)
    NumValueKinds += (Data->NumValueSites[I] != 0);

  if (!NumValueKinds)
    return success();

  Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
      ValueProfData::getValueProfData(
          ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(),
          getDataEndianness());

  if (Error E = VDataPtrOrErr.takeError())
    return E;

  // Note that besides deserialization, this also performs the conversion for
  // indirect call targets. The function pointers from the raw profile are
  // remapped into function name hashes.
  VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get());
  CurValueDataSize = VDataPtrOrErr.get()->getSize();
  return success();
}

/// Read the next record of the raw profile into \p Record.
///
/// The name, function hash, counters, bitmap bytes and value profiling data
/// are read in that order; the first failing step aborts the read and its
/// error is passed through error(). On success the reader is advanced to the
/// next data record.
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) {
  // Keep reading profiles that consist of only headers and no profile data and
  // counters.
  while (atEnd())
    // At this point, ValueDataStart field points to the next header.
    if (Error E = readNextHeader(getNextHeaderPos()))
      return error(std::move(E));

  // Read name and set it in Record.
  if (Error E = readName(Record))
    return error(std::move(E));

  // Read FuncHash and set it in Record.
  if (Error E = readFuncHash(Record))
    return error(std::move(E));

  // Read raw counts and set Record.
  if (Error E = readRawCounts(Record))
    return error(std::move(E));

  // Read raw bitmap bytes and set Record.
  if (Error E = readRawBitmapBytes(Record))
    return error(std::move(E));

  // Read value data and set Record.
  if (Error E = readValueProfilingData(Record))
    return error(std::move(E));

  // Iterate.
889 advanceData(); 890 return success(); 891 } 892 893 template <class IntPtrT> 894 Error RawInstrProfReader<IntPtrT>::readBinaryIds( 895 std::vector<llvm::object::BuildID> &BinaryIds) { 896 BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(), 897 this->BinaryIds.end()); 898 return Error::success(); 899 } 900 901 template <class IntPtrT> 902 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 903 if (!BinaryIds.empty()) 904 printBinaryIdsInternal(OS, BinaryIds); 905 return Error::success(); 906 } 907 908 namespace llvm { 909 910 template class RawInstrProfReader<uint32_t>; 911 template class RawInstrProfReader<uint64_t>; 912 913 } // end namespace llvm 914 915 InstrProfLookupTrait::hash_value_type 916 InstrProfLookupTrait::ComputeHash(StringRef K) { 917 return IndexedInstrProf::ComputeHash(HashType, K); 918 } 919 920 using data_type = InstrProfLookupTrait::data_type; 921 using offset_type = InstrProfLookupTrait::offset_type; 922 923 bool InstrProfLookupTrait::readValueProfilingData( 924 const unsigned char *&D, const unsigned char *const End) { 925 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 926 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 927 928 if (VDataPtrOrErr.takeError()) 929 return false; 930 931 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 932 D += VDataPtrOrErr.get()->TotalSize; 933 934 return true; 935 } 936 937 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 938 offset_type N) { 939 using namespace support; 940 941 // Check if the data is corrupt. If so, don't try to read it. 942 if (N % sizeof(uint64_t)) 943 return data_type(); 944 945 DataBuffer.clear(); 946 std::vector<uint64_t> CounterBuffer; 947 std::vector<uint8_t> BitmapByteBuffer; 948 949 const unsigned char *End = D + N; 950 while (D < End) { 951 // Read hash. 
952 if (D + sizeof(uint64_t) >= End) 953 return data_type(); 954 uint64_t Hash = endian::readNext<uint64_t, llvm::endianness::little>(D); 955 956 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 957 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 958 // If format version is different then read the number of counters. 959 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 960 if (D + sizeof(uint64_t) > End) 961 return data_type(); 962 CountsSize = endian::readNext<uint64_t, llvm::endianness::little>(D); 963 } 964 // Read counter values. 965 if (D + CountsSize * sizeof(uint64_t) > End) 966 return data_type(); 967 968 CounterBuffer.clear(); 969 CounterBuffer.reserve(CountsSize); 970 for (uint64_t J = 0; J < CountsSize; ++J) 971 CounterBuffer.push_back( 972 endian::readNext<uint64_t, llvm::endianness::little>(D)); 973 974 // Read bitmap bytes for GET_VERSION(FormatVersion) > 10. 975 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) { 976 uint64_t BitmapBytes = 0; 977 if (D + sizeof(uint64_t) > End) 978 return data_type(); 979 BitmapBytes = endian::readNext<uint64_t, llvm::endianness::little>(D); 980 // Read bitmap byte values. 981 if (D + BitmapBytes * sizeof(uint8_t) > End) 982 return data_type(); 983 BitmapByteBuffer.clear(); 984 BitmapByteBuffer.reserve(BitmapBytes); 985 for (uint64_t J = 0; J < BitmapBytes; ++J) 986 BitmapByteBuffer.push_back(static_cast<uint8_t>( 987 endian::readNext<uint64_t, llvm::endianness::little>(D))); 988 } 989 990 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer), 991 std::move(BitmapByteBuffer)); 992 993 // Read value profiling data. 
994 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 995 !readValueProfilingData(D, End)) { 996 DataBuffer.clear(); 997 return data_type(); 998 } 999 } 1000 return DataBuffer; 1001 } 1002 1003 template <typename HashTableImpl> 1004 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 1005 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 1006 auto Iter = HashTable->find(FuncName); 1007 if (Iter == HashTable->end()) 1008 return make_error<InstrProfError>(instrprof_error::unknown_function); 1009 1010 Data = (*Iter); 1011 if (Data.empty()) 1012 return make_error<InstrProfError>(instrprof_error::malformed, 1013 "profile data is empty"); 1014 1015 return Error::success(); 1016 } 1017 1018 template <typename HashTableImpl> 1019 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 1020 ArrayRef<NamedInstrProfRecord> &Data) { 1021 if (atEnd()) 1022 return make_error<InstrProfError>(instrprof_error::eof); 1023 1024 Data = *RecordIterator; 1025 1026 if (Data.empty()) 1027 return make_error<InstrProfError>(instrprof_error::malformed, 1028 "profile data is empty"); 1029 1030 return Error::success(); 1031 } 1032 1033 template <typename HashTableImpl> 1034 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 1035 const unsigned char *Buckets, const unsigned char *const Payload, 1036 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 1037 uint64_t Version) { 1038 FormatVersion = Version; 1039 HashTable.reset(HashTableImpl::Create( 1040 Buckets, Payload, Base, 1041 typename HashTableImpl::InfoType(HashType, Version))); 1042 RecordIterator = HashTable->data_begin(); 1043 } 1044 1045 template <typename HashTableImpl> 1046 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 1047 return getProfileKindFromVersion(FormatVersion); 1048 } 1049 1050 namespace { 1051 /// A remapper that does not apply any remappings. 
class InstrProfReaderNullRemapper : public InstrProfReaderRemapper {
  InstrProfReaderIndexBase &Underlying;

public:
  InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying)
      : Underlying(Underlying) {}

  /// Forward the lookup unchanged to the underlying index.
  Error getRecords(StringRef FuncName,
                   ArrayRef<NamedInstrProfRecord> &Data) override {
    return Underlying.getRecords(FuncName, Data);
  }
};
} // namespace

/// A remapper that applies remappings based on a symbol remapping file.
template <typename HashTableImpl>
class llvm::InstrProfReaderItaniumRemapper
    : public InstrProfReaderRemapper {
public:
  InstrProfReaderItaniumRemapper(
      std::unique_ptr<MemoryBuffer> RemapBuffer,
      InstrProfReaderIndex<HashTableImpl> &Underlying)
      : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) {
  }

  /// Extract the original function name from a PGO function name.
  ///
  /// Returns the first delimiter-separated piece that starts with "_Z", or
  /// \p Name itself when no piece does.
  static StringRef extractName(StringRef Name) {
    // We can have multiple pieces separated by kGlobalIdentifierDelimiter (
    // semicolon now and colon in older profiles); there can be pieces both
    // before and after the mangled name. Find the first part that starts with
    // '_Z'; we'll assume that's the mangled name we want.
    std::pair<StringRef, StringRef> Parts = {StringRef(), Name};
    while (true) {
      Parts = Parts.second.split(GlobalIdentifierDelimiter);
      if (Parts.first.starts_with("_Z"))
        return Parts.first;
      if (Parts.second.empty())
        return Name;
    }
  }

  /// Given a mangled name extracted from a PGO function name, and a new
  /// form for that mangled name, reconstitute the name.
  ///
  /// \p ExtractedName must point into \p OrigName: the text before and after
  /// it is copied from \p OrigName around \p Replacement and appended to
  /// \p Out.
  static void reconstituteName(StringRef OrigName, StringRef ExtractedName,
                               StringRef Replacement,
                               SmallVectorImpl<char> &Out) {
    Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size());
    Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin());
    llvm::append_range(Out, Replacement);
    Out.insert(Out.end(), ExtractedName.end(), OrigName.end());
  }

  /// Read the remapping file and record, for every key of the underlying hash
  /// table whose extracted name the remapper accepts, the mapping from the
  /// remapping key to that extracted name.
  Error populateRemappings() override {
    if (Error E = Remappings.read(*RemapBuffer))
      return E;
    for (StringRef Name : Underlying.HashTable->keys()) {
      StringRef RealName = extractName(Name);
      if (auto Key = Remappings.insert(RealName)) {
        // FIXME: We could theoretically map the same equivalence class to
        // multiple names in the profile data. If that happens, we should
        // return NamedInstrProfRecords from all of them.
        MappedNames.insert({Key, RealName});
      }
    }
    return Error::success();
  }

  /// Look up the records for \p FuncName, applying the remapping first.
  ///
  /// When the extracted name has a non-empty remapped form, the lookup uses
  /// the remapped name: directly when the extracted name spans all of
  /// \p FuncName, otherwise through a reconstituted name, falling back to the
  /// original name only if the reconstituted one is an unknown function. Any
  /// other lookup error is returned as is.
  Error getRecords(StringRef FuncName,
                   ArrayRef<NamedInstrProfRecord> &Data) override {
    StringRef RealName = extractName(FuncName);
    if (auto Key = Remappings.lookup(RealName)) {
      StringRef Remapped = MappedNames.lookup(Key);
      if (!Remapped.empty()) {
        if (RealName.begin() == FuncName.begin() &&
            RealName.end() == FuncName.end())
          FuncName = Remapped;
        else {
          // Try rebuilding the name from the given remapping.
          SmallString<256> Reconstituted;
          reconstituteName(FuncName, RealName, Remapped, Reconstituted);
          Error E = Underlying.getRecords(Reconstituted, Data);
          if (!E)
            return E;

          // If we failed because the name doesn't exist, fall back to asking
          // about the original name.
          if (Error Unhandled = handleErrors(
                  std::move(E), [](std::unique_ptr<InstrProfError> Err) {
                    return Err->get() == instrprof_error::unknown_function
                               ? Error::success()
                               : Error(std::move(Err));
                  }))
            return Unhandled;
        }
      }
    }
    return Underlying.getRecords(FuncName, Data);
  }

private:
  /// The memory buffer containing the remapping configuration. Remappings
  /// holds pointers into this buffer.
  std::unique_ptr<MemoryBuffer> RemapBuffer;

  /// The mangling remapper.
  SymbolRemappingReader Remappings;

  /// Mapping from mangled name keys to the name used for the key in the
  /// profile data.
  /// FIXME: Can we store a location within the on-disk hash table instead of
  /// redoing lookup?
  DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames;

  /// The real profile data reader.
  InstrProfReaderIndex<HashTableImpl> &Underlying;
};

/// Return true when \p DataBuffer holds at least 8 bytes and its first 8
/// bytes, read as a little-endian 64-bit value, equal the indexed profile
/// magic.
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
  using namespace support;

  if (DataBuffer.getBufferSize() < 8)
    return false;
  uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
      DataBuffer.getBufferStart());
  // Verify that it's magical.
return Magic == IndexedInstrProf::Magic;
}

/// Read the profile summary located at \p Cur.
///
/// For Version4 and later the on-disk summary is converted from little-endian
/// into a freshly allocated IndexedInstrProf::Summary, a ProfileSummary is
/// built from it and stored in CS_Summary (when \p UseCS) or Summary, and the
/// returned pointer is \p Cur advanced by the summary size. For older
/// versions an empty summary with the default cutoffs is assigned to Summary
/// and \p Cur is returned unchanged.
const unsigned char *
IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
                                    const unsigned char *Cur, bool UseCS) {
  using namespace IndexedInstrProf;
  using namespace support;

  if (Version >= IndexedInstrProf::Version4) {
    const IndexedInstrProf::Summary *SummaryInLE =
        reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
    uint64_t NFields = endian::byte_swap<uint64_t, llvm::endianness::little>(
        SummaryInLE->NumSummaryFields);
    uint64_t NEntries = endian::byte_swap<uint64_t, llvm::endianness::little>(
        SummaryInLE->NumCutoffEntries);
    // NOTE(review): NFields/NEntries come from the file and nothing visible
    // here checks that SummarySize bytes are available after Cur; confirm the
    // caller guarantees it.
    uint32_t SummarySize =
        IndexedInstrProf::Summary::getSize(NFields, NEntries);
    std::unique_ptr<IndexedInstrProf::Summary> SummaryData =
        IndexedInstrProf::allocSummary(SummarySize);

    // Copy the summary one 64-bit word at a time, converting each word from
    // little-endian.
    const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE);
    uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get());
    for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++)
      Dst[I] = endian::byte_swap<uint64_t, llvm::endianness::little>(Src[I]);

    SummaryEntryVector DetailedSummary;
    for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) {
      const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I);
      DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount,
                                   Ent.NumBlocks);
    }
    std::unique_ptr<llvm::ProfileSummary> &Summary =
        UseCS ? this->CS_Summary : this->Summary;

    // initialize InstrProfSummary using the SummaryData from disk.
    Summary = std::make_unique<ProfileSummary>(
        UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr,
        DetailedSummary, SummaryData->get(Summary::TotalBlockCount),
        SummaryData->get(Summary::MaxBlockCount),
        SummaryData->get(Summary::MaxInternalBlockCount),
        SummaryData->get(Summary::MaxFunctionCount),
        SummaryData->get(Summary::TotalNumBlocks),
        SummaryData->get(Summary::TotalNumFunctions));
    return Cur + SummarySize;
  } else {
    // The older versions do not support a profile summary. This just computes
    // an empty summary, which will not result in accurate hot/cold detection.
    // We would need to call addRecord for all NamedInstrProfRecords to get the
    // correct summary. However, this version is old (prior to early 2016) and
    // has not been supporting an accurate summary for several years.
    InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
    Summary = Builder.getSummary();
    return Cur;
  }
}

/// Parse the header of an indexed profile and set up the reader state.
///
/// In order: reads the fixed header, the summary (plus the context-sensitive
/// summary when the CSIR variant bit is set), validates the hash type, creates
/// the on-disk hash table index, and then reads the optional sections gated by
/// the indexed profile version and variant bits (MemProf, binary ids, vtable
/// names, temporal profile traces). Finally installs the remapper (Itanium
/// when a remapping buffer was supplied, otherwise the null remapper) and
/// publishes the index.
Error IndexedInstrProfReader::readHeader() {
  using namespace support;

  const unsigned char *Start =
      (const unsigned char *)DataBuffer->getBufferStart();
  const unsigned char *Cur = Start;
  if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
    return error(instrprof_error::truncated);

  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
  if (!HeaderOr)
    return HeaderOr.takeError();

  const IndexedInstrProf::Header *Header = &HeaderOr.get();
  Cur += Header->size();

  Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur,
                    /* UseCS */ false);
  if (Header->Version & VARIANT_MASK_CSIR_PROF)
    Cur = readSummary((IndexedInstrProf::ProfVersion)Header->Version, Cur,
                      /* UseCS */ true);
  // Read the hash type and start offset.
  IndexedInstrProf::HashT HashType =
      static_cast<IndexedInstrProf::HashT>(Header->HashType);
  if (HashType > IndexedInstrProf::HashT::Last)
    return error(instrprof_error::unsupported_hash_type);

  // The hash table with profile counts comes next.
  auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
      Start + Header->HashOffset, Cur, Start, HashType, Header->Version);

  // The MemProfOffset field in the header is only valid when the format
  // version is higher than 8 (when it was introduced).
  if (Header->getIndexedProfileVersion() >= 8 &&
      Header->Version & VARIANT_MASK_MEMPROF) {
    if (Error E = MemProfReader.deserialize(Start, Header->MemProfOffset))
      return E;
  }

  // BinaryIdOffset field in the header is only valid when the format version
  // is higher than 9 (when it was introduced).
  if (Header->getIndexedProfileVersion() >= 9) {
    const unsigned char *Ptr = Start + Header->BinaryIdOffset;
    // Read binary ids size.
    // NOTE(review): the size field is read before Ptr is compared against the
    // buffer end, and the check below does not account for BinaryIdsSize —
    // confirm the offset and size are validated elsewhere.
    uint64_t BinaryIdsSize =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    if (BinaryIdsSize % sizeof(uint64_t))
      return error(instrprof_error::bad_header);
    // Set the binary ids start.
    BinaryIdsBuffer = ArrayRef<uint8_t>(Ptr, BinaryIdsSize);
    if (Ptr > (const unsigned char *)DataBuffer->getBufferEnd())
      return make_error<InstrProfError>(instrprof_error::malformed,
                                        "corrupted binary ids");
  }

  if (Header->getIndexedProfileVersion() >= 12) {
    const unsigned char *Ptr = Start + Header->VTableNamesOffset;

    // NOTE(review): as with the binary ids, the length is read before the
    // pointer is checked and CompressedVTableNamesLen is not compared against
    // the remaining buffer here — confirm this is validated by the consumer.
    uint64_t CompressedVTableNamesLen =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    // Writer first writes the length of compressed string, and then the actual
    // content.
    const char *VTableNamePtr = (const char *)Ptr;
    if (VTableNamePtr > (const char *)DataBuffer->getBufferEnd())
      return make_error<InstrProfError>(instrprof_error::truncated);

    VTableName = StringRef(VTableNamePtr, CompressedVTableNamesLen);
  }

  if (Header->getIndexedProfileVersion() >= 10 &&
      Header->Version & VARIANT_MASK_TEMPORAL_PROF) {
    const unsigned char *Ptr = Start + Header->TemporalProfTracesOffset;
    const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
    // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
    if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
      return error(instrprof_error::truncated);
    const uint64_t NumTraces =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    TemporalProfTraceStreamSize =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    for (unsigned i = 0; i < NumTraces; i++) {
      // Expect at least two 64 bit fields: Weight and NumFunctions
      if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
        return error(instrprof_error::truncated);
      TemporalProfTraceTy Trace;
      Trace.Weight =
          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
      const uint64_t NumFunctions =
          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
      // Expect at least NumFunctions 64 bit fields
      if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
        return error(instrprof_error::truncated);
      for (unsigned j = 0; j < NumFunctions; j++) {
        const uint64_t NameRef =
            support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
        Trace.FunctionNameRefs.push_back(NameRef);
      }
      TemporalProfTraces.push_back(std::move(Trace));
    }
  }

  // Load the remapping table now if requested.
  if (RemappingBuffer) {
    Remapper =
        std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
            std::move(RemappingBuffer), *IndexPtr);
    if (Error E = Remapper->populateRemappings())
      return E;
  } else {
    Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr);
  }
  Index = std::move(IndexPtr);

  return success();
}

/// Return the symbol table of this profile, building and caching it on the
/// first call.
///
/// Failures while initializing the vtable names or populating the symtab from
/// the index do not abort construction: each error is passed through error()
/// and then consumed, and the (possibly partial) symtab is still cached and
/// returned.
InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
  if (Symtab)
    return *Symtab;

  auto NewSymtab = std::make_unique<InstrProfSymtab>();

  if (Error E = NewSymtab->initVTableNamesFromCompressedStrings(VTableName)) {
    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
    consumeError(error(ErrCode, Msg));
  }

  // finalizeSymtab is called inside populateSymtab.
  if (Error E = Index->populateSymtab(*NewSymtab)) {
    auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
    consumeError(error(ErrCode, Msg));
  }

  Symtab = std::move(NewSymtab);
  return *Symtab;
}

/// Find the record of \p FuncName whose hash equals \p FuncHash.
///
/// The name is looked up through the remapper; when it is unknown the lookup
/// is retried with \p DeprecatedFuncName. Any other lookup error is returned.
/// Among the records found, the one with a matching hash is returned. If none
/// matches, the result is instrprof_error::hash_mismatch when some record has
/// the same context-sensitive flag as \p FuncHash (in which case
/// \p MismatchedFuncSum, if non-null, receives the largest counter sum of
/// those records), otherwise instrprof_error::unknown_function.
Expected<NamedInstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
    StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
    uint64_t *MismatchedFuncSum) {
  ArrayRef<NamedInstrProfRecord> Data;
  uint64_t FuncSum = 0;
  auto Err = Remapper->getRecords(FuncName, Data);
  if (Err) {
    // If we don't find FuncName, try DeprecatedFuncName to handle profiles
    // built by older compilers.
    auto Err2 =
        handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
          if (IE.get() != instrprof_error::unknown_function)
            return make_error<InstrProfError>(IE);
          if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
            return Err;
          return Error::success();
        });
    if (Err2)
      return std::move(Err2);
  }
  // Found it. Look for counters with the right hash.

  // A flag to indicate if the records are from the same type
  // of profile (i.e cs vs nocs).
  bool CSBitMatch = false;
  // Sum the counters of a record, skipping counters equal to (uint64_t)-1 and
  // saturating at the maximum uint64_t value.
  auto getFuncSum = [](ArrayRef<uint64_t> Counts) {
    uint64_t ValueSum = 0;
    for (uint64_t CountValue : Counts) {
      if (CountValue == (uint64_t)-1)
        continue;
      // Handle overflow -- if that happens, return max.
      if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum)
        return std::numeric_limits<uint64_t>::max();
      ValueSum += CountValue;
    }
    return ValueSum;
  };

  for (const NamedInstrProfRecord &I : Data) {
    // Check for a match and fill the vector if there is one.
    if (I.Hash == FuncHash)
      return std::move(I);
    if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) ==
        NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) {
      CSBitMatch = true;
      // The counter sum is only needed when the caller asked for it.
      if (MismatchedFuncSum == nullptr)
        continue;
      FuncSum = std::max(FuncSum, getFuncSum(I.Counts));
    }
  }
  if (CSBitMatch) {
    if (MismatchedFuncSum != nullptr)
      *MismatchedFuncSum = FuncSum;
    return error(instrprof_error::hash_mismatch);
  }
  return error(instrprof_error::unknown_function);
}

/// Convert \p IndexedRecord to a MemProfRecord by resolving its call stack
/// ids through \p MemProfCallStackTable and the frame ids through
/// \p MemProfFrameTable.
///
/// Returns instrprof_error::hash_mismatch when a call stack id or a frame id
/// could not be resolved; the call stack ids are checked first.
static Expected<memprof::MemProfRecord>
getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
                   MemProfFrameHashTable &MemProfFrameTable,
                   MemProfCallStackHashTable &MemProfCallStackTable) {
  memprof::FrameIdConverter<MemProfFrameHashTable> FrameIdConv(
      MemProfFrameTable);

  memprof::CallStackIdConverter<MemProfCallStackHashTable> CSIdConv(
      MemProfCallStackTable, FrameIdConv);

  memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);

  // Check that all call stack ids were successfully converted to call stacks.
  if (CSIdConv.LastUnmappedId) {
    return make_error<InstrProfError>(
        instrprof_error::hash_mismatch,
        "memprof call stack not found for call stack id " +
            Twine(*CSIdConv.LastUnmappedId));
  }

  // Check that all frame ids were successfully converted to frames.
  if (FrameIdConv.LastUnmappedId) {
    return make_error<InstrProfError>(instrprof_error::hash_mismatch,
                                      "memprof frame not found for frame id " +
                                          Twine(*FrameIdConv.LastUnmappedId));
  }

  return Record;
}

/// Look up and materialize the MemProf record for \p FuncNameHash.
///
/// Returns instrprof_error::invalid_prof when the profile has no MemProf
/// record table, instrprof_error::unknown_function when the hash is not in
/// the table, and instrprof_error::unsupported_version when Version is not
/// one of Version2, Version3 or Version4. Version2 resolves ids through the
/// frame and call stack hash tables; Version3 and Version4 resolve them
/// through the linear FrameBase/CallStackBase data.
Expected<memprof::MemProfRecord>
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
  // TODO: Add memprof specific errors.
  if (MemProfRecordTable == nullptr)
    return make_error<InstrProfError>(instrprof_error::invalid_prof,
                                      "no memprof data available in profile");
  auto Iter = MemProfRecordTable->find(FuncNameHash);
  if (Iter == MemProfRecordTable->end())
    return make_error<InstrProfError>(
        instrprof_error::unknown_function,
        "memprof record not found for function hash " + Twine(FuncNameHash));

  const memprof::IndexedMemProfRecord &IndexedRecord = *Iter;
  switch (Version) {
  case memprof::Version2:
    assert(MemProfFrameTable && "MemProfFrameTable must be available");
    assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
    return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
                              *MemProfCallStackTable);
  // Combine V3 and V4 cases as the record conversion logic is the same.
  case memprof::Version3:
  case memprof::Version4:
    assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
    assert(!MemProfCallStackTable &&
           "MemProfCallStackTable must not be available");
    assert(FrameBase && "FrameBase must be available");
    assert(CallStackBase && "CallStackBase must be available");
    {
      memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
      memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
      memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
      return Record;
    }
  }

  // Reached only when Version matched none of the cases above.
  return make_error<InstrProfError>(
      instrprof_error::unsupported_version,
      formatv("MemProf version {} not supported; "
              "requires version between {} and {}, inclusive",
              Version, memprof::MinimumSupportedVersion,
              memprof::MaximumSupportedVersion));
}

/// Collect, per caller GUID, the sorted and de-duplicated list of call edges
/// extracted from the call stacks of all allocation sites in the MemProf
/// record table.
///
/// Requires a record table and a Version3 or Version4 profile (both are
/// asserted).
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
IndexedMemProfReader::getMemProfCallerCalleePairs() const {
  assert(MemProfRecordTable);
  assert(Version == memprof::Version3 || Version == memprof::Version4);

  memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
  memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,
                                               RadixTreeSize);

  // The set of linear call stack IDs that we need to traverse from.  We expect
  // the set to be dense, so we use a BitVector.
  BitVector Worklist(RadixTreeSize);

  // Collect the set of linear call stack IDs.  Since we expect a lot of
  // duplicates, we first collect them in the form of a bit vector before
  // processing them.
  for (const memprof::IndexedMemProfRecord &IndexedRecord :
       MemProfRecordTable->data()) {
    for (const memprof::IndexedAllocationInfo &IndexedAI :
         IndexedRecord.AllocSites)
      Worklist.set(IndexedAI.CSId);
  }

  // Collect caller-callee pairs for each linear call stack ID in Worklist.
1527 for (unsigned CS : Worklist.set_bits()) 1528 Extractor(CS); 1529 1530 DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> Pairs = 1531 std::move(Extractor.CallerCalleePairs); 1532 1533 // Sort each call list by the source location. 1534 for (auto &[CallerGUID, CallList] : Pairs) { 1535 llvm::sort(CallList); 1536 CallList.erase(llvm::unique(CallList), CallList.end()); 1537 } 1538 1539 return Pairs; 1540 } 1541 1542 memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const { 1543 memprof::AllMemProfData AllMemProfData; 1544 AllMemProfData.HeapProfileRecords.reserve( 1545 MemProfRecordTable->getNumEntries()); 1546 for (uint64_t Key : MemProfRecordTable->keys()) { 1547 auto Record = getMemProfRecord(Key); 1548 if (Record.takeError()) 1549 continue; 1550 memprof::GUIDMemProfRecordPair Pair; 1551 Pair.GUID = Key; 1552 Pair.Record = std::move(*Record); 1553 AllMemProfData.HeapProfileRecords.push_back(std::move(Pair)); 1554 } 1555 // Populate the data access profiles for yaml output. 
1556 if (DataAccessProfileData != nullptr) { 1557 for (const auto &[SymHandleRef, RecordRef] : 1558 DataAccessProfileData->getRecords()) 1559 AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back( 1560 memprof::DataAccessProfRecord(SymHandleRef, RecordRef.AccessCount, 1561 RecordRef.Locations)); 1562 for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols()) 1563 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back( 1564 ColdSymbol.str()); 1565 for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes()) 1566 AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes.push_back( 1567 Hash); 1568 } 1569 return AllMemProfData; 1570 } 1571 1572 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1573 uint64_t FuncHash, 1574 std::vector<uint64_t> &Counts) { 1575 auto Record = getInstrProfRecord(FuncName, FuncHash); 1576 if (Error E = Record.takeError()) 1577 return error(std::move(E)); 1578 1579 Counts = Record.get().Counts; 1580 return success(); 1581 } 1582 1583 Error IndexedInstrProfReader::getFunctionBitmap(StringRef FuncName, 1584 uint64_t FuncHash, 1585 BitVector &Bitmap) { 1586 auto Record = getInstrProfRecord(FuncName, FuncHash); 1587 if (Error E = Record.takeError()) 1588 return error(std::move(E)); 1589 1590 const auto &BitmapBytes = Record.get().BitmapBytes; 1591 size_t I = 0, E = BitmapBytes.size(); 1592 Bitmap.resize(E * CHAR_BIT); 1593 BitVector::apply( 1594 [&](auto X) { 1595 using XTy = decltype(X); 1596 alignas(XTy) uint8_t W[sizeof(X)]; 1597 size_t N = std::min(E - I, sizeof(W)); 1598 std::memset(W, 0, sizeof(W)); 1599 std::memcpy(W, &BitmapBytes[I], N); 1600 I += N; 1601 return support::endian::read<XTy, llvm::endianness::little, 1602 support::aligned>(W); 1603 }, 1604 Bitmap, Bitmap); 1605 assert(I == E); 1606 1607 return success(); 1608 } 1609 1610 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1611 ArrayRef<NamedInstrProfRecord> Data; 1612 1613 Error E = 
Index->getRecords(Data); 1614 if (E) 1615 return error(std::move(E)); 1616 1617 Record = Data[RecordIndex++]; 1618 if (RecordIndex >= Data.size()) { 1619 Index->advanceToNextKey(); 1620 RecordIndex = 0; 1621 } 1622 return success(); 1623 } 1624 1625 Error IndexedInstrProfReader::readBinaryIds( 1626 std::vector<llvm::object::BuildID> &BinaryIds) { 1627 return readBinaryIdsInternal(*DataBuffer, BinaryIdsBuffer, BinaryIds, 1628 llvm::endianness::little); 1629 } 1630 1631 Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) { 1632 std::vector<llvm::object::BuildID> BinaryIds; 1633 if (Error E = readBinaryIds(BinaryIds)) 1634 return E; 1635 printBinaryIdsInternal(OS, BinaryIds); 1636 return Error::success(); 1637 } 1638 1639 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1640 uint64_t NumFuncs = 0; 1641 for (const auto &Func : *this) { 1642 if (isIRLevelProfile()) { 1643 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1644 if (FuncIsCS != IsCS) 1645 continue; 1646 } 1647 Func.accumulateCounts(Sum); 1648 ++NumFuncs; 1649 } 1650 Sum.NumEntries = NumFuncs; 1651 } 1652