1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/ProfileData/MemProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/ProfileData/SymbolRemappingReader.h" 24 #include "llvm/Support/Endian.h" 25 #include "llvm/Support/Error.h" 26 #include "llvm/Support/ErrorOr.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/SwapByteOrder.h" 29 #include "llvm/Support/VirtualFileSystem.h" 30 #include <algorithm> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 // Extracts the variant information from the top 32 bits in the version and 42 // returns an enum specifying the variants present. 43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 44 InstrProfKind ProfileKind = InstrProfKind::Unknown; 45 if (Version & VARIANT_MASK_IR_PROF) { 46 ProfileKind |= InstrProfKind::IRInstrumentation; 47 } 48 if (Version & VARIANT_MASK_CSIR_PROF) { 49 ProfileKind |= InstrProfKind::ContextSensitive; 50 } 51 if (Version & VARIANT_MASK_INSTR_ENTRY) { 52 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 53 } 54 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 55 ProfileKind |= InstrProfKind::SingleByteCoverage; 56 } 57 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 58 ProfileKind |= InstrProfKind::FunctionEntryOnly; 59 } 60 if (Version & VARIANT_MASK_MEMPROF) { 61 ProfileKind |= InstrProfKind::MemProf; 62 } 63 if (Version & VARIANT_MASK_TEMPORAL_PROF) { 64 ProfileKind |= InstrProfKind::TemporalProfile; 65 } 66 return ProfileKind; 67 } 68 69 static Expected<std::unique_ptr<MemoryBuffer>> 70 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 71 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 72 : FS.getBufferForFile(Filename); 73 if (std::error_code EC = BufferOrErr.getError()) 74 return errorCodeToError(EC); 75 return std::move(BufferOrErr.get()); 76 } 77 78 static Error initializeReader(InstrProfReader &Reader) { 79 return Reader.readHeader(); 80 } 81 82 /// Read a list of binary ids from a profile that consist of 83 /// a. uint64_t binary id length 84 /// b. uint8_t binary id data 85 /// c. uint8_t padding (if necessary) 86 /// This function is shared between raw and indexed profiles. 87 /// Raw profiles are in host-endian format, and indexed profiles are in 88 /// little-endian format. So, this function takes an argument indicating the 89 /// associated endian format to read the binary ids correctly. 90 static Error 91 readBinaryIdsInternal(const MemoryBuffer &DataBuffer, 92 const uint64_t BinaryIdsSize, 93 const uint8_t *BinaryIdsStart, 94 std::vector<llvm::object::BuildID> &BinaryIds, 95 const llvm::endianness Endian) { 96 using namespace support; 97 98 if (BinaryIdsSize == 0) 99 return Error::success(); 100 101 const uint8_t *BI = BinaryIdsStart; 102 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 103 const uint8_t *End = 104 reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd()); 105 106 while (BI < BIEnd) { 107 size_t Remaining = BIEnd - BI; 108 // There should be enough left to read the binary id length. 109 if (Remaining < sizeof(uint64_t)) 110 return make_error<InstrProfError>( 111 instrprof_error::malformed, 112 "not enough data to read binary id length"); 113 114 uint64_t BILen = 0; 115 if (Endian == llvm::endianness::little) 116 BILen = 117 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(BI); 118 else 119 BILen = endian::readNext<uint64_t, llvm::endianness::big, unaligned>(BI); 120 121 if (BILen == 0) 122 return make_error<InstrProfError>(instrprof_error::malformed, 123 "binary id length is 0"); 124 125 Remaining = BIEnd - BI; 126 // There should be enough left to read the binary id data. 127 if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t))) 128 return make_error<InstrProfError>( 129 instrprof_error::malformed, "not enough data to read binary id data"); 130 131 // Add binary id to the binary ids list. 132 BinaryIds.push_back(object::BuildID(BI, BI + BILen)); 133 134 // Increment by binary id data length, which aligned to the size of uint64. 135 BI += alignToPowerOf2(BILen, sizeof(uint64_t)); 136 if (BI > End) 137 return make_error<InstrProfError>( 138 instrprof_error::malformed, 139 "binary id section is greater than buffer size"); 140 } 141 142 return Error::success(); 143 } 144 145 static void 146 printBinaryIdsInternal(raw_ostream &OS, 147 std::vector<llvm::object::BuildID> &BinaryIds) { 148 OS << "Binary IDs: \n"; 149 for (auto BI : BinaryIds) { 150 for (uint64_t I = 0; I < BI.size(); I++) 151 OS << format("%02x", BI[I]); 152 OS << "\n"; 153 } 154 } 155 156 Expected<std::unique_ptr<InstrProfReader>> 157 InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, 158 const InstrProfCorrelator *Correlator, 159 std::function<void(Error)> Warn) { 160 // Set up the buffer to read. 161 auto BufferOrError = setupMemoryBuffer(Path, FS); 162 if (Error E = BufferOrError.takeError()) 163 return std::move(E); 164 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator, 165 Warn); 166 } 167 168 Expected<std::unique_ptr<InstrProfReader>> 169 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 170 const InstrProfCorrelator *Correlator, 171 std::function<void(Error)> Warn) { 172 if (Buffer->getBufferSize() == 0) 173 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 174 175 std::unique_ptr<InstrProfReader> Result; 176 // Create the reader. 177 if (IndexedInstrProfReader::hasFormat(*Buffer)) 178 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 179 else if (RawInstrProfReader64::hasFormat(*Buffer)) 180 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn)); 181 else if (RawInstrProfReader32::hasFormat(*Buffer)) 182 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn)); 183 else if (TextInstrProfReader::hasFormat(*Buffer)) 184 Result.reset(new TextInstrProfReader(std::move(Buffer))); 185 else 186 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 187 188 // Initialize the reader and return the result. 189 if (Error E = initializeReader(*Result)) 190 return std::move(E); 191 192 return std::move(Result); 193 } 194 195 Expected<std::unique_ptr<IndexedInstrProfReader>> 196 IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS, 197 const Twine &RemappingPath) { 198 // Set up the buffer to read. 199 auto BufferOrError = setupMemoryBuffer(Path, FS); 200 if (Error E = BufferOrError.takeError()) 201 return std::move(E); 202 203 // Set up the remapping buffer if requested. 204 std::unique_ptr<MemoryBuffer> RemappingBuffer; 205 std::string RemappingPathStr = RemappingPath.str(); 206 if (!RemappingPathStr.empty()) { 207 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS); 208 if (Error E = RemappingBufferOrError.takeError()) 209 return std::move(E); 210 RemappingBuffer = std::move(RemappingBufferOrError.get()); 211 } 212 213 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 214 std::move(RemappingBuffer)); 215 } 216 217 Expected<std::unique_ptr<IndexedInstrProfReader>> 218 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 219 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 220 // Create the reader. 221 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 222 return make_error<InstrProfError>(instrprof_error::bad_magic); 223 auto Result = std::make_unique<IndexedInstrProfReader>( 224 std::move(Buffer), std::move(RemappingBuffer)); 225 226 // Initialize the reader and return the result. 227 if (Error E = initializeReader(*Result)) 228 return std::move(E); 229 230 return std::move(Result); 231 } 232 233 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 234 // Verify that this really looks like plain ASCII text by checking a 235 // 'reasonable' number of characters (up to profile magic size). 236 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 237 StringRef buffer = Buffer.getBufferStart(); 238 return count == 0 || 239 std::all_of(buffer.begin(), buffer.begin() + count, 240 [](char c) { return isPrint(c) || isSpace(c); }); 241 } 242 243 // Read the profile variant flag from the header: ":FE" means this is a FE 244 // generated profile. ":IR" means this is an IR level profile. Other strings 245 // with a leading ':' will be reported an error format. 246 Error TextInstrProfReader::readHeader() { 247 Symtab.reset(new InstrProfSymtab()); 248 249 while (Line->starts_with(":")) { 250 StringRef Str = Line->substr(1); 251 if (Str.equals_insensitive("ir")) 252 ProfileKind |= InstrProfKind::IRInstrumentation; 253 else if (Str.equals_insensitive("fe")) 254 ProfileKind |= InstrProfKind::FrontendInstrumentation; 255 else if (Str.equals_insensitive("csir")) { 256 ProfileKind |= InstrProfKind::IRInstrumentation; 257 ProfileKind |= InstrProfKind::ContextSensitive; 258 } else if (Str.equals_insensitive("entry_first")) 259 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 260 else if (Str.equals_insensitive("not_entry_first")) 261 ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; 262 else if (Str.equals_insensitive("single_byte_coverage")) 263 ProfileKind |= InstrProfKind::SingleByteCoverage; 264 else if (Str.equals_insensitive("temporal_prof_traces")) { 265 ProfileKind |= InstrProfKind::TemporalProfile; 266 if (auto Err = readTemporalProfTraceData()) 267 return error(std::move(Err)); 268 } else 269 return error(instrprof_error::bad_header); 270 ++Line; 271 } 272 return success(); 273 } 274 275 /// Temporal profile trace data is stored in the header immediately after 276 /// ":temporal_prof_traces". The first integer is the number of traces, the 277 /// second integer is the stream size, then the following lines are the actual 278 /// traces which consist of a weight and a comma separated list of function 279 /// names. 280 Error TextInstrProfReader::readTemporalProfTraceData() { 281 if ((++Line).is_at_end()) 282 return error(instrprof_error::eof); 283 284 uint32_t NumTraces; 285 if (Line->getAsInteger(0, NumTraces)) 286 return error(instrprof_error::malformed); 287 288 if ((++Line).is_at_end()) 289 return error(instrprof_error::eof); 290 291 if (Line->getAsInteger(0, TemporalProfTraceStreamSize)) 292 return error(instrprof_error::malformed); 293 294 for (uint32_t i = 0; i < NumTraces; i++) { 295 if ((++Line).is_at_end()) 296 return error(instrprof_error::eof); 297 298 TemporalProfTraceTy Trace; 299 if (Line->getAsInteger(0, Trace.Weight)) 300 return error(instrprof_error::malformed); 301 302 if ((++Line).is_at_end()) 303 return error(instrprof_error::eof); 304 305 SmallVector<StringRef> FuncNames; 306 Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false); 307 for (auto &FuncName : FuncNames) 308 Trace.FunctionNameRefs.push_back( 309 IndexedInstrProf::ComputeHash(FuncName.trim())); 310 TemporalProfTraces.push_back(std::move(Trace)); 311 } 312 return success(); 313 } 314 315 Error 316 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 317 318 #define CHECK_LINE_END(Line) \ 319 if (Line.is_at_end()) \ 320 return error(instrprof_error::truncated); 321 #define READ_NUM(Str, Dst) \ 322 if ((Str).getAsInteger(10, (Dst))) \ 323 return error(instrprof_error::malformed); 324 #define VP_READ_ADVANCE(Val) \ 325 CHECK_LINE_END(Line); \ 326 uint32_t Val; \ 327 READ_NUM((*Line), (Val)); \ 328 Line++; 329 330 if (Line.is_at_end()) 331 return success(); 332 333 uint32_t NumValueKinds; 334 if (Line->getAsInteger(10, NumValueKinds)) { 335 // No value profile data 336 return success(); 337 } 338 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 339 return error(instrprof_error::malformed, 340 "number of value kinds is invalid"); 341 Line++; 342 343 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 344 VP_READ_ADVANCE(ValueKind); 345 if (ValueKind > IPVK_Last) 346 return error(instrprof_error::malformed, "value kind is invalid"); 347 ; 348 VP_READ_ADVANCE(NumValueSites); 349 if (!NumValueSites) 350 continue; 351 352 Record.reserveSites(VK, NumValueSites); 353 for (uint32_t S = 0; S < NumValueSites; S++) { 354 VP_READ_ADVANCE(NumValueData); 355 356 std::vector<InstrProfValueData> CurrentValues; 357 for (uint32_t V = 0; V < NumValueData; V++) { 358 CHECK_LINE_END(Line); 359 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 360 uint64_t TakenCount, Value; 361 if (ValueKind == IPVK_IndirectCallTarget) { 362 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 363 Value = 0; 364 } else { 365 if (Error E = Symtab->addFuncName(VD.first)) 366 return E; 367 Value = IndexedInstrProf::ComputeHash(VD.first); 368 } 369 } else { 370 READ_NUM(VD.first, Value); 371 } 372 READ_NUM(VD.second, TakenCount); 373 CurrentValues.push_back({Value, TakenCount}); 374 Line++; 375 } 376 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 377 nullptr); 378 } 379 } 380 return success(); 381 382 #undef CHECK_LINE_END 383 #undef READ_NUM 384 #undef VP_READ_ADVANCE 385 } 386 387 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 388 // Skip empty lines and comments. 389 while (!Line.is_at_end() && (Line->empty() || Line->starts_with("#"))) 390 ++Line; 391 // If we hit EOF while looking for a name, we're done. 392 if (Line.is_at_end()) { 393 return error(instrprof_error::eof); 394 } 395 396 // Read the function name. 397 Record.Name = *Line++; 398 if (Error E = Symtab->addFuncName(Record.Name)) 399 return error(std::move(E)); 400 401 // Read the function hash. 402 if (Line.is_at_end()) 403 return error(instrprof_error::truncated); 404 if ((Line++)->getAsInteger(0, Record.Hash)) 405 return error(instrprof_error::malformed, 406 "function hash is not a valid integer"); 407 408 // Read the number of counters. 409 uint64_t NumCounters; 410 if (Line.is_at_end()) 411 return error(instrprof_error::truncated); 412 if ((Line++)->getAsInteger(10, NumCounters)) 413 return error(instrprof_error::malformed, 414 "number of counters is not a valid integer"); 415 if (NumCounters == 0) 416 return error(instrprof_error::malformed, "number of counters is zero"); 417 418 // Read each counter and fill our internal storage with the values. 419 Record.Clear(); 420 Record.Counts.reserve(NumCounters); 421 for (uint64_t I = 0; I < NumCounters; ++I) { 422 if (Line.is_at_end()) 423 return error(instrprof_error::truncated); 424 uint64_t Count; 425 if ((Line++)->getAsInteger(10, Count)) 426 return error(instrprof_error::malformed, "count is invalid"); 427 Record.Counts.push_back(Count); 428 } 429 430 // Bitmap byte information is indicated with special character. 431 if (Line->starts_with("$")) { 432 Record.BitmapBytes.clear(); 433 // Read the number of bitmap bytes. 434 uint64_t NumBitmapBytes; 435 if ((Line++)->drop_front(1).trim().getAsInteger(0, NumBitmapBytes)) 436 return error(instrprof_error::malformed, 437 "number of bitmap bytes is not a valid integer"); 438 if (NumBitmapBytes != 0) { 439 // Read each bitmap and fill our internal storage with the values. 440 Record.BitmapBytes.reserve(NumBitmapBytes); 441 for (uint8_t I = 0; I < NumBitmapBytes; ++I) { 442 if (Line.is_at_end()) 443 return error(instrprof_error::truncated); 444 uint8_t BitmapByte; 445 if ((Line++)->getAsInteger(0, BitmapByte)) 446 return error(instrprof_error::malformed, 447 "bitmap byte is not a valid integer"); 448 Record.BitmapBytes.push_back(BitmapByte); 449 } 450 } 451 } 452 453 // Check if value profile data exists and read it if so. 454 if (Error E = readValueProfileData(Record)) 455 return error(std::move(E)); 456 457 return success(); 458 } 459 460 template <class IntPtrT> 461 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 462 return getProfileKindFromVersion(Version); 463 } 464 465 template <class IntPtrT> 466 SmallVector<TemporalProfTraceTy> & 467 RawInstrProfReader<IntPtrT>::getTemporalProfTraces( 468 std::optional<uint64_t> Weight) { 469 if (TemporalProfTimestamps.empty()) { 470 assert(TemporalProfTraces.empty()); 471 return TemporalProfTraces; 472 } 473 // Sort functions by their timestamps to build the trace. 474 std::sort(TemporalProfTimestamps.begin(), TemporalProfTimestamps.end()); 475 TemporalProfTraceTy Trace; 476 if (Weight) 477 Trace.Weight = *Weight; 478 for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps) 479 Trace.FunctionNameRefs.push_back(NameRef); 480 TemporalProfTraces = {std::move(Trace)}; 481 return TemporalProfTraces; 482 } 483 484 template <class IntPtrT> 485 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 486 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 487 return false; 488 uint64_t Magic = 489 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 490 return RawInstrProf::getMagic<IntPtrT>() == Magic || 491 llvm::byteswap(RawInstrProf::getMagic<IntPtrT>()) == Magic; 492 } 493 494 template <class IntPtrT> 495 Error RawInstrProfReader<IntPtrT>::readHeader() { 496 if (!hasFormat(*DataBuffer)) 497 return error(instrprof_error::bad_magic); 498 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 499 return error(instrprof_error::bad_header); 500 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 501 DataBuffer->getBufferStart()); 502 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 503 return readHeader(*Header); 504 } 505 506 template <class IntPtrT> 507 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 508 const char *End = DataBuffer->getBufferEnd(); 509 // Skip zero padding between profiles. 510 while (CurrentPos != End && *CurrentPos == 0) 511 ++CurrentPos; 512 // If there's nothing left, we're done. 513 if (CurrentPos == End) 514 return make_error<InstrProfError>(instrprof_error::eof); 515 // If there isn't enough space for another header, this is probably just 516 // garbage at the end of the file. 517 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 518 return make_error<InstrProfError>(instrprof_error::malformed, 519 "not enough space for another header"); 520 // The writer ensures each profile is padded to start at an aligned address. 521 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 522 return make_error<InstrProfError>(instrprof_error::malformed, 523 "insufficient padding"); 524 // The magic should have the same byte order as in the previous header. 525 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 526 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 527 return make_error<InstrProfError>(instrprof_error::bad_magic); 528 529 // There's another profile to read, so we need to process the header. 530 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 531 return readHeader(*Header); 532 } 533 534 template <class IntPtrT> 535 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 536 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 537 return error(std::move(E)); 538 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 539 const IntPtrT FPtr = swap(I->FunctionPointer); 540 if (!FPtr) 541 continue; 542 Symtab.mapAddress(FPtr, swap(I->NameRef)); 543 } 544 return success(); 545 } 546 547 template <class IntPtrT> 548 Error RawInstrProfReader<IntPtrT>::readHeader( 549 const RawInstrProf::Header &Header) { 550 Version = swap(Header.Version); 551 if (GET_VERSION(Version) != RawInstrProf::Version) 552 return error(instrprof_error::raw_profile_version_mismatch, 553 ("Profile uses raw profile format version = " + 554 Twine(GET_VERSION(Version)) + 555 "; expected version = " + Twine(RawInstrProf::Version) + 556 "\nPLEASE update this tool to version in the raw profile, or " 557 "regenerate raw profile with expected version.") 558 .str()); 559 560 uint64_t BinaryIdSize = swap(Header.BinaryIdsSize); 561 // Binary id start just after the header if exists. 562 const uint8_t *BinaryIdStart = 563 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 564 const uint8_t *BinaryIdEnd = BinaryIdStart + BinaryIdSize; 565 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 566 if (BinaryIdSize % sizeof(uint64_t) || BinaryIdEnd > BufferEnd) 567 return error(instrprof_error::bad_header); 568 if (BinaryIdSize != 0) { 569 if (Error Err = 570 readBinaryIdsInternal(*DataBuffer, BinaryIdSize, BinaryIdStart, 571 BinaryIds, getDataEndianness())) 572 return Err; 573 } 574 575 CountersDelta = swap(Header.CountersDelta); 576 BitmapDelta = swap(Header.BitmapDelta); 577 NamesDelta = swap(Header.NamesDelta); 578 auto NumData = swap(Header.NumData); 579 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 580 auto CountersSize = swap(Header.NumCounters) * getCounterTypeSize(); 581 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 582 auto NumBitmapBytes = swap(Header.NumBitmapBytes); 583 auto PaddingBytesAfterBitmapBytes = swap(Header.PaddingBytesAfterBitmapBytes); 584 auto NamesSize = swap(Header.NamesSize); 585 ValueKindLast = swap(Header.ValueKindLast); 586 587 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 588 auto PaddingSize = getNumPaddingBytes(NamesSize); 589 590 // Profile data starts after profile header and binary ids if exist. 591 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdSize; 592 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 593 ptrdiff_t BitmapOffset = 594 CountersOffset + CountersSize + PaddingBytesAfterCounters; 595 ptrdiff_t NamesOffset = 596 BitmapOffset + NumBitmapBytes + PaddingBytesAfterBitmapBytes; 597 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 598 599 auto *Start = reinterpret_cast<const char *>(&Header); 600 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 601 return error(instrprof_error::bad_header); 602 603 if (Correlator) { 604 // These sizes in the raw file are zero because we constructed them in the 605 // Correlator. 606 if (!(DataSize == 0 && NamesSize == 0 && CountersDelta == 0 && 607 NamesDelta == 0)) 608 return error(instrprof_error::unexpected_correlation_info); 609 Data = Correlator->getDataPointer(); 610 DataEnd = Data + Correlator->getDataSize(); 611 NamesStart = Correlator->getNamesPointer(); 612 NamesEnd = NamesStart + Correlator->getNamesSize(); 613 } else { 614 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 615 Start + DataOffset); 616 DataEnd = Data + NumData; 617 NamesStart = Start + NamesOffset; 618 NamesEnd = NamesStart + NamesSize; 619 } 620 621 CountersStart = Start + CountersOffset; 622 CountersEnd = CountersStart + CountersSize; 623 BitmapStart = Start + BitmapOffset; 624 BitmapEnd = BitmapStart + NumBitmapBytes; 625 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 626 627 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 628 if (Error E = createSymtab(*NewSymtab)) 629 return E; 630 631 Symtab = std::move(NewSymtab); 632 return success(); 633 } 634 635 template <class IntPtrT> 636 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 637 Record.Name = getName(Data->NameRef); 638 return success(); 639 } 640 641 template <class IntPtrT> 642 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 643 Record.Hash = swap(Data->FuncHash); 644 return success(); 645 } 646 647 template <class IntPtrT> 648 Error RawInstrProfReader<IntPtrT>::readRawCounts( 649 InstrProfRecord &Record) { 650 uint32_t NumCounters = swap(Data->NumCounters); 651 if (NumCounters == 0) 652 return error(instrprof_error::malformed, "number of counters is zero"); 653 654 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 655 if (CounterBaseOffset < 0) 656 return error( 657 instrprof_error::malformed, 658 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 659 660 if (CounterBaseOffset >= CountersEnd - CountersStart) 661 return error(instrprof_error::malformed, 662 ("counter offset " + Twine(CounterBaseOffset) + 663 " is greater than the maximum counter offset " + 664 Twine(CountersEnd - CountersStart - 1)) 665 .str()); 666 667 uint64_t MaxNumCounters = 668 (CountersEnd - (CountersStart + CounterBaseOffset)) / 669 getCounterTypeSize(); 670 if (NumCounters > MaxNumCounters) 671 return error(instrprof_error::malformed, 672 ("number of counters " + Twine(NumCounters) + 673 " is greater than the maximum number of counters " + 674 Twine(MaxNumCounters)) 675 .str()); 676 677 Record.Counts.clear(); 678 Record.Counts.reserve(NumCounters); 679 for (uint32_t I = 0; I < NumCounters; I++) { 680 const char *Ptr = 681 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 682 if (I == 0 && hasTemporalProfile()) { 683 uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr)); 684 if (TimestampValue != 0 && 685 TimestampValue != std::numeric_limits<uint64_t>::max()) { 686 TemporalProfTimestamps.emplace_back(TimestampValue, 687 swap(Data->NameRef)); 688 TemporalProfTraceStreamSize = 1; 689 } 690 if (hasSingleByteCoverage()) { 691 // In coverage mode, getCounterTypeSize() returns 1 byte but our 692 // timestamp field has size uint64_t. Increment I so that the next 693 // iteration of this for loop points to the byte after the timestamp 694 // field, i.e., I += 8. 695 I += 7; 696 } 697 continue; 698 } 699 if (hasSingleByteCoverage()) { 700 // A value of zero signifies the block is covered. 701 Record.Counts.push_back(*Ptr == 0 ? 1 : 0); 702 } else { 703 uint64_t CounterValue = swap(*reinterpret_cast<const uint64_t *>(Ptr)); 704 if (CounterValue > MaxCounterValue && Warn) 705 Warn(make_error<InstrProfError>( 706 instrprof_error::counter_value_too_large, Twine(CounterValue))); 707 708 Record.Counts.push_back(CounterValue); 709 } 710 } 711 712 return success(); 713 } 714 715 template <class IntPtrT> 716 Error RawInstrProfReader<IntPtrT>::readRawBitmapBytes(InstrProfRecord &Record) { 717 uint32_t NumBitmapBytes = swap(Data->NumBitmapBytes); 718 719 Record.BitmapBytes.clear(); 720 Record.BitmapBytes.reserve(NumBitmapBytes); 721 722 // It's possible MCDC is either not enabled or only used for some functions 723 // and not others. So if we record 0 bytes, just move on. 724 if (NumBitmapBytes == 0) 725 return success(); 726 727 // BitmapDelta decreases as we advance to the next data record. 728 ptrdiff_t BitmapOffset = swap(Data->BitmapPtr) - BitmapDelta; 729 if (BitmapOffset < 0) 730 return error( 731 instrprof_error::malformed, 732 ("bitmap offset " + Twine(BitmapOffset) + " is negative").str()); 733 734 if (BitmapOffset >= BitmapEnd - BitmapStart) 735 return error(instrprof_error::malformed, 736 ("bitmap offset " + Twine(BitmapOffset) + 737 " is greater than the maximum bitmap offset " + 738 Twine(BitmapEnd - BitmapStart - 1)) 739 .str()); 740 741 uint64_t MaxNumBitmapBytes = 742 (BitmapEnd - (BitmapStart + BitmapOffset)) / sizeof(uint8_t); 743 if (NumBitmapBytes > MaxNumBitmapBytes) 744 return error(instrprof_error::malformed, 745 ("number of bitmap bytes " + Twine(NumBitmapBytes) + 746 " is greater than the maximum number of bitmap bytes " + 747 Twine(MaxNumBitmapBytes)) 748 .str()); 749 750 for (uint32_t I = 0; I < NumBitmapBytes; I++) { 751 const char *Ptr = BitmapStart + BitmapOffset + I; 752 Record.BitmapBytes.push_back(swap(*Ptr)); 753 } 754 755 return success(); 756 } 757 758 template <class IntPtrT> 759 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 760 InstrProfRecord &Record) { 761 Record.clearValueData(); 762 CurValueDataSize = 0; 763 // Need to match the logic in value profile dumper code in compiler-rt: 764 uint32_t NumValueKinds = 0; 765 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 766 NumValueKinds += (Data->NumValueSites[I] != 0); 767 768 if (!NumValueKinds) 769 return success(); 770 771 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 772 ValueProfData::getValueProfData( 773 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 774 getDataEndianness()); 775 776 if (Error E = VDataPtrOrErr.takeError()) 777 return E; 778 779 // Note that besides deserialization, this also performs the conversion for 780 // indirect call targets. The function pointers from the raw profile are 781 // remapped into function name hashes. 782 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 783 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 784 return success(); 785 } 786 787 template <class IntPtrT> 788 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 789 // Keep reading profiles that consist of only headers and no profile data and 790 // counters. 791 while (atEnd()) 792 // At this point, ValueDataStart field points to the next header. 793 if (Error E = readNextHeader(getNextHeaderPos())) 794 return error(std::move(E)); 795 796 // Read name and set it in Record. 797 if (Error E = readName(Record)) 798 return error(std::move(E)); 799 800 // Read FuncHash and set it in Record. 801 if (Error E = readFuncHash(Record)) 802 return error(std::move(E)); 803 804 // Read raw counts and set Record. 805 if (Error E = readRawCounts(Record)) 806 return error(std::move(E)); 807 808 // Read raw bitmap bytes and set Record. 809 if (Error E = readRawBitmapBytes(Record)) 810 return error(std::move(E)); 811 812 // Read value data and set Record. 813 if (Error E = readValueProfilingData(Record)) 814 return error(std::move(E)); 815 816 // Iterate. 817 advanceData(); 818 return success(); 819 } 820 821 template <class IntPtrT> 822 Error RawInstrProfReader<IntPtrT>::readBinaryIds( 823 std::vector<llvm::object::BuildID> &BinaryIds) { 824 BinaryIds.insert(BinaryIds.begin(), this->BinaryIds.begin(), 825 this->BinaryIds.end()); 826 return Error::success(); 827 } 828 829 template <class IntPtrT> 830 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 831 if (!BinaryIds.empty()) 832 printBinaryIdsInternal(OS, BinaryIds); 833 return Error::success(); 834 } 835 836 namespace llvm { 837 838 template class RawInstrProfReader<uint32_t>; 839 template class RawInstrProfReader<uint64_t>; 840 841 } // end namespace llvm 842 843 InstrProfLookupTrait::hash_value_type 844 InstrProfLookupTrait::ComputeHash(StringRef K) { 845 return IndexedInstrProf::ComputeHash(HashType, K); 846 } 847 848 using data_type = InstrProfLookupTrait::data_type; 849 using offset_type = InstrProfLookupTrait::offset_type; 850 851 bool InstrProfLookupTrait::readValueProfilingData( 852 const unsigned char *&D, const unsigned char *const End) { 853 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 854 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 855 856 if (VDataPtrOrErr.takeError()) 857 return false; 858 859 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 860 D += VDataPtrOrErr.get()->TotalSize; 861 862 return true; 863 } 864 865 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 866 offset_type N) { 867 using namespace support; 868 869 // Check if the data is corrupt. If so, don't try to read it. 870 if (N % sizeof(uint64_t)) 871 return data_type(); 872 873 DataBuffer.clear(); 874 std::vector<uint64_t> CounterBuffer; 875 std::vector<uint8_t> BitmapByteBuffer; 876 877 const unsigned char *End = D + N; 878 while (D < End) { 879 // Read hash. 880 if (D + sizeof(uint64_t) >= End) 881 return data_type(); 882 uint64_t Hash = 883 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D); 884 885 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 886 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 887 // If format version is different then read the number of counters. 888 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 889 if (D + sizeof(uint64_t) > End) 890 return data_type(); 891 CountsSize = 892 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D); 893 } 894 // Read counter values. 895 if (D + CountsSize * sizeof(uint64_t) > End) 896 return data_type(); 897 898 CounterBuffer.clear(); 899 CounterBuffer.reserve(CountsSize); 900 for (uint64_t J = 0; J < CountsSize; ++J) 901 CounterBuffer.push_back( 902 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D)); 903 904 // Read bitmap bytes for GET_VERSION(FormatVersion) > 10. 905 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version10) { 906 uint64_t BitmapBytes = 0; 907 if (D + sizeof(uint64_t) > End) 908 return data_type(); 909 BitmapBytes = 910 endian::readNext<uint64_t, llvm::endianness::little, unaligned>(D); 911 // Read bitmap byte values. 912 if (D + BitmapBytes * sizeof(uint8_t) > End) 913 return data_type(); 914 BitmapByteBuffer.clear(); 915 BitmapByteBuffer.reserve(BitmapBytes); 916 for (uint64_t J = 0; J < BitmapBytes; ++J) 917 BitmapByteBuffer.push_back(static_cast<uint8_t>( 918 endian::readNext<uint64_t, llvm::endianness::little, unaligned>( 919 D))); 920 } 921 922 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer), 923 std::move(BitmapByteBuffer)); 924 925 // Read value profiling data. 926 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 927 !readValueProfilingData(D, End)) { 928 DataBuffer.clear(); 929 return data_type(); 930 } 931 } 932 return DataBuffer; 933 } 934 935 template <typename HashTableImpl> 936 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 937 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 938 auto Iter = HashTable->find(FuncName); 939 if (Iter == HashTable->end()) 940 return make_error<InstrProfError>(instrprof_error::unknown_function); 941 942 Data = (*Iter); 943 if (Data.empty()) 944 return make_error<InstrProfError>(instrprof_error::malformed, 945 "profile data is empty"); 946 947 return Error::success(); 948 } 949 950 template <typename HashTableImpl> 951 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 952 ArrayRef<NamedInstrProfRecord> &Data) { 953 if (atEnd()) 954 return make_error<InstrProfError>(instrprof_error::eof); 955 956 Data = *RecordIterator; 957 958 if (Data.empty()) 959 return make_error<InstrProfError>(instrprof_error::malformed, 960 "profile data is empty"); 961 962 return Error::success(); 963 } 964 965 template <typename HashTableImpl> 966 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 967 const unsigned char *Buckets, const unsigned char *const Payload, 968 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 969 uint64_t Version) { 970 FormatVersion = Version; 971 HashTable.reset(HashTableImpl::Create( 972 Buckets, Payload, Base, 973 typename HashTableImpl::InfoType(HashType, Version))); 974 RecordIterator = HashTable->data_begin(); 975 } 976 977 template <typename HashTableImpl> 978 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 979 return getProfileKindFromVersion(FormatVersion); 980 } 981 982 namespace { 983 /// A remapper that does not apply any remappings. 984 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 985 InstrProfReaderIndexBase &Underlying; 986 987 public: 988 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 989 : Underlying(Underlying) {} 990 991 Error getRecords(StringRef FuncName, 992 ArrayRef<NamedInstrProfRecord> &Data) override { 993 return Underlying.getRecords(FuncName, Data); 994 } 995 }; 996 } // namespace 997 998 /// A remapper that applies remappings based on a symbol remapping file. 999 template <typename HashTableImpl> 1000 class llvm::InstrProfReaderItaniumRemapper 1001 : public InstrProfReaderRemapper { 1002 public: 1003 InstrProfReaderItaniumRemapper( 1004 std::unique_ptr<MemoryBuffer> RemapBuffer, 1005 InstrProfReaderIndex<HashTableImpl> &Underlying) 1006 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 1007 } 1008 1009 /// Extract the original function name from a PGO function name. 1010 static StringRef extractName(StringRef Name) { 1011 // We can have multiple pieces separated by kGlobalIdentifierDelimiter ( 1012 // semicolon now and colon in older profiles); there can be pieces both 1013 // before and after the mangled name. Find the first part that starts with 1014 // '_Z'; we'll assume that's the mangled name we want. 1015 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 1016 while (true) { 1017 Parts = Parts.second.split(kGlobalIdentifierDelimiter); 1018 if (Parts.first.starts_with("_Z")) 1019 return Parts.first; 1020 if (Parts.second.empty()) 1021 return Name; 1022 } 1023 } 1024 1025 /// Given a mangled name extracted from a PGO function name, and a new 1026 /// form for that mangled name, reconstitute the name. 1027 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 1028 StringRef Replacement, 1029 SmallVectorImpl<char> &Out) { 1030 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 1031 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 1032 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 1033 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 1034 } 1035 1036 Error populateRemappings() override { 1037 if (Error E = Remappings.read(*RemapBuffer)) 1038 return E; 1039 for (StringRef Name : Underlying.HashTable->keys()) { 1040 StringRef RealName = extractName(Name); 1041 if (auto Key = Remappings.insert(RealName)) { 1042 // FIXME: We could theoretically map the same equivalence class to 1043 // multiple names in the profile data. If that happens, we should 1044 // return NamedInstrProfRecords from all of them. 1045 MappedNames.insert({Key, RealName}); 1046 } 1047 } 1048 return Error::success(); 1049 } 1050 1051 Error getRecords(StringRef FuncName, 1052 ArrayRef<NamedInstrProfRecord> &Data) override { 1053 StringRef RealName = extractName(FuncName); 1054 if (auto Key = Remappings.lookup(RealName)) { 1055 StringRef Remapped = MappedNames.lookup(Key); 1056 if (!Remapped.empty()) { 1057 if (RealName.begin() == FuncName.begin() && 1058 RealName.end() == FuncName.end()) 1059 FuncName = Remapped; 1060 else { 1061 // Try rebuilding the name from the given remapping. 1062 SmallString<256> Reconstituted; 1063 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 1064 Error E = Underlying.getRecords(Reconstituted, Data); 1065 if (!E) 1066 return E; 1067 1068 // If we failed because the name doesn't exist, fall back to asking 1069 // about the original name. 1070 if (Error Unhandled = handleErrors( 1071 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 1072 return Err->get() == instrprof_error::unknown_function 1073 ? Error::success() 1074 : Error(std::move(Err)); 1075 })) 1076 return Unhandled; 1077 } 1078 } 1079 } 1080 return Underlying.getRecords(FuncName, Data); 1081 } 1082 1083 private: 1084 /// The memory buffer containing the remapping configuration. Remappings 1085 /// holds pointers into this buffer. 1086 std::unique_ptr<MemoryBuffer> RemapBuffer; 1087 1088 /// The mangling remapper. 1089 SymbolRemappingReader Remappings; 1090 1091 /// Mapping from mangled name keys to the name used for the key in the 1092 /// profile data. 1093 /// FIXME: Can we store a location within the on-disk hash table instead of 1094 /// redoing lookup? 1095 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 1096 1097 /// The real profile data reader. 1098 InstrProfReaderIndex<HashTableImpl> &Underlying; 1099 }; 1100 1101 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 1102 using namespace support; 1103 1104 if (DataBuffer.getBufferSize() < 8) 1105 return false; 1106 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( 1107 DataBuffer.getBufferStart()); 1108 // Verify that it's magical. 1109 return Magic == IndexedInstrProf::Magic; 1110 } 1111 1112 const unsigned char * 1113 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 1114 const unsigned char *Cur, bool UseCS) { 1115 using namespace IndexedInstrProf; 1116 using namespace support; 1117 1118 if (Version >= IndexedInstrProf::Version4) { 1119 const IndexedInstrProf::Summary *SummaryInLE = 1120 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 1121 uint64_t NFields = endian::byte_swap<uint64_t, llvm::endianness::little>( 1122 SummaryInLE->NumSummaryFields); 1123 uint64_t NEntries = endian::byte_swap<uint64_t, llvm::endianness::little>( 1124 SummaryInLE->NumCutoffEntries); 1125 uint32_t SummarySize = 1126 IndexedInstrProf::Summary::getSize(NFields, NEntries); 1127 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 1128 IndexedInstrProf::allocSummary(SummarySize); 1129 1130 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 1131 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 1132 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 1133 Dst[I] = endian::byte_swap<uint64_t, llvm::endianness::little>(Src[I]); 1134 1135 SummaryEntryVector DetailedSummary; 1136 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 1137 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 1138 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 1139 Ent.NumBlocks); 1140 } 1141 std::unique_ptr<llvm::ProfileSummary> &Summary = 1142 UseCS ? this->CS_Summary : this->Summary; 1143 1144 // initialize InstrProfSummary using the SummaryData from disk. 1145 Summary = std::make_unique<ProfileSummary>( 1146 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 1147 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 1148 SummaryData->get(Summary::MaxBlockCount), 1149 SummaryData->get(Summary::MaxInternalBlockCount), 1150 SummaryData->get(Summary::MaxFunctionCount), 1151 SummaryData->get(Summary::TotalNumBlocks), 1152 SummaryData->get(Summary::TotalNumFunctions)); 1153 return Cur + SummarySize; 1154 } else { 1155 // The older versions do not support a profile summary. This just computes 1156 // an empty summary, which will not result in accurate hot/cold detection. 1157 // We would need to call addRecord for all NamedInstrProfRecords to get the 1158 // correct summary. However, this version is old (prior to early 2016) and 1159 // has not been supporting an accurate summary for several years. 1160 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1161 Summary = Builder.getSummary(); 1162 return Cur; 1163 } 1164 } 1165 1166 Error IndexedInstrProfReader::readHeader() { 1167 using namespace support; 1168 1169 const unsigned char *Start = 1170 (const unsigned char *)DataBuffer->getBufferStart(); 1171 const unsigned char *Cur = Start; 1172 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 1173 return error(instrprof_error::truncated); 1174 1175 auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start); 1176 if (!HeaderOr) 1177 return HeaderOr.takeError(); 1178 1179 const IndexedInstrProf::Header *Header = &HeaderOr.get(); 1180 Cur += Header->size(); 1181 1182 Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 1183 /* UseCS */ false); 1184 if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) 1185 Cur = 1186 readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 1187 /* UseCS */ true); 1188 // Read the hash type and start offset. 1189 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 1190 endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashType)); 1191 if (HashType > IndexedInstrProf::HashT::Last) 1192 return error(instrprof_error::unsupported_hash_type); 1193 1194 uint64_t HashOffset = 1195 endian::byte_swap<uint64_t, llvm::endianness::little>(Header->HashOffset); 1196 1197 // The hash table with profile counts comes next. 1198 auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 1199 Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); 1200 1201 // The MemProfOffset field in the header is only valid when the format 1202 // version is higher than 8 (when it was introduced). 1203 if (GET_VERSION(Header->formatVersion()) >= 8 && 1204 Header->formatVersion() & VARIANT_MASK_MEMPROF) { 1205 uint64_t MemProfOffset = 1206 endian::byte_swap<uint64_t, llvm::endianness::little>( 1207 Header->MemProfOffset); 1208 1209 const unsigned char *Ptr = Start + MemProfOffset; 1210 // The value returned from RecordTableGenerator.Emit. 1211 const uint64_t RecordTableOffset = 1212 support::endian::readNext<uint64_t, llvm::endianness::little, 1213 unaligned>(Ptr); 1214 // The offset in the stream right before invoking 1215 // FrameTableGenerator.Emit. 1216 const uint64_t FramePayloadOffset = 1217 support::endian::readNext<uint64_t, llvm::endianness::little, 1218 unaligned>(Ptr); 1219 // The value returned from FrameTableGenerator.Emit. 1220 const uint64_t FrameTableOffset = 1221 support::endian::readNext<uint64_t, llvm::endianness::little, 1222 unaligned>(Ptr); 1223 1224 // Read the schema. 1225 auto SchemaOr = memprof::readMemProfSchema(Ptr); 1226 if (!SchemaOr) 1227 return SchemaOr.takeError(); 1228 Schema = SchemaOr.get(); 1229 1230 // Now initialize the table reader with a pointer into data buffer. 1231 MemProfRecordTable.reset(MemProfRecordHashTable::Create( 1232 /*Buckets=*/Start + RecordTableOffset, 1233 /*Payload=*/Ptr, 1234 /*Base=*/Start, memprof::RecordLookupTrait(Schema))); 1235 1236 // Initialize the frame table reader with the payload and bucket offsets. 1237 MemProfFrameTable.reset(MemProfFrameHashTable::Create( 1238 /*Buckets=*/Start + FrameTableOffset, 1239 /*Payload=*/Start + FramePayloadOffset, 1240 /*Base=*/Start, memprof::FrameLookupTrait())); 1241 } 1242 1243 // BinaryIdOffset field in the header is only valid when the format version 1244 // is higher than 9 (when it was introduced). 1245 if (GET_VERSION(Header->formatVersion()) >= 9) { 1246 uint64_t BinaryIdOffset = 1247 endian::byte_swap<uint64_t, llvm::endianness::little>( 1248 Header->BinaryIdOffset); 1249 const unsigned char *Ptr = Start + BinaryIdOffset; 1250 // Read binary ids size. 1251 BinaryIdsSize = 1252 support::endian::readNext<uint64_t, llvm::endianness::little, 1253 unaligned>(Ptr); 1254 if (BinaryIdsSize % sizeof(uint64_t)) 1255 return error(instrprof_error::bad_header); 1256 // Set the binary ids start. 1257 BinaryIdsStart = Ptr; 1258 if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd()) 1259 return make_error<InstrProfError>(instrprof_error::malformed, 1260 "corrupted binary ids"); 1261 } 1262 1263 if (GET_VERSION(Header->formatVersion()) >= 10 && 1264 Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) { 1265 uint64_t TemporalProfTracesOffset = 1266 endian::byte_swap<uint64_t, llvm::endianness::little>( 1267 Header->TemporalProfTracesOffset); 1268 const unsigned char *Ptr = Start + TemporalProfTracesOffset; 1269 const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd(); 1270 // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize 1271 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) 1272 return error(instrprof_error::truncated); 1273 const uint64_t NumTraces = 1274 support::endian::readNext<uint64_t, llvm::endianness::little, 1275 unaligned>(Ptr); 1276 TemporalProfTraceStreamSize = 1277 support::endian::readNext<uint64_t, llvm::endianness::little, 1278 unaligned>(Ptr); 1279 for (unsigned i = 0; i < NumTraces; i++) { 1280 // Expect at least two 64 bit fields: Weight and NumFunctions 1281 if (Ptr + 2 * sizeof(uint64_t) > PtrEnd) 1282 return error(instrprof_error::truncated); 1283 TemporalProfTraceTy Trace; 1284 Trace.Weight = 1285 support::endian::readNext<uint64_t, llvm::endianness::little, 1286 unaligned>(Ptr); 1287 const uint64_t NumFunctions = 1288 support::endian::readNext<uint64_t, llvm::endianness::little, 1289 unaligned>(Ptr); 1290 // Expect at least NumFunctions 64 bit fields 1291 if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd) 1292 return error(instrprof_error::truncated); 1293 for (unsigned j = 0; j < NumFunctions; j++) { 1294 const uint64_t NameRef = 1295 support::endian::readNext<uint64_t, llvm::endianness::little, 1296 unaligned>(Ptr); 1297 Trace.FunctionNameRefs.push_back(NameRef); 1298 } 1299 TemporalProfTraces.push_back(std::move(Trace)); 1300 } 1301 } 1302 1303 // Load the remapping table now if requested. 1304 if (RemappingBuffer) { 1305 Remapper = 1306 std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 1307 std::move(RemappingBuffer), *IndexPtr); 1308 if (Error E = Remapper->populateRemappings()) 1309 return E; 1310 } else { 1311 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 1312 } 1313 Index = std::move(IndexPtr); 1314 1315 return success(); 1316 } 1317 1318 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 1319 if (Symtab) 1320 return *Symtab; 1321 1322 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 1323 if (Error E = Index->populateSymtab(*NewSymtab)) { 1324 auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); 1325 consumeError(error(ErrCode, Msg)); 1326 } 1327 1328 Symtab = std::move(NewSymtab); 1329 return *Symtab; 1330 } 1331 1332 Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord( 1333 StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName, 1334 uint64_t *MismatchedFuncSum) { 1335 ArrayRef<NamedInstrProfRecord> Data; 1336 uint64_t FuncSum = 0; 1337 auto Err = Remapper->getRecords(FuncName, Data); 1338 if (Err) { 1339 // If we don't find FuncName, try DeprecatedFuncName to handle profiles 1340 // built by older compilers. 1341 auto Err2 = 1342 handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error { 1343 if (IE.get() != instrprof_error::unknown_function) 1344 return make_error<InstrProfError>(IE); 1345 if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data)) 1346 return Err; 1347 return Error::success(); 1348 }); 1349 if (Err2) 1350 return std::move(Err2); 1351 } 1352 // Found it. Look for counters with the right hash. 1353 1354 // A flag to indicate if the records are from the same type 1355 // of profile (i.e cs vs nocs). 1356 bool CSBitMatch = false; 1357 auto getFuncSum = [](const std::vector<uint64_t> &Counts) { 1358 uint64_t ValueSum = 0; 1359 for (uint64_t CountValue : Counts) { 1360 if (CountValue == (uint64_t)-1) 1361 continue; 1362 // Handle overflow -- if that happens, return max. 1363 if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum) 1364 return std::numeric_limits<uint64_t>::max(); 1365 ValueSum += CountValue; 1366 } 1367 return ValueSum; 1368 }; 1369 1370 for (const NamedInstrProfRecord &I : Data) { 1371 // Check for a match and fill the vector if there is one. 1372 if (I.Hash == FuncHash) 1373 return std::move(I); 1374 if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) == 1375 NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) { 1376 CSBitMatch = true; 1377 if (MismatchedFuncSum == nullptr) 1378 continue; 1379 FuncSum = std::max(FuncSum, getFuncSum(I.Counts)); 1380 } 1381 } 1382 if (CSBitMatch) { 1383 if (MismatchedFuncSum != nullptr) 1384 *MismatchedFuncSum = FuncSum; 1385 return error(instrprof_error::hash_mismatch); 1386 } 1387 return error(instrprof_error::unknown_function); 1388 } 1389 1390 Expected<memprof::MemProfRecord> 1391 IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { 1392 // TODO: Add memprof specific errors. 1393 if (MemProfRecordTable == nullptr) 1394 return make_error<InstrProfError>(instrprof_error::invalid_prof, 1395 "no memprof data available in profile"); 1396 auto Iter = MemProfRecordTable->find(FuncNameHash); 1397 if (Iter == MemProfRecordTable->end()) 1398 return make_error<InstrProfError>( 1399 instrprof_error::unknown_function, 1400 "memprof record not found for function hash " + Twine(FuncNameHash)); 1401 1402 // Setup a callback to convert from frame ids to frame using the on-disk 1403 // FrameData hash table. 1404 memprof::FrameId LastUnmappedFrameId = 0; 1405 bool HasFrameMappingError = false; 1406 auto IdToFrameCallback = [&](const memprof::FrameId Id) { 1407 auto FrIter = MemProfFrameTable->find(Id); 1408 if (FrIter == MemProfFrameTable->end()) { 1409 LastUnmappedFrameId = Id; 1410 HasFrameMappingError = true; 1411 return memprof::Frame(0, 0, 0, false); 1412 } 1413 return *FrIter; 1414 }; 1415 1416 memprof::MemProfRecord Record(*Iter, IdToFrameCallback); 1417 1418 // Check that all frame ids were successfully converted to frames. 1419 if (HasFrameMappingError) { 1420 return make_error<InstrProfError>(instrprof_error::hash_mismatch, 1421 "memprof frame not found for frame id " + 1422 Twine(LastUnmappedFrameId)); 1423 } 1424 return Record; 1425 } 1426 1427 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1428 uint64_t FuncHash, 1429 std::vector<uint64_t> &Counts) { 1430 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1431 if (Error E = Record.takeError()) 1432 return error(std::move(E)); 1433 1434 Counts = Record.get().Counts; 1435 return success(); 1436 } 1437 1438 Error IndexedInstrProfReader::getFunctionBitmapBytes( 1439 StringRef FuncName, uint64_t FuncHash, std::vector<uint8_t> &BitmapBytes) { 1440 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1441 if (Error E = Record.takeError()) 1442 return error(std::move(E)); 1443 1444 BitmapBytes = Record.get().BitmapBytes; 1445 return success(); 1446 } 1447 1448 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1449 ArrayRef<NamedInstrProfRecord> Data; 1450 1451 Error E = Index->getRecords(Data); 1452 if (E) 1453 return error(std::move(E)); 1454 1455 Record = Data[RecordIndex++]; 1456 if (RecordIndex >= Data.size()) { 1457 Index->advanceToNextKey(); 1458 RecordIndex = 0; 1459 } 1460 return success(); 1461 } 1462 1463 Error IndexedInstrProfReader::readBinaryIds( 1464 std::vector<llvm::object::BuildID> &BinaryIds) { 1465 return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart, 1466 BinaryIds, llvm::endianness::little); 1467 } 1468 1469 Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) { 1470 std::vector<llvm::object::BuildID> BinaryIds; 1471 if (Error E = readBinaryIds(BinaryIds)) 1472 return E; 1473 printBinaryIdsInternal(OS, BinaryIds); 1474 return Error::success(); 1475 } 1476 1477 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1478 uint64_t NumFuncs = 0; 1479 for (const auto &Func : *this) { 1480 if (isIRLevelProfile()) { 1481 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1482 if (FuncIsCS != IsCS) 1483 continue; 1484 } 1485 Func.accumulateCounts(Sum); 1486 ++NumFuncs; 1487 } 1488 Sum.NumEntries = NumFuncs; 1489 } 1490