1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/IR/ProfileSummary.h" 20 #include "llvm/ProfileData/InstrProf.h" 21 #include "llvm/ProfileData/MemProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cstddef> 31 #include <cstdint> 32 #include <limits> 33 #include <memory> 34 #include <system_error> 35 #include <utility> 36 #include <vector> 37 38 using namespace llvm; 39 40 // Extracts the variant information from the top 8 bits in the version and 41 // returns an enum specifying the variants present. 42 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 43 InstrProfKind ProfileKind = InstrProfKind::Unknown; 44 if (Version & VARIANT_MASK_IR_PROF) { 45 ProfileKind |= InstrProfKind::IRInstrumentation; 46 } 47 if (Version & VARIANT_MASK_CSIR_PROF) { 48 ProfileKind |= InstrProfKind::ContextSensitive; 49 } 50 if (Version & VARIANT_MASK_INSTR_ENTRY) { 51 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 52 } 53 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 54 ProfileKind |= InstrProfKind::SingleByteCoverage; 55 } 56 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 57 ProfileKind |= InstrProfKind::FunctionEntryOnly; 58 } 59 if (Version & VARIANT_MASK_MEMPROF) { 60 ProfileKind |= InstrProfKind::MemProf; 61 } 62 return ProfileKind; 63 } 64 65 static Expected<std::unique_ptr<MemoryBuffer>> 66 setupMemoryBuffer(const Twine &Path) { 67 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 68 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 69 if (std::error_code EC = BufferOrErr.getError()) 70 return errorCodeToError(EC); 71 return std::move(BufferOrErr.get()); 72 } 73 74 static Error initializeReader(InstrProfReader &Reader) { 75 return Reader.readHeader(); 76 } 77 78 Expected<std::unique_ptr<InstrProfReader>> 79 InstrProfReader::create(const Twine &Path, 80 const InstrProfCorrelator *Correlator) { 81 // Set up the buffer to read. 82 auto BufferOrError = setupMemoryBuffer(Path); 83 if (Error E = BufferOrError.takeError()) 84 return std::move(E); 85 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 86 } 87 88 Expected<std::unique_ptr<InstrProfReader>> 89 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 90 const InstrProfCorrelator *Correlator) { 91 // Sanity check the buffer. 92 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 93 return make_error<InstrProfError>(instrprof_error::too_large); 94 95 if (Buffer->getBufferSize() == 0) 96 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 97 98 std::unique_ptr<InstrProfReader> Result; 99 // Create the reader. 100 if (IndexedInstrProfReader::hasFormat(*Buffer)) 101 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 102 else if (RawInstrProfReader64::hasFormat(*Buffer)) 103 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 104 else if (RawInstrProfReader32::hasFormat(*Buffer)) 105 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 106 else if (TextInstrProfReader::hasFormat(*Buffer)) 107 Result.reset(new TextInstrProfReader(std::move(Buffer))); 108 else 109 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 110 111 // Initialize the reader and return the result. 112 if (Error E = initializeReader(*Result)) 113 return std::move(E); 114 115 return std::move(Result); 116 } 117 118 Expected<std::unique_ptr<IndexedInstrProfReader>> 119 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 120 // Set up the buffer to read. 121 auto BufferOrError = setupMemoryBuffer(Path); 122 if (Error E = BufferOrError.takeError()) 123 return std::move(E); 124 125 // Set up the remapping buffer if requested. 126 std::unique_ptr<MemoryBuffer> RemappingBuffer; 127 std::string RemappingPathStr = RemappingPath.str(); 128 if (!RemappingPathStr.empty()) { 129 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 130 if (Error E = RemappingBufferOrError.takeError()) 131 return std::move(E); 132 RemappingBuffer = std::move(RemappingBufferOrError.get()); 133 } 134 135 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 136 std::move(RemappingBuffer)); 137 } 138 139 Expected<std::unique_ptr<IndexedInstrProfReader>> 140 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 141 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 142 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 143 return make_error<InstrProfError>(instrprof_error::too_large); 144 145 // Create the reader. 146 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 147 return make_error<InstrProfError>(instrprof_error::bad_magic); 148 auto Result = std::make_unique<IndexedInstrProfReader>( 149 std::move(Buffer), std::move(RemappingBuffer)); 150 151 // Initialize the reader and return the result. 152 if (Error E = initializeReader(*Result)) 153 return std::move(E); 154 155 return std::move(Result); 156 } 157 158 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 159 // Verify that this really looks like plain ASCII text by checking a 160 // 'reasonable' number of characters (up to profile magic size). 161 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 162 StringRef buffer = Buffer.getBufferStart(); 163 return count == 0 || 164 std::all_of(buffer.begin(), buffer.begin() + count, 165 [](char c) { return isPrint(c) || isSpace(c); }); 166 } 167 168 // Read the profile variant flag from the header: ":FE" means this is a FE 169 // generated profile. ":IR" means this is an IR level profile. Other strings 170 // with a leading ':' will be reported an error format. 171 Error TextInstrProfReader::readHeader() { 172 Symtab.reset(new InstrProfSymtab()); 173 174 while (Line->startswith(":")) { 175 StringRef Str = Line->substr(1); 176 if (Str.equals_insensitive("ir")) 177 ProfileKind |= InstrProfKind::IRInstrumentation; 178 else if (Str.equals_insensitive("fe")) 179 ProfileKind |= InstrProfKind::FrontendInstrumentation; 180 else if (Str.equals_insensitive("csir")) { 181 ProfileKind |= InstrProfKind::IRInstrumentation; 182 ProfileKind |= InstrProfKind::ContextSensitive; 183 } else if (Str.equals_insensitive("entry_first")) 184 ProfileKind |= InstrProfKind::FunctionEntryInstrumentation; 185 else if (Str.equals_insensitive("not_entry_first")) 186 ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation; 187 else 188 return error(instrprof_error::bad_header); 189 ++Line; 190 } 191 return success(); 192 } 193 194 Error 195 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 196 197 #define CHECK_LINE_END(Line) \ 198 if (Line.is_at_end()) \ 199 return error(instrprof_error::truncated); 200 #define READ_NUM(Str, Dst) \ 201 if ((Str).getAsInteger(10, (Dst))) \ 202 return error(instrprof_error::malformed); 203 #define VP_READ_ADVANCE(Val) \ 204 CHECK_LINE_END(Line); \ 205 uint32_t Val; \ 206 READ_NUM((*Line), (Val)); \ 207 Line++; 208 209 if (Line.is_at_end()) 210 return success(); 211 212 uint32_t NumValueKinds; 213 if (Line->getAsInteger(10, NumValueKinds)) { 214 // No value profile data 215 return success(); 216 } 217 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 218 return error(instrprof_error::malformed, 219 "number of value kinds is invalid"); 220 Line++; 221 222 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 223 VP_READ_ADVANCE(ValueKind); 224 if (ValueKind > IPVK_Last) 225 return error(instrprof_error::malformed, "value kind is invalid"); 226 ; 227 VP_READ_ADVANCE(NumValueSites); 228 if (!NumValueSites) 229 continue; 230 231 Record.reserveSites(VK, NumValueSites); 232 for (uint32_t S = 0; S < NumValueSites; S++) { 233 VP_READ_ADVANCE(NumValueData); 234 235 std::vector<InstrProfValueData> CurrentValues; 236 for (uint32_t V = 0; V < NumValueData; V++) { 237 CHECK_LINE_END(Line); 238 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 239 uint64_t TakenCount, Value; 240 if (ValueKind == IPVK_IndirectCallTarget) { 241 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 242 Value = 0; 243 } else { 244 if (Error E = Symtab->addFuncName(VD.first)) 245 return E; 246 Value = IndexedInstrProf::ComputeHash(VD.first); 247 } 248 } else { 249 READ_NUM(VD.first, Value); 250 } 251 READ_NUM(VD.second, TakenCount); 252 CurrentValues.push_back({Value, TakenCount}); 253 Line++; 254 } 255 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 256 nullptr); 257 } 258 } 259 return success(); 260 261 #undef CHECK_LINE_END 262 #undef READ_NUM 263 #undef VP_READ_ADVANCE 264 } 265 266 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 267 // Skip empty lines and comments. 268 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 269 ++Line; 270 // If we hit EOF while looking for a name, we're done. 271 if (Line.is_at_end()) { 272 return error(instrprof_error::eof); 273 } 274 275 // Read the function name. 276 Record.Name = *Line++; 277 if (Error E = Symtab->addFuncName(Record.Name)) 278 return error(std::move(E)); 279 280 // Read the function hash. 281 if (Line.is_at_end()) 282 return error(instrprof_error::truncated); 283 if ((Line++)->getAsInteger(0, Record.Hash)) 284 return error(instrprof_error::malformed, 285 "function hash is not a valid integer"); 286 287 // Read the number of counters. 288 uint64_t NumCounters; 289 if (Line.is_at_end()) 290 return error(instrprof_error::truncated); 291 if ((Line++)->getAsInteger(10, NumCounters)) 292 return error(instrprof_error::malformed, 293 "number of counters is not a valid integer"); 294 if (NumCounters == 0) 295 return error(instrprof_error::malformed, "number of counters is zero"); 296 297 // Read each counter and fill our internal storage with the values. 298 Record.Clear(); 299 Record.Counts.reserve(NumCounters); 300 for (uint64_t I = 0; I < NumCounters; ++I) { 301 if (Line.is_at_end()) 302 return error(instrprof_error::truncated); 303 uint64_t Count; 304 if ((Line++)->getAsInteger(10, Count)) 305 return error(instrprof_error::malformed, "count is invalid"); 306 Record.Counts.push_back(Count); 307 } 308 309 // Check if value profile data exists and read it if so. 310 if (Error E = readValueProfileData(Record)) 311 return error(std::move(E)); 312 313 return success(); 314 } 315 316 template <class IntPtrT> 317 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 318 return getProfileKindFromVersion(Version); 319 } 320 321 template <class IntPtrT> 322 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 323 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 324 return false; 325 uint64_t Magic = 326 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 327 return RawInstrProf::getMagic<IntPtrT>() == Magic || 328 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 329 } 330 331 template <class IntPtrT> 332 Error RawInstrProfReader<IntPtrT>::readHeader() { 333 if (!hasFormat(*DataBuffer)) 334 return error(instrprof_error::bad_magic); 335 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 336 return error(instrprof_error::bad_header); 337 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 338 DataBuffer->getBufferStart()); 339 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 340 return readHeader(*Header); 341 } 342 343 template <class IntPtrT> 344 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 345 const char *End = DataBuffer->getBufferEnd(); 346 // Skip zero padding between profiles. 347 while (CurrentPos != End && *CurrentPos == 0) 348 ++CurrentPos; 349 // If there's nothing left, we're done. 350 if (CurrentPos == End) 351 return make_error<InstrProfError>(instrprof_error::eof); 352 // If there isn't enough space for another header, this is probably just 353 // garbage at the end of the file. 354 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 355 return make_error<InstrProfError>(instrprof_error::malformed, 356 "not enough space for another header"); 357 // The writer ensures each profile is padded to start at an aligned address. 358 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 359 return make_error<InstrProfError>(instrprof_error::malformed, 360 "insufficient padding"); 361 // The magic should have the same byte order as in the previous header. 362 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 363 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 364 return make_error<InstrProfError>(instrprof_error::bad_magic); 365 366 // There's another profile to read, so we need to process the header. 367 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 368 return readHeader(*Header); 369 } 370 371 template <class IntPtrT> 372 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 373 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 374 return error(std::move(E)); 375 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 376 const IntPtrT FPtr = swap(I->FunctionPointer); 377 if (!FPtr) 378 continue; 379 Symtab.mapAddress(FPtr, I->NameRef); 380 } 381 return success(); 382 } 383 384 template <class IntPtrT> 385 Error RawInstrProfReader<IntPtrT>::readHeader( 386 const RawInstrProf::Header &Header) { 387 Version = swap(Header.Version); 388 if (GET_VERSION(Version) != RawInstrProf::Version) 389 return error(instrprof_error::unsupported_version); 390 if (useDebugInfoCorrelate() && !Correlator) 391 return error(instrprof_error::missing_debug_info_for_correlation); 392 if (!useDebugInfoCorrelate() && Correlator) 393 return error(instrprof_error::unexpected_debug_info_for_correlation); 394 395 BinaryIdsSize = swap(Header.BinaryIdsSize); 396 if (BinaryIdsSize % sizeof(uint64_t)) 397 return error(instrprof_error::bad_header); 398 399 CountersDelta = swap(Header.CountersDelta); 400 NamesDelta = swap(Header.NamesDelta); 401 auto NumData = swap(Header.DataSize); 402 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 403 auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); 404 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 405 auto NamesSize = swap(Header.NamesSize); 406 ValueKindLast = swap(Header.ValueKindLast); 407 408 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 409 auto PaddingSize = getNumPaddingBytes(NamesSize); 410 411 // Profile data starts after profile header and binary ids if exist. 412 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 413 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 414 ptrdiff_t NamesOffset = 415 CountersOffset + CountersSize + PaddingBytesAfterCounters; 416 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 417 418 auto *Start = reinterpret_cast<const char *>(&Header); 419 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 420 return error(instrprof_error::bad_header); 421 422 if (Correlator) { 423 // These sizes in the raw file are zero because we constructed them in the 424 // Correlator. 425 assert(DataSize == 0 && NamesSize == 0); 426 assert(CountersDelta == 0 && NamesDelta == 0); 427 Data = Correlator->getDataPointer(); 428 DataEnd = Data + Correlator->getDataSize(); 429 NamesStart = Correlator->getNamesPointer(); 430 NamesEnd = NamesStart + Correlator->getNamesSize(); 431 } else { 432 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 433 Start + DataOffset); 434 DataEnd = Data + NumData; 435 NamesStart = Start + NamesOffset; 436 NamesEnd = NamesStart + NamesSize; 437 } 438 439 // Binary ids start just after the header. 440 BinaryIdsStart = 441 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 442 CountersStart = Start + CountersOffset; 443 CountersEnd = CountersStart + CountersSize; 444 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 445 446 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 447 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 448 return error(instrprof_error::bad_header); 449 450 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 451 if (Error E = createSymtab(*NewSymtab)) 452 return E; 453 454 Symtab = std::move(NewSymtab); 455 return success(); 456 } 457 458 template <class IntPtrT> 459 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 460 Record.Name = getName(Data->NameRef); 461 return success(); 462 } 463 464 template <class IntPtrT> 465 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 466 Record.Hash = swap(Data->FuncHash); 467 return success(); 468 } 469 470 template <class IntPtrT> 471 Error RawInstrProfReader<IntPtrT>::readRawCounts( 472 InstrProfRecord &Record) { 473 uint32_t NumCounters = swap(Data->NumCounters); 474 if (NumCounters == 0) 475 return error(instrprof_error::malformed, "number of counters is zero"); 476 477 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 478 if (CounterBaseOffset < 0) 479 return error( 480 instrprof_error::malformed, 481 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 482 483 if (CounterBaseOffset >= CountersEnd - CountersStart) 484 return error(instrprof_error::malformed, 485 ("counter offset " + Twine(CounterBaseOffset) + 486 " is greater than the maximum counter offset " + 487 Twine(CountersEnd - CountersStart - 1)) 488 .str()); 489 490 uint64_t MaxNumCounters = 491 (CountersEnd - (CountersStart + CounterBaseOffset)) / 492 getCounterTypeSize(); 493 if (NumCounters > MaxNumCounters) 494 return error(instrprof_error::malformed, 495 ("number of counters " + Twine(NumCounters) + 496 " is greater than the maximum number of counters " + 497 Twine(MaxNumCounters)) 498 .str()); 499 500 Record.Counts.clear(); 501 Record.Counts.reserve(NumCounters); 502 for (uint32_t I = 0; I < NumCounters; I++) { 503 const char *Ptr = 504 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 505 if (hasSingleByteCoverage()) { 506 // A value of zero signifies the block is covered. 507 Record.Counts.push_back(*Ptr == 0 ? 1 : 0); 508 } else { 509 const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr); 510 Record.Counts.push_back(swap(*CounterValue)); 511 } 512 } 513 514 return success(); 515 } 516 517 template <class IntPtrT> 518 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 519 InstrProfRecord &Record) { 520 Record.clearValueData(); 521 CurValueDataSize = 0; 522 // Need to match the logic in value profile dumper code in compiler-rt: 523 uint32_t NumValueKinds = 0; 524 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 525 NumValueKinds += (Data->NumValueSites[I] != 0); 526 527 if (!NumValueKinds) 528 return success(); 529 530 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 531 ValueProfData::getValueProfData( 532 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 533 getDataEndianness()); 534 535 if (Error E = VDataPtrOrErr.takeError()) 536 return E; 537 538 // Note that besides deserialization, this also performs the conversion for 539 // indirect call targets. The function pointers from the raw profile are 540 // remapped into function name hashes. 541 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 542 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 543 return success(); 544 } 545 546 template <class IntPtrT> 547 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 548 if (atEnd()) 549 // At this point, ValueDataStart field points to the next header. 550 if (Error E = readNextHeader(getNextHeaderPos())) 551 return error(std::move(E)); 552 553 // Read name ad set it in Record. 554 if (Error E = readName(Record)) 555 return error(std::move(E)); 556 557 // Read FuncHash and set it in Record. 558 if (Error E = readFuncHash(Record)) 559 return error(std::move(E)); 560 561 // Read raw counts and set Record. 562 if (Error E = readRawCounts(Record)) 563 return error(std::move(E)); 564 565 // Read value data and set Record. 566 if (Error E = readValueProfilingData(Record)) 567 return error(std::move(E)); 568 569 // Iterate. 570 advanceData(); 571 return success(); 572 } 573 574 static size_t RoundUp(size_t size, size_t align) { 575 return (size + align - 1) & ~(align - 1); 576 } 577 578 template <class IntPtrT> 579 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 580 if (BinaryIdsSize == 0) 581 return success(); 582 583 OS << "Binary IDs: \n"; 584 const uint8_t *BI = BinaryIdsStart; 585 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 586 while (BI < BIEnd) { 587 size_t Remaining = BIEnd - BI; 588 589 // There should be enough left to read the binary ID size field. 590 if (Remaining < sizeof(uint64_t)) 591 return make_error<InstrProfError>( 592 instrprof_error::malformed, 593 "not enough data to read binary id length"); 594 595 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 596 597 // There should be enough left to read the binary ID size field, and the 598 // binary ID. 599 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 600 return make_error<InstrProfError>( 601 instrprof_error::malformed, "not enough data to read binary id data"); 602 603 // Increment by binary id length data type size. 604 BI += sizeof(BinaryIdLen); 605 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 606 return make_error<InstrProfError>( 607 instrprof_error::malformed, 608 "binary id that is read is bigger than buffer size"); 609 610 for (uint64_t I = 0; I < BinaryIdLen; I++) 611 OS << format("%02x", BI[I]); 612 OS << "\n"; 613 614 // Increment by binary id data length, rounded to the next 8 bytes. This 615 // accounts for the zero-padding after each build ID. 616 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 617 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 618 return make_error<InstrProfError>(instrprof_error::malformed); 619 } 620 621 return success(); 622 } 623 624 namespace llvm { 625 626 template class RawInstrProfReader<uint32_t>; 627 template class RawInstrProfReader<uint64_t>; 628 629 } // end namespace llvm 630 631 InstrProfLookupTrait::hash_value_type 632 InstrProfLookupTrait::ComputeHash(StringRef K) { 633 return IndexedInstrProf::ComputeHash(HashType, K); 634 } 635 636 using data_type = InstrProfLookupTrait::data_type; 637 using offset_type = InstrProfLookupTrait::offset_type; 638 639 bool InstrProfLookupTrait::readValueProfilingData( 640 const unsigned char *&D, const unsigned char *const End) { 641 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 642 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 643 644 if (VDataPtrOrErr.takeError()) 645 return false; 646 647 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 648 D += VDataPtrOrErr.get()->TotalSize; 649 650 return true; 651 } 652 653 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 654 offset_type N) { 655 using namespace support; 656 657 // Check if the data is corrupt. If so, don't try to read it. 658 if (N % sizeof(uint64_t)) 659 return data_type(); 660 661 DataBuffer.clear(); 662 std::vector<uint64_t> CounterBuffer; 663 664 const unsigned char *End = D + N; 665 while (D < End) { 666 // Read hash. 667 if (D + sizeof(uint64_t) >= End) 668 return data_type(); 669 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 670 671 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 672 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 673 // If format version is different then read the number of counters. 674 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 675 if (D + sizeof(uint64_t) > End) 676 return data_type(); 677 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 678 } 679 // Read counter values. 680 if (D + CountsSize * sizeof(uint64_t) > End) 681 return data_type(); 682 683 CounterBuffer.clear(); 684 CounterBuffer.reserve(CountsSize); 685 for (uint64_t J = 0; J < CountsSize; ++J) 686 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 687 688 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 689 690 // Read value profiling data. 691 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 692 !readValueProfilingData(D, End)) { 693 DataBuffer.clear(); 694 return data_type(); 695 } 696 } 697 return DataBuffer; 698 } 699 700 template <typename HashTableImpl> 701 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 702 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 703 auto Iter = HashTable->find(FuncName); 704 if (Iter == HashTable->end()) 705 return make_error<InstrProfError>(instrprof_error::unknown_function); 706 707 Data = (*Iter); 708 if (Data.empty()) 709 return make_error<InstrProfError>(instrprof_error::malformed, 710 "profile data is empty"); 711 712 return Error::success(); 713 } 714 715 template <typename HashTableImpl> 716 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 717 ArrayRef<NamedInstrProfRecord> &Data) { 718 if (atEnd()) 719 return make_error<InstrProfError>(instrprof_error::eof); 720 721 Data = *RecordIterator; 722 723 if (Data.empty()) 724 return make_error<InstrProfError>(instrprof_error::malformed, 725 "profile data is empty"); 726 727 return Error::success(); 728 } 729 730 template <typename HashTableImpl> 731 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 732 const unsigned char *Buckets, const unsigned char *const Payload, 733 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 734 uint64_t Version) { 735 FormatVersion = Version; 736 HashTable.reset(HashTableImpl::Create( 737 Buckets, Payload, Base, 738 typename HashTableImpl::InfoType(HashType, Version))); 739 RecordIterator = HashTable->data_begin(); 740 } 741 742 template <typename HashTableImpl> 743 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 744 return getProfileKindFromVersion(FormatVersion); 745 } 746 747 namespace { 748 /// A remapper that does not apply any remappings. 749 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 750 InstrProfReaderIndexBase &Underlying; 751 752 public: 753 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 754 : Underlying(Underlying) {} 755 756 Error getRecords(StringRef FuncName, 757 ArrayRef<NamedInstrProfRecord> &Data) override { 758 return Underlying.getRecords(FuncName, Data); 759 } 760 }; 761 } // namespace 762 763 /// A remapper that applies remappings based on a symbol remapping file. 764 template <typename HashTableImpl> 765 class llvm::InstrProfReaderItaniumRemapper 766 : public InstrProfReaderRemapper { 767 public: 768 InstrProfReaderItaniumRemapper( 769 std::unique_ptr<MemoryBuffer> RemapBuffer, 770 InstrProfReaderIndex<HashTableImpl> &Underlying) 771 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 772 } 773 774 /// Extract the original function name from a PGO function name. 775 static StringRef extractName(StringRef Name) { 776 // We can have multiple :-separated pieces; there can be pieces both 777 // before and after the mangled name. Find the first part that starts 778 // with '_Z'; we'll assume that's the mangled name we want. 779 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 780 while (true) { 781 Parts = Parts.second.split(':'); 782 if (Parts.first.startswith("_Z")) 783 return Parts.first; 784 if (Parts.second.empty()) 785 return Name; 786 } 787 } 788 789 /// Given a mangled name extracted from a PGO function name, and a new 790 /// form for that mangled name, reconstitute the name. 791 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 792 StringRef Replacement, 793 SmallVectorImpl<char> &Out) { 794 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 795 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 796 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 797 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 798 } 799 800 Error populateRemappings() override { 801 if (Error E = Remappings.read(*RemapBuffer)) 802 return E; 803 for (StringRef Name : Underlying.HashTable->keys()) { 804 StringRef RealName = extractName(Name); 805 if (auto Key = Remappings.insert(RealName)) { 806 // FIXME: We could theoretically map the same equivalence class to 807 // multiple names in the profile data. If that happens, we should 808 // return NamedInstrProfRecords from all of them. 809 MappedNames.insert({Key, RealName}); 810 } 811 } 812 return Error::success(); 813 } 814 815 Error getRecords(StringRef FuncName, 816 ArrayRef<NamedInstrProfRecord> &Data) override { 817 StringRef RealName = extractName(FuncName); 818 if (auto Key = Remappings.lookup(RealName)) { 819 StringRef Remapped = MappedNames.lookup(Key); 820 if (!Remapped.empty()) { 821 if (RealName.begin() == FuncName.begin() && 822 RealName.end() == FuncName.end()) 823 FuncName = Remapped; 824 else { 825 // Try rebuilding the name from the given remapping. 826 SmallString<256> Reconstituted; 827 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 828 Error E = Underlying.getRecords(Reconstituted, Data); 829 if (!E) 830 return E; 831 832 // If we failed because the name doesn't exist, fall back to asking 833 // about the original name. 834 if (Error Unhandled = handleErrors( 835 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 836 return Err->get() == instrprof_error::unknown_function 837 ? Error::success() 838 : Error(std::move(Err)); 839 })) 840 return Unhandled; 841 } 842 } 843 } 844 return Underlying.getRecords(FuncName, Data); 845 } 846 847 private: 848 /// The memory buffer containing the remapping configuration. Remappings 849 /// holds pointers into this buffer. 850 std::unique_ptr<MemoryBuffer> RemapBuffer; 851 852 /// The mangling remapper. 853 SymbolRemappingReader Remappings; 854 855 /// Mapping from mangled name keys to the name used for the key in the 856 /// profile data. 857 /// FIXME: Can we store a location within the on-disk hash table instead of 858 /// redoing lookup? 859 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 860 861 /// The real profile data reader. 862 InstrProfReaderIndex<HashTableImpl> &Underlying; 863 }; 864 865 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 866 using namespace support; 867 868 if (DataBuffer.getBufferSize() < 8) 869 return false; 870 uint64_t Magic = 871 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 872 // Verify that it's magical. 873 return Magic == IndexedInstrProf::Magic; 874 } 875 876 const unsigned char * 877 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 878 const unsigned char *Cur, bool UseCS) { 879 using namespace IndexedInstrProf; 880 using namespace support; 881 882 if (Version >= IndexedInstrProf::Version4) { 883 const IndexedInstrProf::Summary *SummaryInLE = 884 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 885 uint64_t NFields = 886 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 887 uint64_t NEntries = 888 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 889 uint32_t SummarySize = 890 IndexedInstrProf::Summary::getSize(NFields, NEntries); 891 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 892 IndexedInstrProf::allocSummary(SummarySize); 893 894 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 895 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 896 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 897 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 898 899 SummaryEntryVector DetailedSummary; 900 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 901 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 902 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 903 Ent.NumBlocks); 904 } 905 std::unique_ptr<llvm::ProfileSummary> &Summary = 906 UseCS ? this->CS_Summary : this->Summary; 907 908 // initialize InstrProfSummary using the SummaryData from disk. 909 Summary = std::make_unique<ProfileSummary>( 910 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 911 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 912 SummaryData->get(Summary::MaxBlockCount), 913 SummaryData->get(Summary::MaxInternalBlockCount), 914 SummaryData->get(Summary::MaxFunctionCount), 915 SummaryData->get(Summary::TotalNumBlocks), 916 SummaryData->get(Summary::TotalNumFunctions)); 917 return Cur + SummarySize; 918 } else { 919 // The older versions do not support a profile summary. This just computes 920 // an empty summary, which will not result in accurate hot/cold detection. 921 // We would need to call addRecord for all NamedInstrProfRecords to get the 922 // correct summary. However, this version is old (prior to early 2016) and 923 // has not been supporting an accurate summary for several years. 924 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 925 Summary = Builder.getSummary(); 926 return Cur; 927 } 928 } 929 930 Error IndexedInstrProfReader::readHeader() { 931 using namespace support; 932 933 const unsigned char *Start = 934 (const unsigned char *)DataBuffer->getBufferStart(); 935 const unsigned char *Cur = Start; 936 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 937 return error(instrprof_error::truncated); 938 939 auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start); 940 if (!HeaderOr) 941 return HeaderOr.takeError(); 942 943 const IndexedInstrProf::Header *Header = &HeaderOr.get(); 944 Cur += Header->size(); 945 946 Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 947 /* UseCS */ false); 948 if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF) 949 Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur, 950 /* UseCS */ true); 951 952 // Read the hash type and start offset. 953 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 954 endian::byte_swap<uint64_t, little>(Header->HashType)); 955 if (HashType > IndexedInstrProf::HashT::Last) 956 return error(instrprof_error::unsupported_hash_type); 957 958 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 959 960 // The hash table with profile counts comes next. 961 auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 962 Start + HashOffset, Cur, Start, HashType, Header->formatVersion()); 963 964 // The MemProfOffset field in the header is only valid when the format version 965 // is higher than 8 (when it was introduced). 966 if (GET_VERSION(Header->formatVersion()) >= 8 && 967 Header->formatVersion() & VARIANT_MASK_MEMPROF) { 968 uint64_t MemProfOffset = 969 endian::byte_swap<uint64_t, little>(Header->MemProfOffset); 970 971 const unsigned char *Ptr = Start + MemProfOffset; 972 // The value returned from RecordTableGenerator.Emit. 973 const uint64_t RecordTableOffset = 974 support::endian::readNext<uint64_t, little, unaligned>(Ptr); 975 // The offset in the stream right before invoking FrameTableGenerator.Emit. 976 const uint64_t FramePayloadOffset = 977 support::endian::readNext<uint64_t, little, unaligned>(Ptr); 978 // The value returned from FrameTableGenerator.Emit. 979 const uint64_t FrameTableOffset = 980 support::endian::readNext<uint64_t, little, unaligned>(Ptr); 981 982 // Read the schema. 983 auto SchemaOr = memprof::readMemProfSchema(Ptr); 984 if (!SchemaOr) 985 return SchemaOr.takeError(); 986 Schema = SchemaOr.get(); 987 988 // Now initialize the table reader with a pointer into data buffer. 989 MemProfRecordTable.reset(MemProfRecordHashTable::Create( 990 /*Buckets=*/Start + RecordTableOffset, 991 /*Payload=*/Ptr, 992 /*Base=*/Start, memprof::RecordLookupTrait(Schema))); 993 994 // Initialize the frame table reader with the payload and bucket offsets. 995 MemProfFrameTable.reset(MemProfFrameHashTable::Create( 996 /*Buckets=*/Start + FrameTableOffset, 997 /*Payload=*/Start + FramePayloadOffset, 998 /*Base=*/Start, memprof::FrameLookupTrait())); 999 } 1000 1001 // Load the remapping table now if requested. 1002 if (RemappingBuffer) { 1003 Remapper = std::make_unique< 1004 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 1005 std::move(RemappingBuffer), *IndexPtr); 1006 if (Error E = Remapper->populateRemappings()) 1007 return E; 1008 } else { 1009 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 1010 } 1011 Index = std::move(IndexPtr); 1012 1013 return success(); 1014 } 1015 1016 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 1017 if (Symtab) 1018 return *Symtab; 1019 1020 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 1021 if (Error E = Index->populateSymtab(*NewSymtab)) { 1022 consumeError(error(InstrProfError::take(std::move(E)))); 1023 } 1024 1025 Symtab = std::move(NewSymtab); 1026 return *Symtab; 1027 } 1028 1029 Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord( 1030 StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) { 1031 ArrayRef<NamedInstrProfRecord> Data; 1032 uint64_t FuncSum = 0; 1033 Error Err = Remapper->getRecords(FuncName, Data); 1034 if (Err) 1035 return std::move(Err); 1036 // Found it. Look for counters with the right hash. 1037 1038 // A flag to indicate if the records are from the same type 1039 // of profile (i.e cs vs nocs). 1040 bool CSBitMatch = false; 1041 auto getFuncSum = [](const std::vector<uint64_t> &Counts) { 1042 uint64_t ValueSum = 0; 1043 for (unsigned I = 0, S = Counts.size(); I < S; I++) { 1044 uint64_t CountValue = Counts[I]; 1045 if (CountValue == (uint64_t)-1) 1046 continue; 1047 // Handle overflow -- if that happens, return max. 1048 if (std::numeric_limits<uint64_t>::max() - CountValue <= ValueSum) 1049 return std::numeric_limits<uint64_t>::max(); 1050 ValueSum += CountValue; 1051 } 1052 return ValueSum; 1053 }; 1054 1055 for (const NamedInstrProfRecord &I : Data) { 1056 // Check for a match and fill the vector if there is one. 1057 if (I.Hash == FuncHash) 1058 return std::move(I); 1059 if (NamedInstrProfRecord::hasCSFlagInHash(I.Hash) == 1060 NamedInstrProfRecord::hasCSFlagInHash(FuncHash)) { 1061 CSBitMatch = true; 1062 if (MismatchedFuncSum == nullptr) 1063 continue; 1064 FuncSum = std::max(FuncSum, getFuncSum(I.Counts)); 1065 } 1066 } 1067 if (CSBitMatch) { 1068 if (MismatchedFuncSum != nullptr) 1069 *MismatchedFuncSum = FuncSum; 1070 return error(instrprof_error::hash_mismatch); 1071 } 1072 return error(instrprof_error::unknown_function); 1073 } 1074 1075 Expected<memprof::MemProfRecord> 1076 IndexedInstrProfReader::getMemProfRecord(const uint64_t FuncNameHash) { 1077 // TODO: Add memprof specific errors. 1078 if (MemProfRecordTable == nullptr) 1079 return make_error<InstrProfError>(instrprof_error::invalid_prof, 1080 "no memprof data available in profile"); 1081 auto Iter = MemProfRecordTable->find(FuncNameHash); 1082 if (Iter == MemProfRecordTable->end()) 1083 return make_error<InstrProfError>( 1084 instrprof_error::unknown_function, 1085 "memprof record not found for function hash " + Twine(FuncNameHash)); 1086 1087 // Setup a callback to convert from frame ids to frame using the on-disk 1088 // FrameData hash table. 1089 memprof::FrameId LastUnmappedFrameId = 0; 1090 bool HasFrameMappingError = false; 1091 auto IdToFrameCallback = [&](const memprof::FrameId Id) { 1092 auto FrIter = MemProfFrameTable->find(Id); 1093 if (FrIter == MemProfFrameTable->end()) { 1094 LastUnmappedFrameId = Id; 1095 HasFrameMappingError = true; 1096 return memprof::Frame(0, 0, 0, false); 1097 } 1098 return *FrIter; 1099 }; 1100 1101 memprof::MemProfRecord Record(*Iter, IdToFrameCallback); 1102 1103 // Check that all frame ids were successfully converted to frames. 1104 if (HasFrameMappingError) { 1105 return make_error<InstrProfError>(instrprof_error::hash_mismatch, 1106 "memprof frame not found for frame id " + 1107 Twine(LastUnmappedFrameId)); 1108 } 1109 return Record; 1110 } 1111 1112 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1113 uint64_t FuncHash, 1114 std::vector<uint64_t> &Counts) { 1115 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1116 if (Error E = Record.takeError()) 1117 return error(std::move(E)); 1118 1119 Counts = Record.get().Counts; 1120 return success(); 1121 } 1122 1123 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1124 ArrayRef<NamedInstrProfRecord> Data; 1125 1126 Error E = Index->getRecords(Data); 1127 if (E) 1128 return error(std::move(E)); 1129 1130 Record = Data[RecordIndex++]; 1131 if (RecordIndex >= Data.size()) { 1132 Index->advanceToNextKey(); 1133 RecordIndex = 0; 1134 } 1135 return success(); 1136 } 1137 1138 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1139 uint64_t NumFuncs = 0; 1140 for (const auto &Func : *this) { 1141 if (isIRLevelProfile()) { 1142 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1143 if (FuncIsCS != IsCS) 1144 continue; 1145 } 1146 Func.accumulateCounts(Sum); 1147 ++NumFuncs; 1148 } 1149 Sum.NumEntries = NumFuncs; 1150 } 1151