1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SwapByteOrder.h" 28 #include "llvm/Support/SymbolRemappingReader.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 // Extracts the variant information from the top 8 bits in the version and 42 // returns an enum specifying the variants present. 43 static InstrProfKind getProfileKindFromVersion(uint64_t Version) { 44 InstrProfKind ProfileKind = InstrProfKind::Unknown; 45 if (Version & VARIANT_MASK_IR_PROF) { 46 ProfileKind |= InstrProfKind::IR; 47 } 48 if (Version & VARIANT_MASK_CSIR_PROF) { 49 ProfileKind |= InstrProfKind::CS; 50 } 51 if (Version & VARIANT_MASK_INSTR_ENTRY) { 52 ProfileKind |= InstrProfKind::BB; 53 } 54 if (Version & VARIANT_MASK_BYTE_COVERAGE) { 55 ProfileKind |= InstrProfKind::SingleByteCoverage; 56 } 57 if (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) { 58 ProfileKind |= InstrProfKind::FunctionEntryOnly; 59 } 60 return ProfileKind; 61 } 62 63 static Expected<std::unique_ptr<MemoryBuffer>> 64 setupMemoryBuffer(const Twine &Path) { 65 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 66 MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true); 67 if (std::error_code EC = BufferOrErr.getError()) 68 return errorCodeToError(EC); 69 return std::move(BufferOrErr.get()); 70 } 71 72 static Error initializeReader(InstrProfReader &Reader) { 73 return Reader.readHeader(); 74 } 75 76 Expected<std::unique_ptr<InstrProfReader>> 77 InstrProfReader::create(const Twine &Path, 78 const InstrProfCorrelator *Correlator) { 79 // Set up the buffer to read. 80 auto BufferOrError = setupMemoryBuffer(Path); 81 if (Error E = BufferOrError.takeError()) 82 return std::move(E); 83 return InstrProfReader::create(std::move(BufferOrError.get()), Correlator); 84 } 85 86 Expected<std::unique_ptr<InstrProfReader>> 87 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 88 const InstrProfCorrelator *Correlator) { 89 // Sanity check the buffer. 90 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 91 return make_error<InstrProfError>(instrprof_error::too_large); 92 93 if (Buffer->getBufferSize() == 0) 94 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 95 96 std::unique_ptr<InstrProfReader> Result; 97 // Create the reader. 98 if (IndexedInstrProfReader::hasFormat(*Buffer)) 99 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 100 else if (RawInstrProfReader64::hasFormat(*Buffer)) 101 Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator)); 102 else if (RawInstrProfReader32::hasFormat(*Buffer)) 103 Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator)); 104 else if (TextInstrProfReader::hasFormat(*Buffer)) 105 Result.reset(new TextInstrProfReader(std::move(Buffer))); 106 else 107 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 108 109 // Initialize the reader and return the result. 110 if (Error E = initializeReader(*Result)) 111 return std::move(E); 112 113 return std::move(Result); 114 } 115 116 Expected<std::unique_ptr<IndexedInstrProfReader>> 117 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 118 // Set up the buffer to read. 119 auto BufferOrError = setupMemoryBuffer(Path); 120 if (Error E = BufferOrError.takeError()) 121 return std::move(E); 122 123 // Set up the remapping buffer if requested. 124 std::unique_ptr<MemoryBuffer> RemappingBuffer; 125 std::string RemappingPathStr = RemappingPath.str(); 126 if (!RemappingPathStr.empty()) { 127 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 128 if (Error E = RemappingBufferOrError.takeError()) 129 return std::move(E); 130 RemappingBuffer = std::move(RemappingBufferOrError.get()); 131 } 132 133 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 134 std::move(RemappingBuffer)); 135 } 136 137 Expected<std::unique_ptr<IndexedInstrProfReader>> 138 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 139 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 140 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 141 return make_error<InstrProfError>(instrprof_error::too_large); 142 143 // Create the reader. 144 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 145 return make_error<InstrProfError>(instrprof_error::bad_magic); 146 auto Result = std::make_unique<IndexedInstrProfReader>( 147 std::move(Buffer), std::move(RemappingBuffer)); 148 149 // Initialize the reader and return the result. 150 if (Error E = initializeReader(*Result)) 151 return std::move(E); 152 153 return std::move(Result); 154 } 155 156 void InstrProfIterator::Increment() { 157 if (auto E = Reader->readNextRecord(Record)) { 158 // Handle errors in the reader. 159 InstrProfError::take(std::move(E)); 160 *this = InstrProfIterator(); 161 } 162 } 163 164 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 165 // Verify that this really looks like plain ASCII text by checking a 166 // 'reasonable' number of characters (up to profile magic size). 167 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 168 StringRef buffer = Buffer.getBufferStart(); 169 return count == 0 || 170 std::all_of(buffer.begin(), buffer.begin() + count, 171 [](char c) { return isPrint(c) || isSpace(c); }); 172 } 173 174 // Read the profile variant flag from the header: ":FE" means this is a FE 175 // generated profile. ":IR" means this is an IR level profile. Other strings 176 // with a leading ':' will be reported an error format. 177 Error TextInstrProfReader::readHeader() { 178 Symtab.reset(new InstrProfSymtab()); 179 180 while (Line->startswith(":")) { 181 StringRef Str = Line->substr(1); 182 if (Str.equals_insensitive("ir")) 183 ProfileKind |= InstrProfKind::IR; 184 else if (Str.equals_insensitive("fe")) 185 ProfileKind |= InstrProfKind::FE; 186 else if (Str.equals_insensitive("csir")) { 187 ProfileKind |= InstrProfKind::IR; 188 ProfileKind |= InstrProfKind::CS; 189 } else if (Str.equals_insensitive("entry_first")) 190 ProfileKind |= InstrProfKind::BB; 191 else if (Str.equals_insensitive("not_entry_first")) 192 ProfileKind &= ~InstrProfKind::BB; 193 else 194 return error(instrprof_error::bad_header); 195 ++Line; 196 } 197 return success(); 198 } 199 200 Error 201 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 202 203 #define CHECK_LINE_END(Line) \ 204 if (Line.is_at_end()) \ 205 return error(instrprof_error::truncated); 206 #define READ_NUM(Str, Dst) \ 207 if ((Str).getAsInteger(10, (Dst))) \ 208 return error(instrprof_error::malformed); 209 #define VP_READ_ADVANCE(Val) \ 210 CHECK_LINE_END(Line); \ 211 uint32_t Val; \ 212 READ_NUM((*Line), (Val)); \ 213 Line++; 214 215 if (Line.is_at_end()) 216 return success(); 217 218 uint32_t NumValueKinds; 219 if (Line->getAsInteger(10, NumValueKinds)) { 220 // No value profile data 221 return success(); 222 } 223 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 224 return error(instrprof_error::malformed, 225 "number of value kinds is invalid"); 226 Line++; 227 228 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 229 VP_READ_ADVANCE(ValueKind); 230 if (ValueKind > IPVK_Last) 231 return error(instrprof_error::malformed, "value kind is invalid"); 232 ; 233 VP_READ_ADVANCE(NumValueSites); 234 if (!NumValueSites) 235 continue; 236 237 Record.reserveSites(VK, NumValueSites); 238 for (uint32_t S = 0; S < NumValueSites; S++) { 239 VP_READ_ADVANCE(NumValueData); 240 241 std::vector<InstrProfValueData> CurrentValues; 242 for (uint32_t V = 0; V < NumValueData; V++) { 243 CHECK_LINE_END(Line); 244 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 245 uint64_t TakenCount, Value; 246 if (ValueKind == IPVK_IndirectCallTarget) { 247 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 248 Value = 0; 249 } else { 250 if (Error E = Symtab->addFuncName(VD.first)) 251 return E; 252 Value = IndexedInstrProf::ComputeHash(VD.first); 253 } 254 } else { 255 READ_NUM(VD.first, Value); 256 } 257 READ_NUM(VD.second, TakenCount); 258 CurrentValues.push_back({Value, TakenCount}); 259 Line++; 260 } 261 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 262 nullptr); 263 } 264 } 265 return success(); 266 267 #undef CHECK_LINE_END 268 #undef READ_NUM 269 #undef VP_READ_ADVANCE 270 } 271 272 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 273 // Skip empty lines and comments. 274 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 275 ++Line; 276 // If we hit EOF while looking for a name, we're done. 277 if (Line.is_at_end()) { 278 return error(instrprof_error::eof); 279 } 280 281 // Read the function name. 282 Record.Name = *Line++; 283 if (Error E = Symtab->addFuncName(Record.Name)) 284 return error(std::move(E)); 285 286 // Read the function hash. 287 if (Line.is_at_end()) 288 return error(instrprof_error::truncated); 289 if ((Line++)->getAsInteger(0, Record.Hash)) 290 return error(instrprof_error::malformed, 291 "function hash is not a valid integer"); 292 293 // Read the number of counters. 294 uint64_t NumCounters; 295 if (Line.is_at_end()) 296 return error(instrprof_error::truncated); 297 if ((Line++)->getAsInteger(10, NumCounters)) 298 return error(instrprof_error::malformed, 299 "number of counters is not a valid integer"); 300 if (NumCounters == 0) 301 return error(instrprof_error::malformed, "number of counters is zero"); 302 303 // Read each counter and fill our internal storage with the values. 304 Record.Clear(); 305 Record.Counts.reserve(NumCounters); 306 for (uint64_t I = 0; I < NumCounters; ++I) { 307 if (Line.is_at_end()) 308 return error(instrprof_error::truncated); 309 uint64_t Count; 310 if ((Line++)->getAsInteger(10, Count)) 311 return error(instrprof_error::malformed, "count is invalid"); 312 Record.Counts.push_back(Count); 313 } 314 315 // Check if value profile data exists and read it if so. 316 if (Error E = readValueProfileData(Record)) 317 return error(std::move(E)); 318 319 return success(); 320 } 321 322 template <class IntPtrT> 323 InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const { 324 return getProfileKindFromVersion(Version); 325 } 326 327 template <class IntPtrT> 328 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 329 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 330 return false; 331 uint64_t Magic = 332 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 333 return RawInstrProf::getMagic<IntPtrT>() == Magic || 334 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 335 } 336 337 template <class IntPtrT> 338 Error RawInstrProfReader<IntPtrT>::readHeader() { 339 if (!hasFormat(*DataBuffer)) 340 return error(instrprof_error::bad_magic); 341 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 342 return error(instrprof_error::bad_header); 343 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 344 DataBuffer->getBufferStart()); 345 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 346 return readHeader(*Header); 347 } 348 349 template <class IntPtrT> 350 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 351 const char *End = DataBuffer->getBufferEnd(); 352 // Skip zero padding between profiles. 353 while (CurrentPos != End && *CurrentPos == 0) 354 ++CurrentPos; 355 // If there's nothing left, we're done. 356 if (CurrentPos == End) 357 return make_error<InstrProfError>(instrprof_error::eof); 358 // If there isn't enough space for another header, this is probably just 359 // garbage at the end of the file. 360 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 361 return make_error<InstrProfError>(instrprof_error::malformed, 362 "not enough space for another header"); 363 // The writer ensures each profile is padded to start at an aligned address. 364 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 365 return make_error<InstrProfError>(instrprof_error::malformed, 366 "insufficient padding"); 367 // The magic should have the same byte order as in the previous header. 368 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 369 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 370 return make_error<InstrProfError>(instrprof_error::bad_magic); 371 372 // There's another profile to read, so we need to process the header. 373 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 374 return readHeader(*Header); 375 } 376 377 template <class IntPtrT> 378 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 379 if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) 380 return error(std::move(E)); 381 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 382 const IntPtrT FPtr = swap(I->FunctionPointer); 383 if (!FPtr) 384 continue; 385 Symtab.mapAddress(FPtr, I->NameRef); 386 } 387 return success(); 388 } 389 390 template <class IntPtrT> 391 Error RawInstrProfReader<IntPtrT>::readHeader( 392 const RawInstrProf::Header &Header) { 393 Version = swap(Header.Version); 394 if (GET_VERSION(Version) != RawInstrProf::Version) 395 return error(instrprof_error::unsupported_version); 396 if (useDebugInfoCorrelate() && !Correlator) 397 return error(instrprof_error::missing_debug_info_for_correlation); 398 if (!useDebugInfoCorrelate() && Correlator) 399 return error(instrprof_error::unexpected_debug_info_for_correlation); 400 401 BinaryIdsSize = swap(Header.BinaryIdsSize); 402 if (BinaryIdsSize % sizeof(uint64_t)) 403 return error(instrprof_error::bad_header); 404 405 CountersDelta = swap(Header.CountersDelta); 406 NamesDelta = swap(Header.NamesDelta); 407 auto NumData = swap(Header.DataSize); 408 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 409 auto CountersSize = swap(Header.CountersSize) * getCounterTypeSize(); 410 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 411 auto NamesSize = swap(Header.NamesSize); 412 ValueKindLast = swap(Header.ValueKindLast); 413 414 auto DataSize = NumData * sizeof(RawInstrProf::ProfileData<IntPtrT>); 415 auto PaddingSize = getNumPaddingBytes(NamesSize); 416 417 // Profile data starts after profile header and binary ids if exist. 418 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header) + BinaryIdsSize; 419 ptrdiff_t CountersOffset = DataOffset + DataSize + PaddingBytesBeforeCounters; 420 ptrdiff_t NamesOffset = 421 CountersOffset + CountersSize + PaddingBytesAfterCounters; 422 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 423 424 auto *Start = reinterpret_cast<const char *>(&Header); 425 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 426 return error(instrprof_error::bad_header); 427 428 if (Correlator) { 429 // These sizes in the raw file are zero because we constructed them in the 430 // Correlator. 431 assert(DataSize == 0 && NamesSize == 0); 432 assert(CountersDelta == 0 && NamesDelta == 0); 433 Data = Correlator->getDataPointer(); 434 DataEnd = Data + Correlator->getDataSize(); 435 NamesStart = Correlator->getNamesPointer(); 436 NamesEnd = NamesStart + Correlator->getNamesSize(); 437 } else { 438 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 439 Start + DataOffset); 440 DataEnd = Data + NumData; 441 NamesStart = Start + NamesOffset; 442 NamesEnd = NamesStart + NamesSize; 443 } 444 445 // Binary ids start just after the header. 446 BinaryIdsStart = 447 reinterpret_cast<const uint8_t *>(&Header) + sizeof(RawInstrProf::Header); 448 CountersStart = Start + CountersOffset; 449 CountersEnd = CountersStart + CountersSize; 450 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 451 452 const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd(); 453 if (BinaryIdsStart + BinaryIdsSize > BufferEnd) 454 return error(instrprof_error::bad_header); 455 456 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 457 if (Error E = createSymtab(*NewSymtab.get())) 458 return E; 459 460 Symtab = std::move(NewSymtab); 461 return success(); 462 } 463 464 template <class IntPtrT> 465 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 466 Record.Name = getName(Data->NameRef); 467 return success(); 468 } 469 470 template <class IntPtrT> 471 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 472 Record.Hash = swap(Data->FuncHash); 473 return success(); 474 } 475 476 template <class IntPtrT> 477 Error RawInstrProfReader<IntPtrT>::readRawCounts( 478 InstrProfRecord &Record) { 479 uint32_t NumCounters = swap(Data->NumCounters); 480 if (NumCounters == 0) 481 return error(instrprof_error::malformed, "number of counters is zero"); 482 483 ptrdiff_t CounterBaseOffset = swap(Data->CounterPtr) - CountersDelta; 484 if (CounterBaseOffset < 0) 485 return error( 486 instrprof_error::malformed, 487 ("counter offset " + Twine(CounterBaseOffset) + " is negative").str()); 488 489 if (CounterBaseOffset >= CountersEnd - CountersStart) 490 return error(instrprof_error::malformed, 491 ("counter offset " + Twine(CounterBaseOffset) + 492 " is greater than the maximum counter offset " + 493 Twine(CountersEnd - CountersStart - 1)) 494 .str()); 495 496 uint64_t MaxNumCounters = 497 (CountersEnd - (CountersStart + CounterBaseOffset)) / 498 getCounterTypeSize(); 499 if (NumCounters > MaxNumCounters) 500 return error(instrprof_error::malformed, 501 ("number of counters " + Twine(NumCounters) + 502 " is greater than the maximum number of counters " + 503 Twine(MaxNumCounters)) 504 .str()); 505 506 Record.Counts.clear(); 507 Record.Counts.reserve(NumCounters); 508 for (uint32_t I = 0; I < NumCounters; I++) { 509 const char *Ptr = 510 CountersStart + CounterBaseOffset + I * getCounterTypeSize(); 511 if (hasSingleByteCoverage()) { 512 // A value of zero signifies the block is covered. 513 Record.Counts.push_back(*Ptr == 0 ? 1 : 0); 514 } else { 515 const auto *CounterValue = reinterpret_cast<const uint64_t *>(Ptr); 516 Record.Counts.push_back(swap(*CounterValue)); 517 } 518 } 519 520 return success(); 521 } 522 523 template <class IntPtrT> 524 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 525 InstrProfRecord &Record) { 526 Record.clearValueData(); 527 CurValueDataSize = 0; 528 // Need to match the logic in value profile dumper code in compiler-rt: 529 uint32_t NumValueKinds = 0; 530 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 531 NumValueKinds += (Data->NumValueSites[I] != 0); 532 533 if (!NumValueKinds) 534 return success(); 535 536 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 537 ValueProfData::getValueProfData( 538 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 539 getDataEndianness()); 540 541 if (Error E = VDataPtrOrErr.takeError()) 542 return E; 543 544 // Note that besides deserialization, this also performs the conversion for 545 // indirect call targets. The function pointers from the raw profile are 546 // remapped into function name hashes. 547 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 548 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 549 return success(); 550 } 551 552 template <class IntPtrT> 553 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 554 if (atEnd()) 555 // At this point, ValueDataStart field points to the next header. 556 if (Error E = readNextHeader(getNextHeaderPos())) 557 return error(std::move(E)); 558 559 // Read name ad set it in Record. 560 if (Error E = readName(Record)) 561 return error(std::move(E)); 562 563 // Read FuncHash and set it in Record. 564 if (Error E = readFuncHash(Record)) 565 return error(std::move(E)); 566 567 // Read raw counts and set Record. 568 if (Error E = readRawCounts(Record)) 569 return error(std::move(E)); 570 571 // Read value data and set Record. 572 if (Error E = readValueProfilingData(Record)) 573 return error(std::move(E)); 574 575 // Iterate. 576 advanceData(); 577 return success(); 578 } 579 580 static size_t RoundUp(size_t size, size_t align) { 581 return (size + align - 1) & ~(align - 1); 582 } 583 584 template <class IntPtrT> 585 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) { 586 if (BinaryIdsSize == 0) 587 return success(); 588 589 OS << "Binary IDs: \n"; 590 const uint8_t *BI = BinaryIdsStart; 591 const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize; 592 while (BI < BIEnd) { 593 size_t Remaining = BIEnd - BI; 594 595 // There should be enough left to read the binary ID size field. 596 if (Remaining < sizeof(uint64_t)) 597 return make_error<InstrProfError>( 598 instrprof_error::malformed, 599 "not enough data to read binary id length"); 600 601 uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI)); 602 603 // There should be enough left to read the binary ID size field, and the 604 // binary ID. 605 if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen) 606 return make_error<InstrProfError>( 607 instrprof_error::malformed, "not enough data to read binary id data"); 608 609 // Increment by binary id length data type size. 610 BI += sizeof(BinaryIdLen); 611 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 612 return make_error<InstrProfError>( 613 instrprof_error::malformed, 614 "binary id that is read is bigger than buffer size"); 615 616 for (uint64_t I = 0; I < BinaryIdLen; I++) 617 OS << format("%02x", BI[I]); 618 OS << "\n"; 619 620 // Increment by binary id data length, rounded to the next 8 bytes. This 621 // accounts for the zero-padding after each build ID. 622 BI += RoundUp(BinaryIdLen, sizeof(uint64_t)); 623 if (BI > (const uint8_t *)DataBuffer->getBufferEnd()) 624 return make_error<InstrProfError>(instrprof_error::malformed); 625 } 626 627 return success(); 628 } 629 630 namespace llvm { 631 632 template class RawInstrProfReader<uint32_t>; 633 template class RawInstrProfReader<uint64_t>; 634 635 } // end namespace llvm 636 637 InstrProfLookupTrait::hash_value_type 638 InstrProfLookupTrait::ComputeHash(StringRef K) { 639 return IndexedInstrProf::ComputeHash(HashType, K); 640 } 641 642 using data_type = InstrProfLookupTrait::data_type; 643 using offset_type = InstrProfLookupTrait::offset_type; 644 645 bool InstrProfLookupTrait::readValueProfilingData( 646 const unsigned char *&D, const unsigned char *const End) { 647 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 648 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 649 650 if (VDataPtrOrErr.takeError()) 651 return false; 652 653 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 654 D += VDataPtrOrErr.get()->TotalSize; 655 656 return true; 657 } 658 659 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 660 offset_type N) { 661 using namespace support; 662 663 // Check if the data is corrupt. If so, don't try to read it. 664 if (N % sizeof(uint64_t)) 665 return data_type(); 666 667 DataBuffer.clear(); 668 std::vector<uint64_t> CounterBuffer; 669 670 const unsigned char *End = D + N; 671 while (D < End) { 672 // Read hash. 673 if (D + sizeof(uint64_t) >= End) 674 return data_type(); 675 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 676 677 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 678 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 679 // If format version is different then read the number of counters. 680 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 681 if (D + sizeof(uint64_t) > End) 682 return data_type(); 683 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 684 } 685 // Read counter values. 686 if (D + CountsSize * sizeof(uint64_t) > End) 687 return data_type(); 688 689 CounterBuffer.clear(); 690 CounterBuffer.reserve(CountsSize); 691 for (uint64_t J = 0; J < CountsSize; ++J) 692 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 693 694 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 695 696 // Read value profiling data. 697 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 698 !readValueProfilingData(D, End)) { 699 DataBuffer.clear(); 700 return data_type(); 701 } 702 } 703 return DataBuffer; 704 } 705 706 template <typename HashTableImpl> 707 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 708 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 709 auto Iter = HashTable->find(FuncName); 710 if (Iter == HashTable->end()) 711 return make_error<InstrProfError>(instrprof_error::unknown_function); 712 713 Data = (*Iter); 714 if (Data.empty()) 715 return make_error<InstrProfError>(instrprof_error::malformed, 716 "profile data is empty"); 717 718 return Error::success(); 719 } 720 721 template <typename HashTableImpl> 722 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 723 ArrayRef<NamedInstrProfRecord> &Data) { 724 if (atEnd()) 725 return make_error<InstrProfError>(instrprof_error::eof); 726 727 Data = *RecordIterator; 728 729 if (Data.empty()) 730 return make_error<InstrProfError>(instrprof_error::malformed, 731 "profile data is empty"); 732 733 return Error::success(); 734 } 735 736 template <typename HashTableImpl> 737 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 738 const unsigned char *Buckets, const unsigned char *const Payload, 739 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 740 uint64_t Version) { 741 FormatVersion = Version; 742 HashTable.reset(HashTableImpl::Create( 743 Buckets, Payload, Base, 744 typename HashTableImpl::InfoType(HashType, Version))); 745 RecordIterator = HashTable->data_begin(); 746 } 747 748 template <typename HashTableImpl> 749 InstrProfKind InstrProfReaderIndex<HashTableImpl>::getProfileKind() const { 750 return getProfileKindFromVersion(FormatVersion); 751 } 752 753 namespace { 754 /// A remapper that does not apply any remappings. 755 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 756 InstrProfReaderIndexBase &Underlying; 757 758 public: 759 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 760 : Underlying(Underlying) {} 761 762 Error getRecords(StringRef FuncName, 763 ArrayRef<NamedInstrProfRecord> &Data) override { 764 return Underlying.getRecords(FuncName, Data); 765 } 766 }; 767 } // namespace 768 769 /// A remapper that applies remappings based on a symbol remapping file. 770 template <typename HashTableImpl> 771 class llvm::InstrProfReaderItaniumRemapper 772 : public InstrProfReaderRemapper { 773 public: 774 InstrProfReaderItaniumRemapper( 775 std::unique_ptr<MemoryBuffer> RemapBuffer, 776 InstrProfReaderIndex<HashTableImpl> &Underlying) 777 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 778 } 779 780 /// Extract the original function name from a PGO function name. 781 static StringRef extractName(StringRef Name) { 782 // We can have multiple :-separated pieces; there can be pieces both 783 // before and after the mangled name. Find the first part that starts 784 // with '_Z'; we'll assume that's the mangled name we want. 785 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 786 while (true) { 787 Parts = Parts.second.split(':'); 788 if (Parts.first.startswith("_Z")) 789 return Parts.first; 790 if (Parts.second.empty()) 791 return Name; 792 } 793 } 794 795 /// Given a mangled name extracted from a PGO function name, and a new 796 /// form for that mangled name, reconstitute the name. 797 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 798 StringRef Replacement, 799 SmallVectorImpl<char> &Out) { 800 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 801 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 802 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 803 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 804 } 805 806 Error populateRemappings() override { 807 if (Error E = Remappings.read(*RemapBuffer)) 808 return E; 809 for (StringRef Name : Underlying.HashTable->keys()) { 810 StringRef RealName = extractName(Name); 811 if (auto Key = Remappings.insert(RealName)) { 812 // FIXME: We could theoretically map the same equivalence class to 813 // multiple names in the profile data. If that happens, we should 814 // return NamedInstrProfRecords from all of them. 815 MappedNames.insert({Key, RealName}); 816 } 817 } 818 return Error::success(); 819 } 820 821 Error getRecords(StringRef FuncName, 822 ArrayRef<NamedInstrProfRecord> &Data) override { 823 StringRef RealName = extractName(FuncName); 824 if (auto Key = Remappings.lookup(RealName)) { 825 StringRef Remapped = MappedNames.lookup(Key); 826 if (!Remapped.empty()) { 827 if (RealName.begin() == FuncName.begin() && 828 RealName.end() == FuncName.end()) 829 FuncName = Remapped; 830 else { 831 // Try rebuilding the name from the given remapping. 832 SmallString<256> Reconstituted; 833 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 834 Error E = Underlying.getRecords(Reconstituted, Data); 835 if (!E) 836 return E; 837 838 // If we failed because the name doesn't exist, fall back to asking 839 // about the original name. 840 if (Error Unhandled = handleErrors( 841 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 842 return Err->get() == instrprof_error::unknown_function 843 ? Error::success() 844 : Error(std::move(Err)); 845 })) 846 return Unhandled; 847 } 848 } 849 } 850 return Underlying.getRecords(FuncName, Data); 851 } 852 853 private: 854 /// The memory buffer containing the remapping configuration. Remappings 855 /// holds pointers into this buffer. 856 std::unique_ptr<MemoryBuffer> RemapBuffer; 857 858 /// The mangling remapper. 859 SymbolRemappingReader Remappings; 860 861 /// Mapping from mangled name keys to the name used for the key in the 862 /// profile data. 863 /// FIXME: Can we store a location within the on-disk hash table instead of 864 /// redoing lookup? 865 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 866 867 /// The real profile data reader. 868 InstrProfReaderIndex<HashTableImpl> &Underlying; 869 }; 870 871 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 872 using namespace support; 873 874 if (DataBuffer.getBufferSize() < 8) 875 return false; 876 uint64_t Magic = 877 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 878 // Verify that it's magical. 879 return Magic == IndexedInstrProf::Magic; 880 } 881 882 const unsigned char * 883 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 884 const unsigned char *Cur, bool UseCS) { 885 using namespace IndexedInstrProf; 886 using namespace support; 887 888 if (Version >= IndexedInstrProf::Version4) { 889 const IndexedInstrProf::Summary *SummaryInLE = 890 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 891 uint64_t NFields = 892 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 893 uint64_t NEntries = 894 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 895 uint32_t SummarySize = 896 IndexedInstrProf::Summary::getSize(NFields, NEntries); 897 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 898 IndexedInstrProf::allocSummary(SummarySize); 899 900 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 901 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 902 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 903 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 904 905 SummaryEntryVector DetailedSummary; 906 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 907 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 908 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 909 Ent.NumBlocks); 910 } 911 std::unique_ptr<llvm::ProfileSummary> &Summary = 912 UseCS ? this->CS_Summary : this->Summary; 913 914 // initialize InstrProfSummary using the SummaryData from disk. 915 Summary = std::make_unique<ProfileSummary>( 916 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 917 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 918 SummaryData->get(Summary::MaxBlockCount), 919 SummaryData->get(Summary::MaxInternalBlockCount), 920 SummaryData->get(Summary::MaxFunctionCount), 921 SummaryData->get(Summary::TotalNumBlocks), 922 SummaryData->get(Summary::TotalNumFunctions)); 923 return Cur + SummarySize; 924 } else { 925 // The older versions do not support a profile summary. This just computes 926 // an empty summary, which will not result in accurate hot/cold detection. 927 // We would need to call addRecord for all NamedInstrProfRecords to get the 928 // correct summary. However, this version is old (prior to early 2016) and 929 // has not been supporting an accurate summary for several years. 930 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 931 Summary = Builder.getSummary(); 932 return Cur; 933 } 934 } 935 936 Error IndexedInstrProfReader::readHeader() { 937 using namespace support; 938 939 const unsigned char *Start = 940 (const unsigned char *)DataBuffer->getBufferStart(); 941 const unsigned char *Cur = Start; 942 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 943 return error(instrprof_error::truncated); 944 945 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 946 Cur += sizeof(IndexedInstrProf::Header); 947 948 // Check the magic number. 949 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 950 if (Magic != IndexedInstrProf::Magic) 951 return error(instrprof_error::bad_magic); 952 953 // Read the version. 954 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 955 if (GET_VERSION(FormatVersion) > 956 IndexedInstrProf::ProfVersion::CurrentVersion) 957 return error(instrprof_error::unsupported_version); 958 959 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 960 /* UseCS */ false); 961 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 962 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 963 /* UseCS */ true); 964 965 // Read the hash type and start offset. 966 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 967 endian::byte_swap<uint64_t, little>(Header->HashType)); 968 if (HashType > IndexedInstrProf::HashT::Last) 969 return error(instrprof_error::unsupported_hash_type); 970 971 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 972 973 // The rest of the file is an on disk hash table. 974 auto IndexPtr = 975 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 976 Start + HashOffset, Cur, Start, HashType, FormatVersion); 977 978 // Load the remapping table now if requested. 979 if (RemappingBuffer) { 980 Remapper = std::make_unique< 981 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 982 std::move(RemappingBuffer), *IndexPtr); 983 if (Error E = Remapper->populateRemappings()) 984 return E; 985 } else { 986 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 987 } 988 Index = std::move(IndexPtr); 989 990 return success(); 991 } 992 993 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 994 if (Symtab.get()) 995 return *Symtab.get(); 996 997 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 998 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 999 consumeError(error(InstrProfError::take(std::move(E)))); 1000 } 1001 1002 Symtab = std::move(NewSymtab); 1003 return *Symtab.get(); 1004 } 1005 1006 Expected<InstrProfRecord> 1007 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 1008 uint64_t FuncHash) { 1009 ArrayRef<NamedInstrProfRecord> Data; 1010 Error Err = Remapper->getRecords(FuncName, Data); 1011 if (Err) 1012 return std::move(Err); 1013 // Found it. Look for counters with the right hash. 1014 for (const NamedInstrProfRecord &I : Data) { 1015 // Check for a match and fill the vector if there is one. 1016 if (I.Hash == FuncHash) 1017 return std::move(I); 1018 } 1019 return error(instrprof_error::hash_mismatch); 1020 } 1021 1022 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 1023 uint64_t FuncHash, 1024 std::vector<uint64_t> &Counts) { 1025 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 1026 if (Error E = Record.takeError()) 1027 return error(std::move(E)); 1028 1029 Counts = Record.get().Counts; 1030 return success(); 1031 } 1032 1033 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 1034 ArrayRef<NamedInstrProfRecord> Data; 1035 1036 Error E = Index->getRecords(Data); 1037 if (E) 1038 return error(std::move(E)); 1039 1040 Record = Data[RecordIndex++]; 1041 if (RecordIndex >= Data.size()) { 1042 Index->advanceToNextKey(); 1043 RecordIndex = 0; 1044 } 1045 return success(); 1046 } 1047 1048 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 1049 uint64_t NumFuncs = 0; 1050 for (const auto &Func : *this) { 1051 if (isIRLevelProfile()) { 1052 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 1053 if (FuncIsCS != IsCS) 1054 continue; 1055 } 1056 Func.accumulateCounts(Sum); 1057 ++NumFuncs; 1058 } 1059 Sum.NumEntries = NumFuncs; 1060 } 1061