1 //===- InstrProfReader.cpp - Instrumented profiling reader ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading profiling data for clang's 10 // instrumentation based PGO and coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProfReader.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/IR/ProfileSummary.h" 21 #include "llvm/ProfileData/InstrProf.h" 22 #include "llvm/ProfileData/ProfileCommon.h" 23 #include "llvm/Support/Endian.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/ErrorOr.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/SymbolRemappingReader.h" 28 #include "llvm/Support/SwapByteOrder.h" 29 #include <algorithm> 30 #include <cctype> 31 #include <cstddef> 32 #include <cstdint> 33 #include <limits> 34 #include <memory> 35 #include <system_error> 36 #include <utility> 37 #include <vector> 38 39 using namespace llvm; 40 41 static Expected<std::unique_ptr<MemoryBuffer>> 42 setupMemoryBuffer(const Twine &Path) { 43 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = 44 MemoryBuffer::getFileOrSTDIN(Path); 45 if (std::error_code EC = BufferOrErr.getError()) 46 return errorCodeToError(EC); 47 return std::move(BufferOrErr.get()); 48 } 49 50 static Error initializeReader(InstrProfReader &Reader) { 51 return Reader.readHeader(); 52 } 53 54 Expected<std::unique_ptr<InstrProfReader>> 55 InstrProfReader::create(const Twine &Path) { 56 // Set up the buffer to read. 57 auto BufferOrError = setupMemoryBuffer(Path); 58 if (Error E = BufferOrError.takeError()) 59 return std::move(E); 60 return InstrProfReader::create(std::move(BufferOrError.get())); 61 } 62 63 Expected<std::unique_ptr<InstrProfReader>> 64 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 65 // Sanity check the buffer. 66 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 67 return make_error<InstrProfError>(instrprof_error::too_large); 68 69 if (Buffer->getBufferSize() == 0) 70 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 71 72 std::unique_ptr<InstrProfReader> Result; 73 // Create the reader. 74 if (IndexedInstrProfReader::hasFormat(*Buffer)) 75 Result.reset(new IndexedInstrProfReader(std::move(Buffer))); 76 else if (RawInstrProfReader64::hasFormat(*Buffer)) 77 Result.reset(new RawInstrProfReader64(std::move(Buffer))); 78 else if (RawInstrProfReader32::hasFormat(*Buffer)) 79 Result.reset(new RawInstrProfReader32(std::move(Buffer))); 80 else if (TextInstrProfReader::hasFormat(*Buffer)) 81 Result.reset(new TextInstrProfReader(std::move(Buffer))); 82 else 83 return make_error<InstrProfError>(instrprof_error::unrecognized_format); 84 85 // Initialize the reader and return the result. 86 if (Error E = initializeReader(*Result)) 87 return std::move(E); 88 89 return std::move(Result); 90 } 91 92 Expected<std::unique_ptr<IndexedInstrProfReader>> 93 IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { 94 // Set up the buffer to read. 95 auto BufferOrError = setupMemoryBuffer(Path); 96 if (Error E = BufferOrError.takeError()) 97 return std::move(E); 98 99 // Set up the remapping buffer if requested. 100 std::unique_ptr<MemoryBuffer> RemappingBuffer; 101 std::string RemappingPathStr = RemappingPath.str(); 102 if (!RemappingPathStr.empty()) { 103 auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); 104 if (Error E = RemappingBufferOrError.takeError()) 105 return std::move(E); 106 RemappingBuffer = std::move(RemappingBufferOrError.get()); 107 } 108 109 return IndexedInstrProfReader::create(std::move(BufferOrError.get()), 110 std::move(RemappingBuffer)); 111 } 112 113 Expected<std::unique_ptr<IndexedInstrProfReader>> 114 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer, 115 std::unique_ptr<MemoryBuffer> RemappingBuffer) { 116 // Sanity check the buffer. 117 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max()) 118 return make_error<InstrProfError>(instrprof_error::too_large); 119 120 // Create the reader. 121 if (!IndexedInstrProfReader::hasFormat(*Buffer)) 122 return make_error<InstrProfError>(instrprof_error::bad_magic); 123 auto Result = std::make_unique<IndexedInstrProfReader>( 124 std::move(Buffer), std::move(RemappingBuffer)); 125 126 // Initialize the reader and return the result. 127 if (Error E = initializeReader(*Result)) 128 return std::move(E); 129 130 return std::move(Result); 131 } 132 133 void InstrProfIterator::Increment() { 134 if (auto E = Reader->readNextRecord(Record)) { 135 // Handle errors in the reader. 136 InstrProfError::take(std::move(E)); 137 *this = InstrProfIterator(); 138 } 139 } 140 141 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { 142 // Verify that this really looks like plain ASCII text by checking a 143 // 'reasonable' number of characters (up to profile magic size). 144 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); 145 StringRef buffer = Buffer.getBufferStart(); 146 return count == 0 || 147 std::all_of(buffer.begin(), buffer.begin() + count, 148 [](char c) { return isPrint(c) || isSpace(c); }); 149 } 150 151 // Read the profile variant flag from the header: ":FE" means this is a FE 152 // generated profile. ":IR" means this is an IR level profile. Other strings 153 // with a leading ':' will be reported an error format. 154 Error TextInstrProfReader::readHeader() { 155 Symtab.reset(new InstrProfSymtab()); 156 bool IsIRInstr = false; 157 if (!Line->startswith(":")) { 158 IsIRLevelProfile = false; 159 return success(); 160 } 161 StringRef Str = (Line)->substr(1); 162 if (Str.equals_lower("ir")) 163 IsIRInstr = true; 164 else if (Str.equals_lower("fe")) 165 IsIRInstr = false; 166 else if (Str.equals_lower("csir")) { 167 IsIRInstr = true; 168 HasCSIRLevelProfile = true; 169 } else 170 return error(instrprof_error::bad_header); 171 172 ++Line; 173 IsIRLevelProfile = IsIRInstr; 174 return success(); 175 } 176 177 Error 178 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { 179 180 #define CHECK_LINE_END(Line) \ 181 if (Line.is_at_end()) \ 182 return error(instrprof_error::truncated); 183 #define READ_NUM(Str, Dst) \ 184 if ((Str).getAsInteger(10, (Dst))) \ 185 return error(instrprof_error::malformed); 186 #define VP_READ_ADVANCE(Val) \ 187 CHECK_LINE_END(Line); \ 188 uint32_t Val; \ 189 READ_NUM((*Line), (Val)); \ 190 Line++; 191 192 if (Line.is_at_end()) 193 return success(); 194 195 uint32_t NumValueKinds; 196 if (Line->getAsInteger(10, NumValueKinds)) { 197 // No value profile data 198 return success(); 199 } 200 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) 201 return error(instrprof_error::malformed); 202 Line++; 203 204 for (uint32_t VK = 0; VK < NumValueKinds; VK++) { 205 VP_READ_ADVANCE(ValueKind); 206 if (ValueKind > IPVK_Last) 207 return error(instrprof_error::malformed); 208 VP_READ_ADVANCE(NumValueSites); 209 if (!NumValueSites) 210 continue; 211 212 Record.reserveSites(VK, NumValueSites); 213 for (uint32_t S = 0; S < NumValueSites; S++) { 214 VP_READ_ADVANCE(NumValueData); 215 216 std::vector<InstrProfValueData> CurrentValues; 217 for (uint32_t V = 0; V < NumValueData; V++) { 218 CHECK_LINE_END(Line); 219 std::pair<StringRef, StringRef> VD = Line->rsplit(':'); 220 uint64_t TakenCount, Value; 221 if (ValueKind == IPVK_IndirectCallTarget) { 222 if (InstrProfSymtab::isExternalSymbol(VD.first)) { 223 Value = 0; 224 } else { 225 if (Error E = Symtab->addFuncName(VD.first)) 226 return E; 227 Value = IndexedInstrProf::ComputeHash(VD.first); 228 } 229 } else { 230 READ_NUM(VD.first, Value); 231 } 232 READ_NUM(VD.second, TakenCount); 233 CurrentValues.push_back({Value, TakenCount}); 234 Line++; 235 } 236 Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData, 237 nullptr); 238 } 239 } 240 return success(); 241 242 #undef CHECK_LINE_END 243 #undef READ_NUM 244 #undef VP_READ_ADVANCE 245 } 246 247 Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 248 // Skip empty lines and comments. 249 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) 250 ++Line; 251 // If we hit EOF while looking for a name, we're done. 252 if (Line.is_at_end()) { 253 return error(instrprof_error::eof); 254 } 255 256 // Read the function name. 257 Record.Name = *Line++; 258 if (Error E = Symtab->addFuncName(Record.Name)) 259 return error(std::move(E)); 260 261 // Read the function hash. 262 if (Line.is_at_end()) 263 return error(instrprof_error::truncated); 264 if ((Line++)->getAsInteger(0, Record.Hash)) 265 return error(instrprof_error::malformed); 266 267 // Read the number of counters. 268 uint64_t NumCounters; 269 if (Line.is_at_end()) 270 return error(instrprof_error::truncated); 271 if ((Line++)->getAsInteger(10, NumCounters)) 272 return error(instrprof_error::malformed); 273 if (NumCounters == 0) 274 return error(instrprof_error::malformed); 275 276 // Read each counter and fill our internal storage with the values. 277 Record.Clear(); 278 Record.Counts.reserve(NumCounters); 279 for (uint64_t I = 0; I < NumCounters; ++I) { 280 if (Line.is_at_end()) 281 return error(instrprof_error::truncated); 282 uint64_t Count; 283 if ((Line++)->getAsInteger(10, Count)) 284 return error(instrprof_error::malformed); 285 Record.Counts.push_back(Count); 286 } 287 288 // Check if value profile data exists and read it if so. 289 if (Error E = readValueProfileData(Record)) 290 return error(std::move(E)); 291 292 return success(); 293 } 294 295 template <class IntPtrT> 296 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { 297 if (DataBuffer.getBufferSize() < sizeof(uint64_t)) 298 return false; 299 uint64_t Magic = 300 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); 301 return RawInstrProf::getMagic<IntPtrT>() == Magic || 302 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic; 303 } 304 305 template <class IntPtrT> 306 Error RawInstrProfReader<IntPtrT>::readHeader() { 307 if (!hasFormat(*DataBuffer)) 308 return error(instrprof_error::bad_magic); 309 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) 310 return error(instrprof_error::bad_header); 311 auto *Header = reinterpret_cast<const RawInstrProf::Header *>( 312 DataBuffer->getBufferStart()); 313 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>(); 314 return readHeader(*Header); 315 } 316 317 template <class IntPtrT> 318 Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { 319 const char *End = DataBuffer->getBufferEnd(); 320 // Skip zero padding between profiles. 321 while (CurrentPos != End && *CurrentPos == 0) 322 ++CurrentPos; 323 // If there's nothing left, we're done. 324 if (CurrentPos == End) 325 return make_error<InstrProfError>(instrprof_error::eof); 326 // If there isn't enough space for another header, this is probably just 327 // garbage at the end of the file. 328 if (CurrentPos + sizeof(RawInstrProf::Header) > End) 329 return make_error<InstrProfError>(instrprof_error::malformed); 330 // The writer ensures each profile is padded to start at an aligned address. 331 if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t)) 332 return make_error<InstrProfError>(instrprof_error::malformed); 333 // The magic should have the same byte order as in the previous header. 334 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); 335 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>())) 336 return make_error<InstrProfError>(instrprof_error::bad_magic); 337 338 // There's another profile to read, so we need to process the header. 339 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos); 340 return readHeader(*Header); 341 } 342 343 template <class IntPtrT> 344 Error RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) { 345 if (Error E = Symtab.create(StringRef(NamesStart, NamesSize))) 346 return error(std::move(E)); 347 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) { 348 const IntPtrT FPtr = swap(I->FunctionPointer); 349 if (!FPtr) 350 continue; 351 Symtab.mapAddress(FPtr, I->NameRef); 352 } 353 return success(); 354 } 355 356 template <class IntPtrT> 357 Error RawInstrProfReader<IntPtrT>::readHeader( 358 const RawInstrProf::Header &Header) { 359 Version = swap(Header.Version); 360 if (GET_VERSION(Version) != RawInstrProf::Version) 361 return error(instrprof_error::unsupported_version); 362 363 CountersDelta = swap(Header.CountersDelta); 364 NamesDelta = swap(Header.NamesDelta); 365 auto DataSize = swap(Header.DataSize); 366 auto PaddingBytesBeforeCounters = swap(Header.PaddingBytesBeforeCounters); 367 auto CountersSize = swap(Header.CountersSize); 368 auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters); 369 NamesSize = swap(Header.NamesSize); 370 ValueKindLast = swap(Header.ValueKindLast); 371 372 auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>); 373 auto PaddingSize = getNumPaddingBytes(NamesSize); 374 375 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); 376 ptrdiff_t CountersOffset = 377 DataOffset + DataSizeInBytes + PaddingBytesBeforeCounters; 378 ptrdiff_t NamesOffset = CountersOffset + (sizeof(uint64_t) * CountersSize) + 379 PaddingBytesAfterCounters; 380 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; 381 382 auto *Start = reinterpret_cast<const char *>(&Header); 383 if (Start + ValueDataOffset > DataBuffer->getBufferEnd()) 384 return error(instrprof_error::bad_header); 385 386 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>( 387 Start + DataOffset); 388 DataEnd = Data + DataSize; 389 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); 390 NamesStart = Start + NamesOffset; 391 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset); 392 393 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 394 if (Error E = createSymtab(*NewSymtab.get())) 395 return E; 396 397 Symtab = std::move(NewSymtab); 398 return success(); 399 } 400 401 template <class IntPtrT> 402 Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { 403 Record.Name = getName(Data->NameRef); 404 return success(); 405 } 406 407 template <class IntPtrT> 408 Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { 409 Record.Hash = swap(Data->FuncHash); 410 return success(); 411 } 412 413 template <class IntPtrT> 414 Error RawInstrProfReader<IntPtrT>::readRawCounts( 415 InstrProfRecord &Record) { 416 uint32_t NumCounters = swap(Data->NumCounters); 417 IntPtrT CounterPtr = Data->CounterPtr; 418 if (NumCounters == 0) 419 return error(instrprof_error::malformed); 420 421 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); 422 ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart; 423 424 // Check bounds. Note that the counter pointer embedded in the data record 425 // may itself be corrupt. 426 if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters) 427 return error(instrprof_error::malformed); 428 ptrdiff_t CounterOffset = getCounterOffset(CounterPtr); 429 if (CounterOffset < 0 || CounterOffset > MaxNumCounters || 430 ((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters) 431 return error(instrprof_error::malformed); 432 433 auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters); 434 435 if (ShouldSwapBytes) { 436 Record.Counts.clear(); 437 Record.Counts.reserve(RawCounts.size()); 438 for (uint64_t Count : RawCounts) 439 Record.Counts.push_back(swap(Count)); 440 } else 441 Record.Counts = RawCounts; 442 443 return success(); 444 } 445 446 template <class IntPtrT> 447 Error RawInstrProfReader<IntPtrT>::readValueProfilingData( 448 InstrProfRecord &Record) { 449 Record.clearValueData(); 450 CurValueDataSize = 0; 451 // Need to match the logic in value profile dumper code in compiler-rt: 452 uint32_t NumValueKinds = 0; 453 for (uint32_t I = 0; I < IPVK_Last + 1; I++) 454 NumValueKinds += (Data->NumValueSites[I] != 0); 455 456 if (!NumValueKinds) 457 return success(); 458 459 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 460 ValueProfData::getValueProfData( 461 ValueDataStart, (const unsigned char *)DataBuffer->getBufferEnd(), 462 getDataEndianness()); 463 464 if (Error E = VDataPtrOrErr.takeError()) 465 return E; 466 467 // Note that besides deserialization, this also performs the conversion for 468 // indirect call targets. The function pointers from the raw profile are 469 // remapped into function name hashes. 470 VDataPtrOrErr.get()->deserializeTo(Record, Symtab.get()); 471 CurValueDataSize = VDataPtrOrErr.get()->getSize(); 472 return success(); 473 } 474 475 template <class IntPtrT> 476 Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { 477 if (atEnd()) 478 // At this point, ValueDataStart field points to the next header. 479 if (Error E = readNextHeader(getNextHeaderPos())) 480 return error(std::move(E)); 481 482 // Read name ad set it in Record. 483 if (Error E = readName(Record)) 484 return error(std::move(E)); 485 486 // Read FuncHash and set it in Record. 487 if (Error E = readFuncHash(Record)) 488 return error(std::move(E)); 489 490 // Read raw counts and set Record. 491 if (Error E = readRawCounts(Record)) 492 return error(std::move(E)); 493 494 // Read value data and set Record. 495 if (Error E = readValueProfilingData(Record)) 496 return error(std::move(E)); 497 498 // Iterate. 499 advanceData(); 500 return success(); 501 } 502 503 namespace llvm { 504 505 template class RawInstrProfReader<uint32_t>; 506 template class RawInstrProfReader<uint64_t>; 507 508 } // end namespace llvm 509 510 InstrProfLookupTrait::hash_value_type 511 InstrProfLookupTrait::ComputeHash(StringRef K) { 512 return IndexedInstrProf::ComputeHash(HashType, K); 513 } 514 515 using data_type = InstrProfLookupTrait::data_type; 516 using offset_type = InstrProfLookupTrait::offset_type; 517 518 bool InstrProfLookupTrait::readValueProfilingData( 519 const unsigned char *&D, const unsigned char *const End) { 520 Expected<std::unique_ptr<ValueProfData>> VDataPtrOrErr = 521 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); 522 523 if (VDataPtrOrErr.takeError()) 524 return false; 525 526 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); 527 D += VDataPtrOrErr.get()->TotalSize; 528 529 return true; 530 } 531 532 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, 533 offset_type N) { 534 using namespace support; 535 536 // Check if the data is corrupt. If so, don't try to read it. 537 if (N % sizeof(uint64_t)) 538 return data_type(); 539 540 DataBuffer.clear(); 541 std::vector<uint64_t> CounterBuffer; 542 543 const unsigned char *End = D + N; 544 while (D < End) { 545 // Read hash. 546 if (D + sizeof(uint64_t) >= End) 547 return data_type(); 548 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); 549 550 // Initialize number of counters for GET_VERSION(FormatVersion) == 1. 551 uint64_t CountsSize = N / sizeof(uint64_t) - 1; 552 // If format version is different then read the number of counters. 553 if (GET_VERSION(FormatVersion) != IndexedInstrProf::ProfVersion::Version1) { 554 if (D + sizeof(uint64_t) > End) 555 return data_type(); 556 CountsSize = endian::readNext<uint64_t, little, unaligned>(D); 557 } 558 // Read counter values. 559 if (D + CountsSize * sizeof(uint64_t) > End) 560 return data_type(); 561 562 CounterBuffer.clear(); 563 CounterBuffer.reserve(CountsSize); 564 for (uint64_t J = 0; J < CountsSize; ++J) 565 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); 566 567 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); 568 569 // Read value profiling data. 570 if (GET_VERSION(FormatVersion) > IndexedInstrProf::ProfVersion::Version2 && 571 !readValueProfilingData(D, End)) { 572 DataBuffer.clear(); 573 return data_type(); 574 } 575 } 576 return DataBuffer; 577 } 578 579 template <typename HashTableImpl> 580 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 581 StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { 582 auto Iter = HashTable->find(FuncName); 583 if (Iter == HashTable->end()) 584 return make_error<InstrProfError>(instrprof_error::unknown_function); 585 586 Data = (*Iter); 587 if (Data.empty()) 588 return make_error<InstrProfError>(instrprof_error::malformed); 589 590 return Error::success(); 591 } 592 593 template <typename HashTableImpl> 594 Error InstrProfReaderIndex<HashTableImpl>::getRecords( 595 ArrayRef<NamedInstrProfRecord> &Data) { 596 if (atEnd()) 597 return make_error<InstrProfError>(instrprof_error::eof); 598 599 Data = *RecordIterator; 600 601 if (Data.empty()) 602 return make_error<InstrProfError>(instrprof_error::malformed); 603 604 return Error::success(); 605 } 606 607 template <typename HashTableImpl> 608 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex( 609 const unsigned char *Buckets, const unsigned char *const Payload, 610 const unsigned char *const Base, IndexedInstrProf::HashT HashType, 611 uint64_t Version) { 612 FormatVersion = Version; 613 HashTable.reset(HashTableImpl::Create( 614 Buckets, Payload, Base, 615 typename HashTableImpl::InfoType(HashType, Version))); 616 RecordIterator = HashTable->data_begin(); 617 } 618 619 namespace { 620 /// A remapper that does not apply any remappings. 621 class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { 622 InstrProfReaderIndexBase &Underlying; 623 624 public: 625 InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) 626 : Underlying(Underlying) {} 627 628 Error getRecords(StringRef FuncName, 629 ArrayRef<NamedInstrProfRecord> &Data) override { 630 return Underlying.getRecords(FuncName, Data); 631 } 632 }; 633 } 634 635 /// A remapper that applies remappings based on a symbol remapping file. 636 template <typename HashTableImpl> 637 class llvm::InstrProfReaderItaniumRemapper 638 : public InstrProfReaderRemapper { 639 public: 640 InstrProfReaderItaniumRemapper( 641 std::unique_ptr<MemoryBuffer> RemapBuffer, 642 InstrProfReaderIndex<HashTableImpl> &Underlying) 643 : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { 644 } 645 646 /// Extract the original function name from a PGO function name. 647 static StringRef extractName(StringRef Name) { 648 // We can have multiple :-separated pieces; there can be pieces both 649 // before and after the mangled name. Find the first part that starts 650 // with '_Z'; we'll assume that's the mangled name we want. 651 std::pair<StringRef, StringRef> Parts = {StringRef(), Name}; 652 while (true) { 653 Parts = Parts.second.split(':'); 654 if (Parts.first.startswith("_Z")) 655 return Parts.first; 656 if (Parts.second.empty()) 657 return Name; 658 } 659 } 660 661 /// Given a mangled name extracted from a PGO function name, and a new 662 /// form for that mangled name, reconstitute the name. 663 static void reconstituteName(StringRef OrigName, StringRef ExtractedName, 664 StringRef Replacement, 665 SmallVectorImpl<char> &Out) { 666 Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); 667 Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); 668 Out.insert(Out.end(), Replacement.begin(), Replacement.end()); 669 Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); 670 } 671 672 Error populateRemappings() override { 673 if (Error E = Remappings.read(*RemapBuffer)) 674 return E; 675 for (StringRef Name : Underlying.HashTable->keys()) { 676 StringRef RealName = extractName(Name); 677 if (auto Key = Remappings.insert(RealName)) { 678 // FIXME: We could theoretically map the same equivalence class to 679 // multiple names in the profile data. If that happens, we should 680 // return NamedInstrProfRecords from all of them. 681 MappedNames.insert({Key, RealName}); 682 } 683 } 684 return Error::success(); 685 } 686 687 Error getRecords(StringRef FuncName, 688 ArrayRef<NamedInstrProfRecord> &Data) override { 689 StringRef RealName = extractName(FuncName); 690 if (auto Key = Remappings.lookup(RealName)) { 691 StringRef Remapped = MappedNames.lookup(Key); 692 if (!Remapped.empty()) { 693 if (RealName.begin() == FuncName.begin() && 694 RealName.end() == FuncName.end()) 695 FuncName = Remapped; 696 else { 697 // Try rebuilding the name from the given remapping. 698 SmallString<256> Reconstituted; 699 reconstituteName(FuncName, RealName, Remapped, Reconstituted); 700 Error E = Underlying.getRecords(Reconstituted, Data); 701 if (!E) 702 return E; 703 704 // If we failed because the name doesn't exist, fall back to asking 705 // about the original name. 706 if (Error Unhandled = handleErrors( 707 std::move(E), [](std::unique_ptr<InstrProfError> Err) { 708 return Err->get() == instrprof_error::unknown_function 709 ? Error::success() 710 : Error(std::move(Err)); 711 })) 712 return Unhandled; 713 } 714 } 715 } 716 return Underlying.getRecords(FuncName, Data); 717 } 718 719 private: 720 /// The memory buffer containing the remapping configuration. Remappings 721 /// holds pointers into this buffer. 722 std::unique_ptr<MemoryBuffer> RemapBuffer; 723 724 /// The mangling remapper. 725 SymbolRemappingReader Remappings; 726 727 /// Mapping from mangled name keys to the name used for the key in the 728 /// profile data. 729 /// FIXME: Can we store a location within the on-disk hash table instead of 730 /// redoing lookup? 731 DenseMap<SymbolRemappingReader::Key, StringRef> MappedNames; 732 733 /// The real profile data reader. 734 InstrProfReaderIndex<HashTableImpl> &Underlying; 735 }; 736 737 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { 738 using namespace support; 739 740 if (DataBuffer.getBufferSize() < 8) 741 return false; 742 uint64_t Magic = 743 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); 744 // Verify that it's magical. 745 return Magic == IndexedInstrProf::Magic; 746 } 747 748 const unsigned char * 749 IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, 750 const unsigned char *Cur, bool UseCS) { 751 using namespace IndexedInstrProf; 752 using namespace support; 753 754 if (Version >= IndexedInstrProf::Version4) { 755 const IndexedInstrProf::Summary *SummaryInLE = 756 reinterpret_cast<const IndexedInstrProf::Summary *>(Cur); 757 uint64_t NFields = 758 endian::byte_swap<uint64_t, little>(SummaryInLE->NumSummaryFields); 759 uint64_t NEntries = 760 endian::byte_swap<uint64_t, little>(SummaryInLE->NumCutoffEntries); 761 uint32_t SummarySize = 762 IndexedInstrProf::Summary::getSize(NFields, NEntries); 763 std::unique_ptr<IndexedInstrProf::Summary> SummaryData = 764 IndexedInstrProf::allocSummary(SummarySize); 765 766 const uint64_t *Src = reinterpret_cast<const uint64_t *>(SummaryInLE); 767 uint64_t *Dst = reinterpret_cast<uint64_t *>(SummaryData.get()); 768 for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) 769 Dst[I] = endian::byte_swap<uint64_t, little>(Src[I]); 770 771 SummaryEntryVector DetailedSummary; 772 for (unsigned I = 0; I < SummaryData->NumCutoffEntries; I++) { 773 const IndexedInstrProf::Summary::Entry &Ent = SummaryData->getEntry(I); 774 DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, 775 Ent.NumBlocks); 776 } 777 std::unique_ptr<llvm::ProfileSummary> &Summary = 778 UseCS ? this->CS_Summary : this->Summary; 779 780 // initialize InstrProfSummary using the SummaryData from disk. 781 Summary = std::make_unique<ProfileSummary>( 782 UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, 783 DetailedSummary, SummaryData->get(Summary::TotalBlockCount), 784 SummaryData->get(Summary::MaxBlockCount), 785 SummaryData->get(Summary::MaxInternalBlockCount), 786 SummaryData->get(Summary::MaxFunctionCount), 787 SummaryData->get(Summary::TotalNumBlocks), 788 SummaryData->get(Summary::TotalNumFunctions)); 789 return Cur + SummarySize; 790 } else { 791 // The older versions do not support a profile summary. This just computes 792 // an empty summary, which will not result in accurate hot/cold detection. 793 // We would need to call addRecord for all NamedInstrProfRecords to get the 794 // correct summary. However, this version is old (prior to early 2016) and 795 // has not been supporting an accurate summary for several years. 796 InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 797 Summary = Builder.getSummary(); 798 return Cur; 799 } 800 } 801 802 Error IndexedInstrProfReader::readHeader() { 803 using namespace support; 804 805 const unsigned char *Start = 806 (const unsigned char *)DataBuffer->getBufferStart(); 807 const unsigned char *Cur = Start; 808 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) 809 return error(instrprof_error::truncated); 810 811 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur); 812 Cur += sizeof(IndexedInstrProf::Header); 813 814 // Check the magic number. 815 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic); 816 if (Magic != IndexedInstrProf::Magic) 817 return error(instrprof_error::bad_magic); 818 819 // Read the version. 820 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version); 821 if (GET_VERSION(FormatVersion) > 822 IndexedInstrProf::ProfVersion::CurrentVersion) 823 return error(instrprof_error::unsupported_version); 824 825 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 826 /* UseCS */ false); 827 if (FormatVersion & VARIANT_MASK_CSIR_PROF) 828 Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, 829 /* UseCS */ true); 830 831 // Read the hash type and start offset. 832 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( 833 endian::byte_swap<uint64_t, little>(Header->HashType)); 834 if (HashType > IndexedInstrProf::HashT::Last) 835 return error(instrprof_error::unsupported_hash_type); 836 837 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset); 838 839 // The rest of the file is an on disk hash table. 840 auto IndexPtr = 841 std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>( 842 Start + HashOffset, Cur, Start, HashType, FormatVersion); 843 844 // Load the remapping table now if requested. 845 if (RemappingBuffer) { 846 Remapper = std::make_unique< 847 InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>( 848 std::move(RemappingBuffer), *IndexPtr); 849 if (Error E = Remapper->populateRemappings()) 850 return E; 851 } else { 852 Remapper = std::make_unique<InstrProfReaderNullRemapper>(*IndexPtr); 853 } 854 Index = std::move(IndexPtr); 855 856 return success(); 857 } 858 859 InstrProfSymtab &IndexedInstrProfReader::getSymtab() { 860 if (Symtab.get()) 861 return *Symtab.get(); 862 863 std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>(); 864 if (Error E = Index->populateSymtab(*NewSymtab.get())) { 865 consumeError(error(InstrProfError::take(std::move(E)))); 866 } 867 868 Symtab = std::move(NewSymtab); 869 return *Symtab.get(); 870 } 871 872 Expected<InstrProfRecord> 873 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, 874 uint64_t FuncHash) { 875 ArrayRef<NamedInstrProfRecord> Data; 876 Error Err = Remapper->getRecords(FuncName, Data); 877 if (Err) 878 return std::move(Err); 879 // Found it. Look for counters with the right hash. 880 for (unsigned I = 0, E = Data.size(); I < E; ++I) { 881 // Check for a match and fill the vector if there is one. 882 if (Data[I].Hash == FuncHash) { 883 return std::move(Data[I]); 884 } 885 } 886 return error(instrprof_error::hash_mismatch); 887 } 888 889 Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, 890 uint64_t FuncHash, 891 std::vector<uint64_t> &Counts) { 892 Expected<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash); 893 if (Error E = Record.takeError()) 894 return error(std::move(E)); 895 896 Counts = Record.get().Counts; 897 return success(); 898 } 899 900 Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { 901 ArrayRef<NamedInstrProfRecord> Data; 902 903 Error E = Index->getRecords(Data); 904 if (E) 905 return error(std::move(E)); 906 907 Record = Data[RecordIndex++]; 908 if (RecordIndex >= Data.size()) { 909 Index->advanceToNextKey(); 910 RecordIndex = 0; 911 } 912 return success(); 913 } 914 915 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) { 916 uint64_t NumFuncs = 0; 917 for (const auto &Func : *this) { 918 if (isIRLevelProfile()) { 919 bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); 920 if (FuncIsCS != IsCS) 921 continue; 922 } 923 Func.accumulateCounts(Sum); 924 ++NumFuncs; 925 } 926 Sum.NumEntries = NumFuncs; 927 } 928