1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the class that reads LLVM sample profiles. It 10 // supports three file formats: text, binary and gcov. 11 // 12 // The textual representation is useful for debugging and testing purposes. The 13 // binary representation is more compact, resulting in smaller file sizes. 14 // 15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation 16 // tool (https://github.com/google/autofdo) 17 // 18 // All three encodings can be used interchangeably as an input sample profile. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "llvm/ProfileData/SampleProfReader.h" 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/IR/Module.h" 27 #include "llvm/IR/ProfileSummary.h" 28 #include "llvm/ProfileData/ProfileCommon.h" 29 #include "llvm/ProfileData/SampleProf.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Compression.h" 32 #include "llvm/Support/ErrorOr.h" 33 #include "llvm/Support/JSON.h" 34 #include "llvm/Support/LEB128.h" 35 #include "llvm/Support/LineIterator.h" 36 #include "llvm/Support/MD5.h" 37 #include "llvm/Support/MemoryBuffer.h" 38 #include "llvm/Support/VirtualFileSystem.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <algorithm> 41 #include <cstddef> 42 #include <cstdint> 43 #include <limits> 44 #include <memory> 45 #include <system_error> 46 #include <vector> 47 48 using namespace llvm; 49 using namespace sampleprof; 50 51 #define DEBUG_TYPE "samplepgo-reader" 52 53 // This internal option specifies if the profile uses FS discriminators. 54 // It only applies to text, and binary format profiles. 55 // For ext-binary format profiles, the flag is set in the summary. 56 static cl::opt<bool> ProfileIsFSDisciminator( 57 "profile-isfs", cl::Hidden, cl::init(false), 58 cl::desc("Profile uses flow sensitive discriminators")); 59 60 /// Dump the function profile for \p FName. 61 /// 62 /// \param FContext Name + context of the function to print. 63 /// \param OS Stream to emit the output to. 64 void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS, 65 raw_ostream &OS) { 66 OS << "Function: " << FS.getContext().toString() << ": " << FS; 67 } 68 69 /// Dump all the function profiles found on stream \p OS. 70 void SampleProfileReader::dump(raw_ostream &OS) { 71 std::vector<NameFunctionSamples> V; 72 sortFuncProfiles(Profiles, V); 73 for (const auto &I : V) 74 dumpFunctionProfile(*I.second, OS); 75 } 76 77 static void dumpFunctionProfileJson(const FunctionSamples &S, 78 json::OStream &JOS, bool TopLevel = false) { 79 auto DumpBody = [&](const BodySampleMap &BodySamples) { 80 for (const auto &I : BodySamples) { 81 const LineLocation &Loc = I.first; 82 const SampleRecord &Sample = I.second; 83 JOS.object([&] { 84 JOS.attribute("line", Loc.LineOffset); 85 if (Loc.Discriminator) 86 JOS.attribute("discriminator", Loc.Discriminator); 87 JOS.attribute("samples", Sample.getSamples()); 88 89 auto CallTargets = Sample.getSortedCallTargets(); 90 if (!CallTargets.empty()) { 91 JOS.attributeArray("calls", [&] { 92 for (const auto &J : CallTargets) { 93 JOS.object([&] { 94 JOS.attribute("function", J.first.str()); 95 JOS.attribute("samples", J.second); 96 }); 97 } 98 }); 99 } 100 }); 101 } 102 }; 103 104 auto DumpCallsiteSamples = [&](const CallsiteSampleMap &CallsiteSamples) { 105 for (const auto &I : CallsiteSamples) 106 for (const auto &FS : I.second) { 107 const LineLocation &Loc = I.first; 108 const FunctionSamples &CalleeSamples = FS.second; 109 JOS.object([&] { 110 JOS.attribute("line", Loc.LineOffset); 111 if (Loc.Discriminator) 112 JOS.attribute("discriminator", Loc.Discriminator); 113 JOS.attributeArray( 114 "samples", [&] { dumpFunctionProfileJson(CalleeSamples, JOS); }); 115 }); 116 } 117 }; 118 119 JOS.object([&] { 120 JOS.attribute("name", S.getFunction().str()); 121 JOS.attribute("total", S.getTotalSamples()); 122 if (TopLevel) 123 JOS.attribute("head", S.getHeadSamples()); 124 125 const auto &BodySamples = S.getBodySamples(); 126 if (!BodySamples.empty()) 127 JOS.attributeArray("body", [&] { DumpBody(BodySamples); }); 128 129 const auto &CallsiteSamples = S.getCallsiteSamples(); 130 if (!CallsiteSamples.empty()) 131 JOS.attributeArray("callsites", 132 [&] { DumpCallsiteSamples(CallsiteSamples); }); 133 }); 134 } 135 136 /// Dump all the function profiles found on stream \p OS in the JSON format. 137 void SampleProfileReader::dumpJson(raw_ostream &OS) { 138 std::vector<NameFunctionSamples> V; 139 sortFuncProfiles(Profiles, V); 140 json::OStream JOS(OS, 2); 141 JOS.arrayBegin(); 142 for (const auto &F : V) 143 dumpFunctionProfileJson(*F.second, JOS, true); 144 JOS.arrayEnd(); 145 146 // Emit a newline character at the end as json::OStream doesn't emit one. 147 OS << "\n"; 148 } 149 150 /// Parse \p Input as function head. 151 /// 152 /// Parse one line of \p Input, and update function name in \p FName, 153 /// function's total sample count in \p NumSamples, function's entry 154 /// count in \p NumHeadSamples. 155 /// 156 /// \returns true if parsing is successful. 157 static bool ParseHead(const StringRef &Input, StringRef &FName, 158 uint64_t &NumSamples, uint64_t &NumHeadSamples) { 159 if (Input[0] == ' ') 160 return false; 161 size_t n2 = Input.rfind(':'); 162 size_t n1 = Input.rfind(':', n2 - 1); 163 FName = Input.substr(0, n1); 164 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) 165 return false; 166 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) 167 return false; 168 return true; 169 } 170 171 /// Returns true if line offset \p L is legal (only has 16 bits). 172 static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } 173 174 /// Parse \p Input that contains metadata. 175 /// Possible metadata: 176 /// - CFG Checksum information: 177 /// !CFGChecksum: 12345 178 /// - CFG Checksum information: 179 /// !Attributes: 1 180 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash. 181 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash, 182 uint32_t &Attributes) { 183 if (Input.starts_with("!CFGChecksum:")) { 184 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); 185 return !CFGInfo.getAsInteger(10, FunctionHash); 186 } 187 188 if (Input.starts_with("!Attributes:")) { 189 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim(); 190 return !Attrib.getAsInteger(10, Attributes); 191 } 192 193 return false; 194 } 195 196 enum class LineType { 197 CallSiteProfile, 198 BodyProfile, 199 Metadata, 200 }; 201 202 /// Parse \p Input as line sample. 203 /// 204 /// \param Input input line. 205 /// \param LineTy Type of this line. 206 /// \param Depth the depth of the inline stack. 207 /// \param NumSamples total samples of the line/inlined callsite. 208 /// \param LineOffset line offset to the start of the function. 209 /// \param Discriminator discriminator of the line. 210 /// \param TargetCountMap map from indirect call target to count. 211 /// \param FunctionHash the function's CFG hash, used by pseudo probe. 212 /// 213 /// returns true if parsing is successful. 214 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth, 215 uint64_t &NumSamples, uint32_t &LineOffset, 216 uint32_t &Discriminator, StringRef &CalleeName, 217 DenseMap<StringRef, uint64_t> &TargetCountMap, 218 uint64_t &FunctionHash, uint32_t &Attributes) { 219 for (Depth = 0; Input[Depth] == ' '; Depth++) 220 ; 221 if (Depth == 0) 222 return false; 223 224 if (Input[Depth] == '!') { 225 LineTy = LineType::Metadata; 226 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes); 227 } 228 229 size_t n1 = Input.find(':'); 230 StringRef Loc = Input.substr(Depth, n1 - Depth); 231 size_t n2 = Loc.find('.'); 232 if (n2 == StringRef::npos) { 233 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) 234 return false; 235 Discriminator = 0; 236 } else { 237 if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) 238 return false; 239 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) 240 return false; 241 } 242 243 StringRef Rest = Input.substr(n1 + 2); 244 if (isDigit(Rest[0])) { 245 LineTy = LineType::BodyProfile; 246 size_t n3 = Rest.find(' '); 247 if (n3 == StringRef::npos) { 248 if (Rest.getAsInteger(10, NumSamples)) 249 return false; 250 } else { 251 if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) 252 return false; 253 } 254 // Find call targets and their sample counts. 255 // Note: In some cases, there are symbols in the profile which are not 256 // mangled. To accommodate such cases, use colon + integer pairs as the 257 // anchor points. 258 // An example: 259 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437 260 // ":1000" and ":437" are used as anchor points so the string above will 261 // be interpreted as 262 // target: _M_construct<char *> 263 // count: 1000 264 // target: string_view<std::allocator<char> > 265 // count: 437 266 while (n3 != StringRef::npos) { 267 n3 += Rest.substr(n3).find_first_not_of(' '); 268 Rest = Rest.substr(n3); 269 n3 = Rest.find_first_of(':'); 270 if (n3 == StringRef::npos || n3 == 0) 271 return false; 272 273 StringRef Target; 274 uint64_t count, n4; 275 while (true) { 276 // Get the segment after the current colon. 277 StringRef AfterColon = Rest.substr(n3 + 1); 278 // Get the target symbol before the current colon. 279 Target = Rest.substr(0, n3); 280 // Check if the word after the current colon is an integer. 281 n4 = AfterColon.find_first_of(' '); 282 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size(); 283 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1); 284 if (!WordAfterColon.getAsInteger(10, count)) 285 break; 286 287 // Try to find the next colon. 288 uint64_t n5 = AfterColon.find_first_of(':'); 289 if (n5 == StringRef::npos) 290 return false; 291 n3 += n5 + 1; 292 } 293 294 // An anchor point is found. Save the {target, count} pair 295 TargetCountMap[Target] = count; 296 if (n4 == Rest.size()) 297 break; 298 // Change n3 to the next blank space after colon + integer pair. 299 n3 = n4; 300 } 301 } else { 302 LineTy = LineType::CallSiteProfile; 303 size_t n3 = Rest.find_last_of(':'); 304 CalleeName = Rest.substr(0, n3); 305 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) 306 return false; 307 } 308 return true; 309 } 310 311 /// Load samples from a text file. 312 /// 313 /// See the documentation at the top of the file for an explanation of 314 /// the expected format. 315 /// 316 /// \returns true if the file was loaded successfully, false otherwise. 317 std::error_code SampleProfileReaderText::readImpl() { 318 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); 319 sampleprof_error Result = sampleprof_error::success; 320 321 InlineCallStack InlineStack; 322 uint32_t TopLevelProbeProfileCount = 0; 323 324 // DepthMetadata tracks whether we have processed metadata for the current 325 // top-level or nested function profile. 326 uint32_t DepthMetadata = 0; 327 328 ProfileIsFS = ProfileIsFSDisciminator; 329 FunctionSamples::ProfileIsFS = ProfileIsFS; 330 for (; !LineIt.is_at_eof(); ++LineIt) { 331 size_t pos = LineIt->find_first_not_of(' '); 332 if (pos == LineIt->npos || (*LineIt)[pos] == '#') 333 continue; 334 // Read the header of each function. 335 // 336 // Note that for function identifiers we are actually expecting 337 // mangled names, but we may not always get them. This happens when 338 // the compiler decides not to emit the function (e.g., it was inlined 339 // and removed). In this case, the binary will not have the linkage 340 // name for the function, so the profiler will emit the function's 341 // unmangled name, which may contain characters like ':' and '>' in its 342 // name (member functions, templates, etc). 343 // 344 // The only requirement we place on the identifier, then, is that it 345 // should not begin with a number. 346 if ((*LineIt)[0] != ' ') { 347 uint64_t NumSamples, NumHeadSamples; 348 StringRef FName; 349 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { 350 reportError(LineIt.line_number(), 351 "Expected 'mangled_name:NUM:NUM', found " + *LineIt); 352 return sampleprof_error::malformed; 353 } 354 DepthMetadata = 0; 355 SampleContext FContext(FName, CSNameTable); 356 if (FContext.hasContext()) 357 ++CSProfileCount; 358 FunctionSamples &FProfile = Profiles.Create(FContext); 359 MergeResult(Result, FProfile.addTotalSamples(NumSamples)); 360 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); 361 InlineStack.clear(); 362 InlineStack.push_back(&FProfile); 363 } else { 364 uint64_t NumSamples; 365 StringRef FName; 366 DenseMap<StringRef, uint64_t> TargetCountMap; 367 uint32_t Depth, LineOffset, Discriminator; 368 LineType LineTy; 369 uint64_t FunctionHash = 0; 370 uint32_t Attributes = 0; 371 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset, 372 Discriminator, FName, TargetCountMap, FunctionHash, 373 Attributes)) { 374 reportError(LineIt.line_number(), 375 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + 376 *LineIt); 377 return sampleprof_error::malformed; 378 } 379 if (LineTy != LineType::Metadata && Depth == DepthMetadata) { 380 // Metadata must be put at the end of a function profile. 381 reportError(LineIt.line_number(), 382 "Found non-metadata after metadata: " + *LineIt); 383 return sampleprof_error::malformed; 384 } 385 386 // Here we handle FS discriminators. 387 Discriminator &= getDiscriminatorMask(); 388 389 while (InlineStack.size() > Depth) { 390 InlineStack.pop_back(); 391 } 392 switch (LineTy) { 393 case LineType::CallSiteProfile: { 394 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( 395 LineLocation(LineOffset, Discriminator))[FunctionId(FName)]; 396 FSamples.setFunction(FunctionId(FName)); 397 MergeResult(Result, FSamples.addTotalSamples(NumSamples)); 398 InlineStack.push_back(&FSamples); 399 DepthMetadata = 0; 400 break; 401 } 402 case LineType::BodyProfile: { 403 while (InlineStack.size() > Depth) { 404 InlineStack.pop_back(); 405 } 406 FunctionSamples &FProfile = *InlineStack.back(); 407 for (const auto &name_count : TargetCountMap) { 408 MergeResult(Result, FProfile.addCalledTargetSamples( 409 LineOffset, Discriminator, 410 FunctionId(name_count.first), 411 name_count.second)); 412 } 413 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, 414 NumSamples)); 415 break; 416 } 417 case LineType::Metadata: { 418 FunctionSamples &FProfile = *InlineStack.back(); 419 if (FunctionHash) { 420 FProfile.setFunctionHash(FunctionHash); 421 if (Depth == 1) 422 ++TopLevelProbeProfileCount; 423 } 424 FProfile.getContext().setAllAttributes(Attributes); 425 if (Attributes & (uint32_t)ContextShouldBeInlined) 426 ProfileIsPreInlined = true; 427 DepthMetadata = Depth; 428 break; 429 } 430 } 431 } 432 } 433 434 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 435 "Cannot have both context-sensitive and regular profile"); 436 ProfileIsCS = (CSProfileCount > 0); 437 assert((TopLevelProbeProfileCount == 0 || 438 TopLevelProbeProfileCount == Profiles.size()) && 439 "Cannot have both probe-based profiles and regular profiles"); 440 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0); 441 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 442 FunctionSamples::ProfileIsCS = ProfileIsCS; 443 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined; 444 445 if (Result == sampleprof_error::success) 446 computeSummary(); 447 448 return Result; 449 } 450 451 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { 452 bool result = false; 453 454 // Check that the first non-comment line is a valid function header. 455 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); 456 if (!LineIt.is_at_eof()) { 457 if ((*LineIt)[0] != ' ') { 458 uint64_t NumSamples, NumHeadSamples; 459 StringRef FName; 460 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); 461 } 462 } 463 464 return result; 465 } 466 467 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { 468 unsigned NumBytesRead = 0; 469 uint64_t Val = decodeULEB128(Data, &NumBytesRead); 470 471 if (Val > std::numeric_limits<T>::max()) { 472 std::error_code EC = sampleprof_error::malformed; 473 reportError(0, EC.message()); 474 return EC; 475 } else if (Data + NumBytesRead > End) { 476 std::error_code EC = sampleprof_error::truncated; 477 reportError(0, EC.message()); 478 return EC; 479 } 480 481 Data += NumBytesRead; 482 return static_cast<T>(Val); 483 } 484 485 ErrorOr<StringRef> SampleProfileReaderBinary::readString() { 486 StringRef Str(reinterpret_cast<const char *>(Data)); 487 if (Data + Str.size() + 1 > End) { 488 std::error_code EC = sampleprof_error::truncated; 489 reportError(0, EC.message()); 490 return EC; 491 } 492 493 Data += Str.size() + 1; 494 return Str; 495 } 496 497 template <typename T> 498 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() { 499 if (Data + sizeof(T) > End) { 500 std::error_code EC = sampleprof_error::truncated; 501 reportError(0, EC.message()); 502 return EC; 503 } 504 505 using namespace support; 506 T Val = endian::readNext<T, llvm::endianness::little, unaligned>(Data); 507 return Val; 508 } 509 510 template <typename T> 511 inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) { 512 auto Idx = readNumber<size_t>(); 513 if (std::error_code EC = Idx.getError()) 514 return EC; 515 if (*Idx >= Table.size()) 516 return sampleprof_error::truncated_name_table; 517 return *Idx; 518 } 519 520 ErrorOr<FunctionId> 521 SampleProfileReaderBinary::readStringFromTable(size_t *RetIdx) { 522 auto Idx = readStringIndex(NameTable); 523 if (std::error_code EC = Idx.getError()) 524 return EC; 525 if (RetIdx) 526 *RetIdx = *Idx; 527 return NameTable[*Idx]; 528 } 529 530 ErrorOr<SampleContextFrames> 531 SampleProfileReaderBinary::readContextFromTable(size_t *RetIdx) { 532 auto ContextIdx = readNumber<size_t>(); 533 if (std::error_code EC = ContextIdx.getError()) 534 return EC; 535 if (*ContextIdx >= CSNameTable.size()) 536 return sampleprof_error::truncated_name_table; 537 if (RetIdx) 538 *RetIdx = *ContextIdx; 539 return CSNameTable[*ContextIdx]; 540 } 541 542 ErrorOr<std::pair<SampleContext, uint64_t>> 543 SampleProfileReaderBinary::readSampleContextFromTable() { 544 SampleContext Context; 545 size_t Idx; 546 if (ProfileIsCS) { 547 auto FContext(readContextFromTable(&Idx)); 548 if (std::error_code EC = FContext.getError()) 549 return EC; 550 Context = SampleContext(*FContext); 551 } else { 552 auto FName(readStringFromTable(&Idx)); 553 if (std::error_code EC = FName.getError()) 554 return EC; 555 Context = SampleContext(*FName); 556 } 557 // Since MD5SampleContextStart may point to the profile's file data, need to 558 // make sure it is reading the same value on big endian CPU. 559 uint64_t Hash = support::endian::read64le(MD5SampleContextStart + Idx); 560 // Lazy computing of hash value, write back to the table to cache it. Only 561 // compute the context's hash value if it is being referenced for the first 562 // time. 563 if (Hash == 0) { 564 assert(MD5SampleContextStart == MD5SampleContextTable.data()); 565 Hash = Context.getHashCode(); 566 support::endian::write64le(&MD5SampleContextTable[Idx], Hash); 567 } 568 return std::make_pair(Context, Hash); 569 } 570 571 std::error_code 572 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { 573 auto NumSamples = readNumber<uint64_t>(); 574 if (std::error_code EC = NumSamples.getError()) 575 return EC; 576 FProfile.addTotalSamples(*NumSamples); 577 578 // Read the samples in the body. 579 auto NumRecords = readNumber<uint32_t>(); 580 if (std::error_code EC = NumRecords.getError()) 581 return EC; 582 583 for (uint32_t I = 0; I < *NumRecords; ++I) { 584 auto LineOffset = readNumber<uint64_t>(); 585 if (std::error_code EC = LineOffset.getError()) 586 return EC; 587 588 if (!isOffsetLegal(*LineOffset)) { 589 return std::error_code(); 590 } 591 592 auto Discriminator = readNumber<uint64_t>(); 593 if (std::error_code EC = Discriminator.getError()) 594 return EC; 595 596 auto NumSamples = readNumber<uint64_t>(); 597 if (std::error_code EC = NumSamples.getError()) 598 return EC; 599 600 auto NumCalls = readNumber<uint32_t>(); 601 if (std::error_code EC = NumCalls.getError()) 602 return EC; 603 604 // Here we handle FS discriminators: 605 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 606 607 for (uint32_t J = 0; J < *NumCalls; ++J) { 608 auto CalledFunction(readStringFromTable()); 609 if (std::error_code EC = CalledFunction.getError()) 610 return EC; 611 612 auto CalledFunctionSamples = readNumber<uint64_t>(); 613 if (std::error_code EC = CalledFunctionSamples.getError()) 614 return EC; 615 616 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, 617 *CalledFunction, *CalledFunctionSamples); 618 } 619 620 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); 621 } 622 623 // Read all the samples for inlined function calls. 624 auto NumCallsites = readNumber<uint32_t>(); 625 if (std::error_code EC = NumCallsites.getError()) 626 return EC; 627 628 for (uint32_t J = 0; J < *NumCallsites; ++J) { 629 auto LineOffset = readNumber<uint64_t>(); 630 if (std::error_code EC = LineOffset.getError()) 631 return EC; 632 633 auto Discriminator = readNumber<uint64_t>(); 634 if (std::error_code EC = Discriminator.getError()) 635 return EC; 636 637 auto FName(readStringFromTable()); 638 if (std::error_code EC = FName.getError()) 639 return EC; 640 641 // Here we handle FS discriminators: 642 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask(); 643 644 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( 645 LineLocation(*LineOffset, DiscriminatorVal))[*FName]; 646 CalleeProfile.setFunction(*FName); 647 if (std::error_code EC = readProfile(CalleeProfile)) 648 return EC; 649 } 650 651 return sampleprof_error::success; 652 } 653 654 std::error_code 655 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) { 656 Data = Start; 657 auto NumHeadSamples = readNumber<uint64_t>(); 658 if (std::error_code EC = NumHeadSamples.getError()) 659 return EC; 660 661 auto FContextHash(readSampleContextFromTable()); 662 if (std::error_code EC = FContextHash.getError()) 663 return EC; 664 665 auto &[FContext, Hash] = *FContextHash; 666 // Use the cached hash value for insertion instead of recalculating it. 667 auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples()); 668 FunctionSamples &FProfile = Res.first->second; 669 FProfile.setContext(FContext); 670 FProfile.addHeadSamples(*NumHeadSamples); 671 672 if (FContext.hasContext()) 673 CSProfileCount++; 674 675 if (std::error_code EC = readProfile(FProfile)) 676 return EC; 677 return sampleprof_error::success; 678 } 679 680 std::error_code SampleProfileReaderBinary::readImpl() { 681 ProfileIsFS = ProfileIsFSDisciminator; 682 FunctionSamples::ProfileIsFS = ProfileIsFS; 683 while (Data < End) { 684 if (std::error_code EC = readFuncProfile(Data)) 685 return EC; 686 } 687 688 return sampleprof_error::success; 689 } 690 691 std::error_code SampleProfileReaderExtBinaryBase::readOneSection( 692 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) { 693 Data = Start; 694 End = Start + Size; 695 switch (Entry.Type) { 696 case SecProfSummary: 697 if (std::error_code EC = readSummary()) 698 return EC; 699 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 700 Summary->setPartialProfile(true); 701 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 702 FunctionSamples::ProfileIsCS = ProfileIsCS = true; 703 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 704 FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true; 705 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 706 FunctionSamples::ProfileIsFS = ProfileIsFS = true; 707 break; 708 case SecNameTable: { 709 bool FixedLengthMD5 = 710 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5); 711 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name); 712 // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire 713 // profile uses MD5 for function name matching in IPO passes. 714 ProfileIsMD5 = ProfileIsMD5 || UseMD5; 715 FunctionSamples::HasUniqSuffix = 716 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix); 717 if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5)) 718 return EC; 719 break; 720 } 721 case SecCSNameTable: { 722 if (std::error_code EC = readCSNameTableSec()) 723 return EC; 724 break; 725 } 726 case SecLBRProfile: 727 if (std::error_code EC = readFuncProfiles()) 728 return EC; 729 break; 730 case SecFuncOffsetTable: 731 // If module is absent, we are using LLVM tools, and need to read all 732 // profiles, so skip reading the function offset table. 733 if (!M) { 734 Data = End; 735 } else { 736 assert((!ProfileIsCS || 737 hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) && 738 "func offset table should always be sorted in CS profile"); 739 if (std::error_code EC = readFuncOffsetTable()) 740 return EC; 741 } 742 break; 743 case SecFuncMetadata: { 744 ProfileIsProbeBased = 745 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased); 746 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased; 747 bool HasAttribute = 748 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute); 749 if (std::error_code EC = readFuncMetadata(HasAttribute)) 750 return EC; 751 break; 752 } 753 case SecProfileSymbolList: 754 if (std::error_code EC = readProfileSymbolList()) 755 return EC; 756 break; 757 default: 758 if (std::error_code EC = readCustomSection(Entry)) 759 return EC; 760 break; 761 } 762 return sampleprof_error::success; 763 } 764 765 bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const { 766 // If profile is CS, the function offset section is expected to consist of 767 // sequences of contexts in pre-order layout 768 // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched 769 // context in the module is found, the profiles of all its callees are 770 // recursively loaded. A list is needed since the order of profiles matters. 771 if (ProfileIsCS) 772 return true; 773 774 // If the profile is MD5, use the map container to lookup functions in 775 // the module. A remapper has no use on MD5 names. 776 if (useMD5()) 777 return false; 778 779 // Profile is not MD5 and if a remapper is present, the remapped name of 780 // every function needed to be matched against the module, so use the list 781 // container since each entry is accessed. 782 if (Remapper) 783 return true; 784 785 // Otherwise use the map container for faster lookup. 786 // TODO: If the cardinality of the function offset section is much smaller 787 // than the number of functions in the module, using the list container can 788 // be always faster, but we need to figure out the constant factor to 789 // determine the cutoff. 790 return false; 791 } 792 793 794 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() { 795 if (!M) 796 return false; 797 FuncsToUse.clear(); 798 for (auto &F : *M) 799 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F)); 800 return true; 801 } 802 803 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() { 804 // If there are more than one function offset section, the profile associated 805 // with the previous section has to be done reading before next one is read. 806 FuncOffsetTable.clear(); 807 FuncOffsetList.clear(); 808 809 auto Size = readNumber<uint64_t>(); 810 if (std::error_code EC = Size.getError()) 811 return EC; 812 813 bool UseFuncOffsetList = useFuncOffsetList(); 814 if (UseFuncOffsetList) 815 FuncOffsetList.reserve(*Size); 816 else 817 FuncOffsetTable.reserve(*Size); 818 819 for (uint64_t I = 0; I < *Size; ++I) { 820 auto FContextHash(readSampleContextFromTable()); 821 if (std::error_code EC = FContextHash.getError()) 822 return EC; 823 824 auto &[FContext, Hash] = *FContextHash; 825 auto Offset = readNumber<uint64_t>(); 826 if (std::error_code EC = Offset.getError()) 827 return EC; 828 829 if (UseFuncOffsetList) 830 FuncOffsetList.emplace_back(FContext, *Offset); 831 else 832 // Because Porfiles replace existing value with new value if collision 833 // happens, we also use the latest offset so that they are consistent. 834 FuncOffsetTable[Hash] = *Offset; 835 } 836 837 return sampleprof_error::success; 838 } 839 840 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() { 841 // Collect functions used by current module if the Reader has been 842 // given a module. 843 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName 844 // which will query FunctionSamples::HasUniqSuffix, so it has to be 845 // called after FunctionSamples::HasUniqSuffix is set, i.e. after 846 // NameTable section is read. 847 bool LoadFuncsToBeUsed = collectFuncsFromModule(); 848 849 // When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all 850 // profiles. 851 const uint8_t *Start = Data; 852 if (!LoadFuncsToBeUsed) { 853 while (Data < End) { 854 if (std::error_code EC = readFuncProfile(Data)) 855 return EC; 856 } 857 assert(Data == End && "More data is read than expected"); 858 } else { 859 // Load function profiles on demand. 860 if (Remapper) { 861 for (auto Name : FuncsToUse) { 862 Remapper->insert(Name); 863 } 864 } 865 866 if (ProfileIsCS) { 867 assert(useFuncOffsetList()); 868 DenseSet<uint64_t> FuncGuidsToUse; 869 if (useMD5()) { 870 for (auto Name : FuncsToUse) 871 FuncGuidsToUse.insert(Function::getGUID(Name)); 872 } 873 874 // For each function in current module, load all context profiles for 875 // the function as well as their callee contexts which can help profile 876 // guided importing for ThinLTO. This can be achieved by walking 877 // through an ordered context container, where contexts are laid out 878 // as if they were walked in preorder of a context trie. While 879 // traversing the trie, a link to the highest common ancestor node is 880 // kept so that all of its decendants will be loaded. 881 const SampleContext *CommonContext = nullptr; 882 for (const auto &NameOffset : FuncOffsetList) { 883 const auto &FContext = NameOffset.first; 884 FunctionId FName = FContext.getFunction(); 885 StringRef FNameString; 886 if (!useMD5()) 887 FNameString = FName.stringRef(); 888 889 // For function in the current module, keep its farthest ancestor 890 // context. This can be used to load itself and its child and 891 // sibling contexts. 892 if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) || 893 (!useMD5() && (FuncsToUse.count(FNameString) || 894 (Remapper && Remapper->exist(FNameString))))) { 895 if (!CommonContext || !CommonContext->IsPrefixOf(FContext)) 896 CommonContext = &FContext; 897 } 898 899 if (CommonContext == &FContext || 900 (CommonContext && CommonContext->IsPrefixOf(FContext))) { 901 // Load profile for the current context which originated from 902 // the common ancestor. 903 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 904 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 905 return EC; 906 } 907 } 908 } else if (useMD5()) { 909 assert(!useFuncOffsetList()); 910 for (auto Name : FuncsToUse) { 911 auto GUID = MD5Hash(Name); 912 auto iter = FuncOffsetTable.find(GUID); 913 if (iter == FuncOffsetTable.end()) 914 continue; 915 const uint8_t *FuncProfileAddr = Start + iter->second; 916 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 917 return EC; 918 } 919 } else if (Remapper) { 920 assert(useFuncOffsetList()); 921 for (auto NameOffset : FuncOffsetList) { 922 SampleContext FContext(NameOffset.first); 923 auto FuncName = FContext.getFunction(); 924 StringRef FuncNameStr = FuncName.stringRef(); 925 if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr)) 926 continue; 927 const uint8_t *FuncProfileAddr = Start + NameOffset.second; 928 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 929 return EC; 930 } 931 } else { 932 assert(!useFuncOffsetList()); 933 for (auto Name : FuncsToUse) { 934 auto iter = FuncOffsetTable.find(MD5Hash(Name)); 935 if (iter == FuncOffsetTable.end()) 936 continue; 937 const uint8_t *FuncProfileAddr = Start + iter->second; 938 if (std::error_code EC = readFuncProfile(FuncProfileAddr)) 939 return EC; 940 } 941 } 942 Data = End; 943 } 944 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) && 945 "Cannot have both context-sensitive and regular profile"); 946 assert((!CSProfileCount || ProfileIsCS) && 947 "Section flag should be consistent with actual profile"); 948 return sampleprof_error::success; 949 } 950 951 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() { 952 if (!ProfSymList) 953 ProfSymList = std::make_unique<ProfileSymbolList>(); 954 955 if (std::error_code EC = ProfSymList->read(Data, End - Data)) 956 return EC; 957 958 Data = End; 959 return sampleprof_error::success; 960 } 961 962 std::error_code SampleProfileReaderExtBinaryBase::decompressSection( 963 const uint8_t *SecStart, const uint64_t SecSize, 964 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) { 965 Data = SecStart; 966 End = SecStart + SecSize; 967 auto DecompressSize = readNumber<uint64_t>(); 968 if (std::error_code EC = DecompressSize.getError()) 969 return EC; 970 DecompressBufSize = *DecompressSize; 971 972 auto CompressSize = readNumber<uint64_t>(); 973 if (std::error_code EC = CompressSize.getError()) 974 return EC; 975 976 if (!llvm::compression::zlib::isAvailable()) 977 return sampleprof_error::zlib_unavailable; 978 979 uint8_t *Buffer = Allocator.Allocate<uint8_t>(DecompressBufSize); 980 size_t UCSize = DecompressBufSize; 981 llvm::Error E = compression::zlib::decompress(ArrayRef(Data, *CompressSize), 982 Buffer, UCSize); 983 if (E) 984 return sampleprof_error::uncompress_failed; 985 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer); 986 return sampleprof_error::success; 987 } 988 989 std::error_code SampleProfileReaderExtBinaryBase::readImpl() { 990 const uint8_t *BufStart = 991 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 992 993 for (auto &Entry : SecHdrTable) { 994 // Skip empty section. 995 if (!Entry.Size) 996 continue; 997 998 // Skip sections without context when SkipFlatProf is true. 999 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1000 continue; 1001 1002 const uint8_t *SecStart = BufStart + Entry.Offset; 1003 uint64_t SecSize = Entry.Size; 1004 1005 // If the section is compressed, decompress it into a buffer 1006 // DecompressBuf before reading the actual data. The pointee of 1007 // 'Data' will be changed to buffer hold by DecompressBuf 1008 // temporarily when reading the actual data. 1009 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress); 1010 if (isCompressed) { 1011 const uint8_t *DecompressBuf; 1012 uint64_t DecompressBufSize; 1013 if (std::error_code EC = decompressSection( 1014 SecStart, SecSize, DecompressBuf, DecompressBufSize)) 1015 return EC; 1016 SecStart = DecompressBuf; 1017 SecSize = DecompressBufSize; 1018 } 1019 1020 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry)) 1021 return EC; 1022 if (Data != SecStart + SecSize) 1023 return sampleprof_error::malformed; 1024 1025 // Change the pointee of 'Data' from DecompressBuf to original Buffer. 1026 if (isCompressed) { 1027 Data = BufStart + Entry.Offset; 1028 End = BufStart + Buffer->getBufferSize(); 1029 } 1030 } 1031 1032 return sampleprof_error::success; 1033 } 1034 1035 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) { 1036 if (Magic == SPMagic()) 1037 return sampleprof_error::success; 1038 return sampleprof_error::bad_magic; 1039 } 1040 1041 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) { 1042 if (Magic == SPMagic(SPF_Ext_Binary)) 1043 return sampleprof_error::success; 1044 return sampleprof_error::bad_magic; 1045 } 1046 1047 std::error_code SampleProfileReaderBinary::readNameTable() { 1048 auto Size = readNumber<size_t>(); 1049 if (std::error_code EC = Size.getError()) 1050 return EC; 1051 1052 // Normally if useMD5 is true, the name table should have MD5 values, not 1053 // strings, however in the case that ExtBinary profile has multiple name 1054 // tables mixing string and MD5, all of them have to be normalized to use MD5, 1055 // because optimization passes can only handle either type. 1056 bool UseMD5 = useMD5(); 1057 1058 NameTable.clear(); 1059 NameTable.reserve(*Size); 1060 if (!ProfileIsCS) { 1061 MD5SampleContextTable.clear(); 1062 if (UseMD5) 1063 MD5SampleContextTable.reserve(*Size); 1064 else 1065 // If we are using strings, delay MD5 computation since only a portion of 1066 // names are used by top level functions. Use 0 to indicate MD5 value is 1067 // to be calculated as no known string has a MD5 value of 0. 1068 MD5SampleContextTable.resize(*Size); 1069 } 1070 for (size_t I = 0; I < *Size; ++I) { 1071 auto Name(readString()); 1072 if (std::error_code EC = Name.getError()) 1073 return EC; 1074 if (UseMD5) { 1075 FunctionId FID(*Name); 1076 if (!ProfileIsCS) 1077 MD5SampleContextTable.emplace_back(FID.getHashCode()); 1078 NameTable.emplace_back(FID); 1079 } else 1080 NameTable.push_back(FunctionId(*Name)); 1081 } 1082 if (!ProfileIsCS) 1083 MD5SampleContextStart = MD5SampleContextTable.data(); 1084 return sampleprof_error::success; 1085 } 1086 1087 std::error_code 1088 SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5, 1089 bool FixedLengthMD5) { 1090 if (FixedLengthMD5) { 1091 if (!IsMD5) 1092 errs() << "If FixedLengthMD5 is true, UseMD5 has to be true"; 1093 auto Size = readNumber<size_t>(); 1094 if (std::error_code EC = Size.getError()) 1095 return EC; 1096 1097 assert(Data + (*Size) * sizeof(uint64_t) == End && 1098 "Fixed length MD5 name table does not contain specified number of " 1099 "entries"); 1100 if (Data + (*Size) * sizeof(uint64_t) > End) 1101 return sampleprof_error::truncated; 1102 1103 NameTable.clear(); 1104 NameTable.reserve(*Size); 1105 for (size_t I = 0; I < *Size; ++I) { 1106 using namespace support; 1107 uint64_t FID = endian::read<uint64_t, endianness::little, unaligned>( 1108 Data + I * sizeof(uint64_t)); 1109 NameTable.emplace_back(FunctionId(FID)); 1110 } 1111 if (!ProfileIsCS) 1112 MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data); 1113 Data = Data + (*Size) * sizeof(uint64_t); 1114 return sampleprof_error::success; 1115 } 1116 1117 if (IsMD5) { 1118 assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here"); 1119 auto Size = readNumber<size_t>(); 1120 if (std::error_code EC = Size.getError()) 1121 return EC; 1122 1123 NameTable.clear(); 1124 NameTable.reserve(*Size); 1125 if (!ProfileIsCS) 1126 MD5SampleContextTable.resize(*Size); 1127 for (size_t I = 0; I < *Size; ++I) { 1128 auto FID = readNumber<uint64_t>(); 1129 if (std::error_code EC = FID.getError()) 1130 return EC; 1131 if (!ProfileIsCS) 1132 support::endian::write64le(&MD5SampleContextTable[I], *FID); 1133 NameTable.emplace_back(FunctionId(*FID)); 1134 } 1135 if (!ProfileIsCS) 1136 MD5SampleContextStart = MD5SampleContextTable.data(); 1137 return sampleprof_error::success; 1138 } 1139 1140 return SampleProfileReaderBinary::readNameTable(); 1141 } 1142 1143 // Read in the CS name table section, which basically contains a list of context 1144 // vectors. Each element of a context vector, aka a frame, refers to the 1145 // underlying raw function names that are stored in the name table, as well as 1146 // a callsite identifier that only makes sense for non-leaf frames. 1147 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() { 1148 auto Size = readNumber<size_t>(); 1149 if (std::error_code EC = Size.getError()) 1150 return EC; 1151 1152 CSNameTable.clear(); 1153 CSNameTable.reserve(*Size); 1154 if (ProfileIsCS) { 1155 // Delay MD5 computation of CS context until they are needed. Use 0 to 1156 // indicate MD5 value is to be calculated as no known string has a MD5 1157 // value of 0. 1158 MD5SampleContextTable.clear(); 1159 MD5SampleContextTable.resize(*Size); 1160 MD5SampleContextStart = MD5SampleContextTable.data(); 1161 } 1162 for (size_t I = 0; I < *Size; ++I) { 1163 CSNameTable.emplace_back(SampleContextFrameVector()); 1164 auto ContextSize = readNumber<uint32_t>(); 1165 if (std::error_code EC = ContextSize.getError()) 1166 return EC; 1167 for (uint32_t J = 0; J < *ContextSize; ++J) { 1168 auto FName(readStringFromTable()); 1169 if (std::error_code EC = FName.getError()) 1170 return EC; 1171 auto LineOffset = readNumber<uint64_t>(); 1172 if (std::error_code EC = LineOffset.getError()) 1173 return EC; 1174 1175 if (!isOffsetLegal(*LineOffset)) 1176 return std::error_code(); 1177 1178 auto Discriminator = readNumber<uint64_t>(); 1179 if (std::error_code EC = Discriminator.getError()) 1180 return EC; 1181 1182 CSNameTable.back().emplace_back( 1183 FName.get(), LineLocation(LineOffset.get(), Discriminator.get())); 1184 } 1185 } 1186 1187 return sampleprof_error::success; 1188 } 1189 1190 std::error_code 1191 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute, 1192 FunctionSamples *FProfile) { 1193 if (Data < End) { 1194 if (ProfileIsProbeBased) { 1195 auto Checksum = readNumber<uint64_t>(); 1196 if (std::error_code EC = Checksum.getError()) 1197 return EC; 1198 if (FProfile) 1199 FProfile->setFunctionHash(*Checksum); 1200 } 1201 1202 if (ProfileHasAttribute) { 1203 auto Attributes = readNumber<uint32_t>(); 1204 if (std::error_code EC = Attributes.getError()) 1205 return EC; 1206 if (FProfile) 1207 FProfile->getContext().setAllAttributes(*Attributes); 1208 } 1209 1210 if (!ProfileIsCS) { 1211 // Read all the attributes for inlined function calls. 1212 auto NumCallsites = readNumber<uint32_t>(); 1213 if (std::error_code EC = NumCallsites.getError()) 1214 return EC; 1215 1216 for (uint32_t J = 0; J < *NumCallsites; ++J) { 1217 auto LineOffset = readNumber<uint64_t>(); 1218 if (std::error_code EC = LineOffset.getError()) 1219 return EC; 1220 1221 auto Discriminator = readNumber<uint64_t>(); 1222 if (std::error_code EC = Discriminator.getError()) 1223 return EC; 1224 1225 auto FContextHash(readSampleContextFromTable()); 1226 if (std::error_code EC = FContextHash.getError()) 1227 return EC; 1228 1229 auto &[FContext, Hash] = *FContextHash; 1230 FunctionSamples *CalleeProfile = nullptr; 1231 if (FProfile) { 1232 CalleeProfile = const_cast<FunctionSamples *>( 1233 &FProfile->functionSamplesAt(LineLocation( 1234 *LineOffset, 1235 *Discriminator))[FContext.getFunction()]); 1236 } 1237 if (std::error_code EC = 1238 readFuncMetadata(ProfileHasAttribute, CalleeProfile)) 1239 return EC; 1240 } 1241 } 1242 } 1243 1244 return sampleprof_error::success; 1245 } 1246 1247 std::error_code 1248 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) { 1249 while (Data < End) { 1250 auto FContextHash(readSampleContextFromTable()); 1251 if (std::error_code EC = FContextHash.getError()) 1252 return EC; 1253 auto &[FContext, Hash] = *FContextHash; 1254 FunctionSamples *FProfile = nullptr; 1255 auto It = Profiles.find(FContext); 1256 if (It != Profiles.end()) 1257 FProfile = &It->second; 1258 1259 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile)) 1260 return EC; 1261 } 1262 1263 assert(Data == End && "More data is read than expected"); 1264 return sampleprof_error::success; 1265 } 1266 1267 std::error_code 1268 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) { 1269 SecHdrTableEntry Entry; 1270 auto Type = readUnencodedNumber<uint64_t>(); 1271 if (std::error_code EC = Type.getError()) 1272 return EC; 1273 Entry.Type = static_cast<SecType>(*Type); 1274 1275 auto Flags = readUnencodedNumber<uint64_t>(); 1276 if (std::error_code EC = Flags.getError()) 1277 return EC; 1278 Entry.Flags = *Flags; 1279 1280 auto Offset = readUnencodedNumber<uint64_t>(); 1281 if (std::error_code EC = Offset.getError()) 1282 return EC; 1283 Entry.Offset = *Offset; 1284 1285 auto Size = readUnencodedNumber<uint64_t>(); 1286 if (std::error_code EC = Size.getError()) 1287 return EC; 1288 Entry.Size = *Size; 1289 1290 Entry.LayoutIndex = Idx; 1291 SecHdrTable.push_back(std::move(Entry)); 1292 return sampleprof_error::success; 1293 } 1294 1295 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() { 1296 auto EntryNum = readUnencodedNumber<uint64_t>(); 1297 if (std::error_code EC = EntryNum.getError()) 1298 return EC; 1299 1300 for (uint64_t i = 0; i < (*EntryNum); i++) 1301 if (std::error_code EC = readSecHdrTableEntry(i)) 1302 return EC; 1303 1304 return sampleprof_error::success; 1305 } 1306 1307 std::error_code SampleProfileReaderExtBinaryBase::readHeader() { 1308 const uint8_t *BufStart = 1309 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1310 Data = BufStart; 1311 End = BufStart + Buffer->getBufferSize(); 1312 1313 if (std::error_code EC = readMagicIdent()) 1314 return EC; 1315 1316 if (std::error_code EC = readSecHdrTable()) 1317 return EC; 1318 1319 return sampleprof_error::success; 1320 } 1321 1322 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) { 1323 uint64_t Size = 0; 1324 for (auto &Entry : SecHdrTable) { 1325 if (Entry.Type == Type) 1326 Size += Entry.Size; 1327 } 1328 return Size; 1329 } 1330 1331 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() { 1332 // Sections in SecHdrTable is not necessarily in the same order as 1333 // sections in the profile because section like FuncOffsetTable needs 1334 // to be written after section LBRProfile but needs to be read before 1335 // section LBRProfile, so we cannot simply use the last entry in 1336 // SecHdrTable to calculate the file size. 1337 uint64_t FileSize = 0; 1338 for (auto &Entry : SecHdrTable) { 1339 FileSize = std::max(Entry.Offset + Entry.Size, FileSize); 1340 } 1341 return FileSize; 1342 } 1343 1344 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { 1345 std::string Flags; 1346 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) 1347 Flags.append("{compressed,"); 1348 else 1349 Flags.append("{"); 1350 1351 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat)) 1352 Flags.append("flat,"); 1353 1354 switch (Entry.Type) { 1355 case SecNameTable: 1356 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5)) 1357 Flags.append("fixlenmd5,"); 1358 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)) 1359 Flags.append("md5,"); 1360 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix)) 1361 Flags.append("uniq,"); 1362 break; 1363 case SecProfSummary: 1364 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial)) 1365 Flags.append("partial,"); 1366 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext)) 1367 Flags.append("context,"); 1368 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsPreInlined)) 1369 Flags.append("preInlined,"); 1370 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) 1371 Flags.append("fs-discriminator,"); 1372 break; 1373 case SecFuncOffsetTable: 1374 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) 1375 Flags.append("ordered,"); 1376 break; 1377 case SecFuncMetadata: 1378 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) 1379 Flags.append("probe,"); 1380 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) 1381 Flags.append("attr,"); 1382 break; 1383 default: 1384 break; 1385 } 1386 char &last = Flags.back(); 1387 if (last == ',') 1388 last = '}'; 1389 else 1390 Flags.append("}"); 1391 return Flags; 1392 } 1393 1394 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) { 1395 uint64_t TotalSecsSize = 0; 1396 for (auto &Entry : SecHdrTable) { 1397 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset 1398 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry) 1399 << "\n"; 1400 ; 1401 TotalSecsSize += Entry.Size; 1402 } 1403 uint64_t HeaderSize = SecHdrTable.front().Offset; 1404 assert(HeaderSize + TotalSecsSize == getFileSize() && 1405 "Size of 'header + sections' doesn't match the total size of profile"); 1406 1407 OS << "Header Size: " << HeaderSize << "\n"; 1408 OS << "Total Sections Size: " << TotalSecsSize << "\n"; 1409 OS << "File Size: " << getFileSize() << "\n"; 1410 return true; 1411 } 1412 1413 std::error_code SampleProfileReaderBinary::readMagicIdent() { 1414 // Read and check the magic identifier. 1415 auto Magic = readNumber<uint64_t>(); 1416 if (std::error_code EC = Magic.getError()) 1417 return EC; 1418 else if (std::error_code EC = verifySPMagic(*Magic)) 1419 return EC; 1420 1421 // Read the version number. 1422 auto Version = readNumber<uint64_t>(); 1423 if (std::error_code EC = Version.getError()) 1424 return EC; 1425 else if (*Version != SPVersion()) 1426 return sampleprof_error::unsupported_version; 1427 1428 return sampleprof_error::success; 1429 } 1430 1431 std::error_code SampleProfileReaderBinary::readHeader() { 1432 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); 1433 End = Data + Buffer->getBufferSize(); 1434 1435 if (std::error_code EC = readMagicIdent()) 1436 return EC; 1437 1438 if (std::error_code EC = readSummary()) 1439 return EC; 1440 1441 if (std::error_code EC = readNameTable()) 1442 return EC; 1443 return sampleprof_error::success; 1444 } 1445 1446 std::error_code SampleProfileReaderBinary::readSummaryEntry( 1447 std::vector<ProfileSummaryEntry> &Entries) { 1448 auto Cutoff = readNumber<uint64_t>(); 1449 if (std::error_code EC = Cutoff.getError()) 1450 return EC; 1451 1452 auto MinBlockCount = readNumber<uint64_t>(); 1453 if (std::error_code EC = MinBlockCount.getError()) 1454 return EC; 1455 1456 auto NumBlocks = readNumber<uint64_t>(); 1457 if (std::error_code EC = NumBlocks.getError()) 1458 return EC; 1459 1460 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks); 1461 return sampleprof_error::success; 1462 } 1463 1464 std::error_code SampleProfileReaderBinary::readSummary() { 1465 auto TotalCount = readNumber<uint64_t>(); 1466 if (std::error_code EC = TotalCount.getError()) 1467 return EC; 1468 1469 auto MaxBlockCount = readNumber<uint64_t>(); 1470 if (std::error_code EC = MaxBlockCount.getError()) 1471 return EC; 1472 1473 auto MaxFunctionCount = readNumber<uint64_t>(); 1474 if (std::error_code EC = MaxFunctionCount.getError()) 1475 return EC; 1476 1477 auto NumBlocks = readNumber<uint64_t>(); 1478 if (std::error_code EC = NumBlocks.getError()) 1479 return EC; 1480 1481 auto NumFunctions = readNumber<uint64_t>(); 1482 if (std::error_code EC = NumFunctions.getError()) 1483 return EC; 1484 1485 auto NumSummaryEntries = readNumber<uint64_t>(); 1486 if (std::error_code EC = NumSummaryEntries.getError()) 1487 return EC; 1488 1489 std::vector<ProfileSummaryEntry> Entries; 1490 for (unsigned i = 0; i < *NumSummaryEntries; i++) { 1491 std::error_code EC = readSummaryEntry(Entries); 1492 if (EC != sampleprof_error::success) 1493 return EC; 1494 } 1495 Summary = std::make_unique<ProfileSummary>( 1496 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0, 1497 *MaxFunctionCount, *NumBlocks, *NumFunctions); 1498 1499 return sampleprof_error::success; 1500 } 1501 1502 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) { 1503 const uint8_t *Data = 1504 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1505 uint64_t Magic = decodeULEB128(Data); 1506 return Magic == SPMagic(); 1507 } 1508 1509 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) { 1510 const uint8_t *Data = 1511 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); 1512 uint64_t Magic = decodeULEB128(Data); 1513 return Magic == SPMagic(SPF_Ext_Binary); 1514 } 1515 1516 std::error_code SampleProfileReaderGCC::skipNextWord() { 1517 uint32_t dummy; 1518 if (!GcovBuffer.readInt(dummy)) 1519 return sampleprof_error::truncated; 1520 return sampleprof_error::success; 1521 } 1522 1523 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { 1524 if (sizeof(T) <= sizeof(uint32_t)) { 1525 uint32_t Val; 1526 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) 1527 return static_cast<T>(Val); 1528 } else if (sizeof(T) <= sizeof(uint64_t)) { 1529 uint64_t Val; 1530 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) 1531 return static_cast<T>(Val); 1532 } 1533 1534 std::error_code EC = sampleprof_error::malformed; 1535 reportError(0, EC.message()); 1536 return EC; 1537 } 1538 1539 ErrorOr<StringRef> SampleProfileReaderGCC::readString() { 1540 StringRef Str; 1541 if (!GcovBuffer.readString(Str)) 1542 return sampleprof_error::truncated; 1543 return Str; 1544 } 1545 1546 std::error_code SampleProfileReaderGCC::readHeader() { 1547 // Read the magic identifier. 1548 if (!GcovBuffer.readGCDAFormat()) 1549 return sampleprof_error::unrecognized_format; 1550 1551 // Read the version number. Note - the GCC reader does not validate this 1552 // version, but the profile creator generates v704. 1553 GCOV::GCOVVersion version; 1554 if (!GcovBuffer.readGCOVVersion(version)) 1555 return sampleprof_error::unrecognized_format; 1556 1557 if (version != GCOV::V407) 1558 return sampleprof_error::unsupported_version; 1559 1560 // Skip the empty integer. 1561 if (std::error_code EC = skipNextWord()) 1562 return EC; 1563 1564 return sampleprof_error::success; 1565 } 1566 1567 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { 1568 uint32_t Tag; 1569 if (!GcovBuffer.readInt(Tag)) 1570 return sampleprof_error::truncated; 1571 1572 if (Tag != Expected) 1573 return sampleprof_error::malformed; 1574 1575 if (std::error_code EC = skipNextWord()) 1576 return EC; 1577 1578 return sampleprof_error::success; 1579 } 1580 1581 std::error_code SampleProfileReaderGCC::readNameTable() { 1582 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) 1583 return EC; 1584 1585 uint32_t Size; 1586 if (!GcovBuffer.readInt(Size)) 1587 return sampleprof_error::truncated; 1588 1589 for (uint32_t I = 0; I < Size; ++I) { 1590 StringRef Str; 1591 if (!GcovBuffer.readString(Str)) 1592 return sampleprof_error::truncated; 1593 Names.push_back(std::string(Str)); 1594 } 1595 1596 return sampleprof_error::success; 1597 } 1598 1599 std::error_code SampleProfileReaderGCC::readFunctionProfiles() { 1600 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) 1601 return EC; 1602 1603 uint32_t NumFunctions; 1604 if (!GcovBuffer.readInt(NumFunctions)) 1605 return sampleprof_error::truncated; 1606 1607 InlineCallStack Stack; 1608 for (uint32_t I = 0; I < NumFunctions; ++I) 1609 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) 1610 return EC; 1611 1612 computeSummary(); 1613 return sampleprof_error::success; 1614 } 1615 1616 std::error_code SampleProfileReaderGCC::readOneFunctionProfile( 1617 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { 1618 uint64_t HeadCount = 0; 1619 if (InlineStack.size() == 0) 1620 if (!GcovBuffer.readInt64(HeadCount)) 1621 return sampleprof_error::truncated; 1622 1623 uint32_t NameIdx; 1624 if (!GcovBuffer.readInt(NameIdx)) 1625 return sampleprof_error::truncated; 1626 1627 StringRef Name(Names[NameIdx]); 1628 1629 uint32_t NumPosCounts; 1630 if (!GcovBuffer.readInt(NumPosCounts)) 1631 return sampleprof_error::truncated; 1632 1633 uint32_t NumCallsites; 1634 if (!GcovBuffer.readInt(NumCallsites)) 1635 return sampleprof_error::truncated; 1636 1637 FunctionSamples *FProfile = nullptr; 1638 if (InlineStack.size() == 0) { 1639 // If this is a top function that we have already processed, do not 1640 // update its profile again. This happens in the presence of 1641 // function aliases. Since these aliases share the same function 1642 // body, there will be identical replicated profiles for the 1643 // original function. In this case, we simply not bother updating 1644 // the profile of the original function. 1645 FProfile = &Profiles[FunctionId(Name)]; 1646 FProfile->addHeadSamples(HeadCount); 1647 if (FProfile->getTotalSamples() > 0) 1648 Update = false; 1649 } else { 1650 // Otherwise, we are reading an inlined instance. The top of the 1651 // inline stack contains the profile of the caller. Insert this 1652 // callee in the caller's CallsiteMap. 1653 FunctionSamples *CallerProfile = InlineStack.front(); 1654 uint32_t LineOffset = Offset >> 16; 1655 uint32_t Discriminator = Offset & 0xffff; 1656 FProfile = &CallerProfile->functionSamplesAt( 1657 LineLocation(LineOffset, Discriminator))[FunctionId(Name)]; 1658 } 1659 FProfile->setFunction(FunctionId(Name)); 1660 1661 for (uint32_t I = 0; I < NumPosCounts; ++I) { 1662 uint32_t Offset; 1663 if (!GcovBuffer.readInt(Offset)) 1664 return sampleprof_error::truncated; 1665 1666 uint32_t NumTargets; 1667 if (!GcovBuffer.readInt(NumTargets)) 1668 return sampleprof_error::truncated; 1669 1670 uint64_t Count; 1671 if (!GcovBuffer.readInt64(Count)) 1672 return sampleprof_error::truncated; 1673 1674 // The line location is encoded in the offset as: 1675 // high 16 bits: line offset to the start of the function. 1676 // low 16 bits: discriminator. 1677 uint32_t LineOffset = Offset >> 16; 1678 uint32_t Discriminator = Offset & 0xffff; 1679 1680 InlineCallStack NewStack; 1681 NewStack.push_back(FProfile); 1682 llvm::append_range(NewStack, InlineStack); 1683 if (Update) { 1684 // Walk up the inline stack, adding the samples on this line to 1685 // the total sample count of the callers in the chain. 1686 for (auto *CallerProfile : NewStack) 1687 CallerProfile->addTotalSamples(Count); 1688 1689 // Update the body samples for the current profile. 1690 FProfile->addBodySamples(LineOffset, Discriminator, Count); 1691 } 1692 1693 // Process the list of functions called at an indirect call site. 1694 // These are all the targets that a function pointer (or virtual 1695 // function) resolved at runtime. 1696 for (uint32_t J = 0; J < NumTargets; J++) { 1697 uint32_t HistVal; 1698 if (!GcovBuffer.readInt(HistVal)) 1699 return sampleprof_error::truncated; 1700 1701 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) 1702 return sampleprof_error::malformed; 1703 1704 uint64_t TargetIdx; 1705 if (!GcovBuffer.readInt64(TargetIdx)) 1706 return sampleprof_error::truncated; 1707 StringRef TargetName(Names[TargetIdx]); 1708 1709 uint64_t TargetCount; 1710 if (!GcovBuffer.readInt64(TargetCount)) 1711 return sampleprof_error::truncated; 1712 1713 if (Update) 1714 FProfile->addCalledTargetSamples(LineOffset, Discriminator, 1715 FunctionId(TargetName), 1716 TargetCount); 1717 } 1718 } 1719 1720 // Process all the inlined callers into the current function. These 1721 // are all the callsites that were inlined into this function. 1722 for (uint32_t I = 0; I < NumCallsites; I++) { 1723 // The offset is encoded as: 1724 // high 16 bits: line offset to the start of the function. 1725 // low 16 bits: discriminator. 1726 uint32_t Offset; 1727 if (!GcovBuffer.readInt(Offset)) 1728 return sampleprof_error::truncated; 1729 InlineCallStack NewStack; 1730 NewStack.push_back(FProfile); 1731 llvm::append_range(NewStack, InlineStack); 1732 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) 1733 return EC; 1734 } 1735 1736 return sampleprof_error::success; 1737 } 1738 1739 /// Read a GCC AutoFDO profile. 1740 /// 1741 /// This format is generated by the Linux Perf conversion tool at 1742 /// https://github.com/google/autofdo. 1743 std::error_code SampleProfileReaderGCC::readImpl() { 1744 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator"); 1745 // Read the string table. 1746 if (std::error_code EC = readNameTable()) 1747 return EC; 1748 1749 // Read the source profile. 1750 if (std::error_code EC = readFunctionProfiles()) 1751 return EC; 1752 1753 return sampleprof_error::success; 1754 } 1755 1756 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { 1757 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); 1758 return Magic == "adcg*704"; 1759 } 1760 1761 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) { 1762 // If the reader uses MD5 to represent string, we can't remap it because 1763 // we don't know what the original function names were. 1764 if (Reader.useMD5()) { 1765 Ctx.diagnose(DiagnosticInfoSampleProfile( 1766 Reader.getBuffer()->getBufferIdentifier(), 1767 "Profile data remapping cannot be applied to profile data " 1768 "using MD5 names (original mangled names are not available).", 1769 DS_Warning)); 1770 return; 1771 } 1772 1773 // CSSPGO-TODO: Remapper is not yet supported. 1774 // We will need to remap the entire context string. 1775 assert(Remappings && "should be initialized while creating remapper"); 1776 for (auto &Sample : Reader.getProfiles()) { 1777 DenseSet<FunctionId> NamesInSample; 1778 Sample.second.findAllNames(NamesInSample); 1779 for (auto &Name : NamesInSample) { 1780 StringRef NameStr = Name.stringRef(); 1781 if (auto Key = Remappings->insert(NameStr)) 1782 NameMap.insert({Key, NameStr}); 1783 } 1784 } 1785 1786 RemappingApplied = true; 1787 } 1788 1789 std::optional<StringRef> 1790 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) { 1791 if (auto Key = Remappings->lookup(Fname)) { 1792 StringRef Result = NameMap.lookup(Key); 1793 if (!Result.empty()) 1794 return Result; 1795 } 1796 return std::nullopt; 1797 } 1798 1799 /// Prepare a memory buffer for the contents of \p Filename. 1800 /// 1801 /// \returns an error code indicating the status of the buffer. 1802 static ErrorOr<std::unique_ptr<MemoryBuffer>> 1803 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 1804 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 1805 : FS.getBufferForFile(Filename); 1806 if (std::error_code EC = BufferOrErr.getError()) 1807 return EC; 1808 auto Buffer = std::move(BufferOrErr.get()); 1809 1810 return std::move(Buffer); 1811 } 1812 1813 /// Create a sample profile reader based on the format of the input file. 1814 /// 1815 /// \param Filename The file to open. 1816 /// 1817 /// \param C The LLVM context to use to emit diagnostics. 1818 /// 1819 /// \param P The FSDiscriminatorPass. 1820 /// 1821 /// \param RemapFilename The file used for profile remapping. 1822 /// 1823 /// \returns an error code indicating the status of the created reader. 1824 ErrorOr<std::unique_ptr<SampleProfileReader>> 1825 SampleProfileReader::create(const std::string Filename, LLVMContext &C, 1826 vfs::FileSystem &FS, FSDiscriminatorPass P, 1827 const std::string RemapFilename) { 1828 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1829 if (std::error_code EC = BufferOrError.getError()) 1830 return EC; 1831 return create(BufferOrError.get(), C, FS, P, RemapFilename); 1832 } 1833 1834 /// Create a sample profile remapper from the given input, to remap the 1835 /// function names in the given profile data. 1836 /// 1837 /// \param Filename The file to open. 1838 /// 1839 /// \param Reader The profile reader the remapper is going to be applied to. 1840 /// 1841 /// \param C The LLVM context to use to emit diagnostics. 1842 /// 1843 /// \returns an error code indicating the status of the created reader. 1844 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1845 SampleProfileReaderItaniumRemapper::create(const std::string Filename, 1846 vfs::FileSystem &FS, 1847 SampleProfileReader &Reader, 1848 LLVMContext &C) { 1849 auto BufferOrError = setupMemoryBuffer(Filename, FS); 1850 if (std::error_code EC = BufferOrError.getError()) 1851 return EC; 1852 return create(BufferOrError.get(), Reader, C); 1853 } 1854 1855 /// Create a sample profile remapper from the given input, to remap the 1856 /// function names in the given profile data. 1857 /// 1858 /// \param B The memory buffer to create the reader from (assumes ownership). 1859 /// 1860 /// \param C The LLVM context to use to emit diagnostics. 1861 /// 1862 /// \param Reader The profile reader the remapper is going to be applied to. 1863 /// 1864 /// \returns an error code indicating the status of the created reader. 1865 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>> 1866 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B, 1867 SampleProfileReader &Reader, 1868 LLVMContext &C) { 1869 auto Remappings = std::make_unique<SymbolRemappingReader>(); 1870 if (Error E = Remappings->read(*B)) { 1871 handleAllErrors( 1872 std::move(E), [&](const SymbolRemappingParseError &ParseError) { 1873 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(), 1874 ParseError.getLineNum(), 1875 ParseError.getMessage())); 1876 }); 1877 return sampleprof_error::malformed; 1878 } 1879 1880 return std::make_unique<SampleProfileReaderItaniumRemapper>( 1881 std::move(B), std::move(Remappings), Reader); 1882 } 1883 1884 /// Create a sample profile reader based on the format of the input data. 1885 /// 1886 /// \param B The memory buffer to create the reader from (assumes ownership). 1887 /// 1888 /// \param C The LLVM context to use to emit diagnostics. 1889 /// 1890 /// \param P The FSDiscriminatorPass. 1891 /// 1892 /// \param RemapFilename The file used for profile remapping. 1893 /// 1894 /// \returns an error code indicating the status of the created reader. 1895 ErrorOr<std::unique_ptr<SampleProfileReader>> 1896 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, 1897 vfs::FileSystem &FS, FSDiscriminatorPass P, 1898 const std::string RemapFilename) { 1899 std::unique_ptr<SampleProfileReader> Reader; 1900 if (SampleProfileReaderRawBinary::hasFormat(*B)) 1901 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C)); 1902 else if (SampleProfileReaderExtBinary::hasFormat(*B)) 1903 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C)); 1904 else if (SampleProfileReaderGCC::hasFormat(*B)) 1905 Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); 1906 else if (SampleProfileReaderText::hasFormat(*B)) 1907 Reader.reset(new SampleProfileReaderText(std::move(B), C)); 1908 else 1909 return sampleprof_error::unrecognized_format; 1910 1911 if (!RemapFilename.empty()) { 1912 auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create( 1913 RemapFilename, FS, *Reader, C); 1914 if (std::error_code EC = ReaderOrErr.getError()) { 1915 std::string Msg = "Could not create remapper: " + EC.message(); 1916 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg)); 1917 return EC; 1918 } 1919 Reader->Remapper = std::move(ReaderOrErr.get()); 1920 } 1921 1922 if (std::error_code EC = Reader->readHeader()) { 1923 return EC; 1924 } 1925 1926 Reader->setDiscriminatorMaskedBitFrom(P); 1927 1928 return std::move(Reader); 1929 } 1930 1931 // For text and GCC file formats, we compute the summary after reading the 1932 // profile. Binary format has the profile summary in its header. 1933 void SampleProfileReader::computeSummary() { 1934 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); 1935 Summary = Builder.computeSummaryForProfiles(Profiles); 1936 } 1937