1 //===- InstrProf.cpp - Instrumented profiling format support --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for clang's instrumentation based PGO and 10 // coverage. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ProfileData/InstrProf.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/ADT/StringExtras.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Config/config.h" 20 #include "llvm/IR/Constant.h" 21 #include "llvm/IR/Constants.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/GlobalValue.h" 24 #include "llvm/IR/GlobalVariable.h" 25 #include "llvm/IR/Instruction.h" 26 #include "llvm/IR/LLVMContext.h" 27 #include "llvm/IR/MDBuilder.h" 28 #include "llvm/IR/Metadata.h" 29 #include "llvm/IR/Module.h" 30 #include "llvm/IR/Type.h" 31 #include "llvm/ProfileData/InstrProfReader.h" 32 #include "llvm/Support/Casting.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Compiler.h" 35 #include "llvm/Support/Compression.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/Endian.h" 38 #include "llvm/Support/Error.h" 39 #include "llvm/Support/ErrorHandling.h" 40 #include "llvm/Support/LEB128.h" 41 #include "llvm/Support/MathExtras.h" 42 #include "llvm/Support/Path.h" 43 #include "llvm/Support/SwapByteOrder.h" 44 #include "llvm/Support/VirtualFileSystem.h" 45 #include "llvm/TargetParser/Triple.h" 46 #include <algorithm> 47 #include <cassert> 48 #include <cstddef> 49 #include <cstdint> 50 #include <cstring> 51 #include <memory> 52 #include <string> 53 #include <system_error> 54 #include <type_traits> 55 #include <utility> 56 #include <vector> 57 58 using namespace llvm; 59 60 #define DEBUG_TYPE "instrprof" 61 62 static cl::opt<bool> StaticFuncFullModulePrefix( 63 "static-func-full-module-prefix", cl::init(true), cl::Hidden, 64 cl::desc("Use full module build paths in the profile counter names for " 65 "static functions.")); 66 67 // This option is tailored to users that have different top-level directory in 68 // profile-gen and profile-use compilation. Users need to specific the number 69 // of levels to strip. A value larger than the number of directories in the 70 // source file will strip all the directory names and only leave the basename. 71 // 72 // Note current ThinLTO module importing for the indirect-calls assumes 73 // the source directory name not being stripped. A non-zero option value here 74 // can potentially prevent some inter-module indirect-call-promotions. 75 static cl::opt<unsigned> StaticFuncStripDirNamePrefix( 76 "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden, 77 cl::desc("Strip specified level of directory name from source path in " 78 "the profile counter name for static functions.")); 79 80 static std::string getInstrProfErrString(instrprof_error Err, 81 const std::string &ErrMsg = "") { 82 std::string Msg; 83 raw_string_ostream OS(Msg); 84 85 switch (Err) { 86 case instrprof_error::success: 87 OS << "success"; 88 break; 89 case instrprof_error::eof: 90 OS << "end of File"; 91 break; 92 case instrprof_error::unrecognized_format: 93 OS << "unrecognized instrumentation profile encoding format"; 94 break; 95 case instrprof_error::bad_magic: 96 OS << "invalid instrumentation profile data (bad magic)"; 97 break; 98 case instrprof_error::bad_header: 99 OS << "invalid instrumentation profile data (file header is corrupt)"; 100 break; 101 case instrprof_error::unsupported_version: 102 OS << "unsupported instrumentation profile format version"; 103 break; 104 case instrprof_error::unsupported_hash_type: 105 OS << "unsupported instrumentation profile hash type"; 106 break; 107 case instrprof_error::too_large: 108 OS << "too much profile data"; 109 break; 110 case instrprof_error::truncated: 111 OS << "truncated profile data"; 112 break; 113 case instrprof_error::malformed: 114 OS << "malformed instrumentation profile data"; 115 break; 116 case instrprof_error::missing_correlation_info: 117 OS << "debug info/binary for correlation is required"; 118 break; 119 case instrprof_error::unexpected_correlation_info: 120 OS << "debug info/binary for correlation is not necessary"; 121 break; 122 case instrprof_error::unable_to_correlate_profile: 123 OS << "unable to correlate profile"; 124 break; 125 case instrprof_error::invalid_prof: 126 OS << "invalid profile created. Please file a bug " 127 "at: " BUG_REPORT_URL 128 " and include the profraw files that caused this error."; 129 break; 130 case instrprof_error::unknown_function: 131 OS << "no profile data available for function"; 132 break; 133 case instrprof_error::hash_mismatch: 134 OS << "function control flow change detected (hash mismatch)"; 135 break; 136 case instrprof_error::count_mismatch: 137 OS << "function basic block count change detected (counter mismatch)"; 138 break; 139 case instrprof_error::bitmap_mismatch: 140 OS << "function bitmap size change detected (bitmap size mismatch)"; 141 break; 142 case instrprof_error::counter_overflow: 143 OS << "counter overflow"; 144 break; 145 case instrprof_error::value_site_count_mismatch: 146 OS << "function value site count change detected (counter mismatch)"; 147 break; 148 case instrprof_error::compress_failed: 149 OS << "failed to compress data (zlib)"; 150 break; 151 case instrprof_error::uncompress_failed: 152 OS << "failed to uncompress data (zlib)"; 153 break; 154 case instrprof_error::empty_raw_profile: 155 OS << "empty raw profile file"; 156 break; 157 case instrprof_error::zlib_unavailable: 158 OS << "profile uses zlib compression but the profile reader was built " 159 "without zlib support"; 160 break; 161 case instrprof_error::raw_profile_version_mismatch: 162 OS << "raw profile version mismatch"; 163 break; 164 case instrprof_error::counter_value_too_large: 165 OS << "excessively large counter value suggests corrupted profile data"; 166 break; 167 } 168 169 // If optional error message is not empty, append it to the message. 170 if (!ErrMsg.empty()) 171 OS << ": " << ErrMsg; 172 173 return OS.str(); 174 } 175 176 namespace { 177 178 // FIXME: This class is only here to support the transition to llvm::Error. It 179 // will be removed once this transition is complete. Clients should prefer to 180 // deal with the Error value directly, rather than converting to error_code. 181 class InstrProfErrorCategoryType : public std::error_category { 182 const char *name() const noexcept override { return "llvm.instrprof"; } 183 184 std::string message(int IE) const override { 185 return getInstrProfErrString(static_cast<instrprof_error>(IE)); 186 } 187 }; 188 189 } // end anonymous namespace 190 191 const std::error_category &llvm::instrprof_category() { 192 static InstrProfErrorCategoryType ErrorCategory; 193 return ErrorCategory; 194 } 195 196 namespace { 197 198 const char *InstrProfSectNameCommon[] = { 199 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 200 SectNameCommon, 201 #include "llvm/ProfileData/InstrProfData.inc" 202 }; 203 204 const char *InstrProfSectNameCoff[] = { 205 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 206 SectNameCoff, 207 #include "llvm/ProfileData/InstrProfData.inc" 208 }; 209 210 const char *InstrProfSectNamePrefix[] = { 211 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 212 Prefix, 213 #include "llvm/ProfileData/InstrProfData.inc" 214 }; 215 216 } // namespace 217 218 namespace llvm { 219 220 cl::opt<bool> DoInstrProfNameCompression( 221 "enable-name-compression", 222 cl::desc("Enable name/filename string compression"), cl::init(true)); 223 224 cl::opt<bool> EnableVTableValueProfiling( 225 "enable-vtable-value-profiling", cl::init(false), 226 cl::desc("If true, the virtual table address will be instrumented to know " 227 "the types of a C++ pointer. The information is used in indirect " 228 "call promotion to do selective vtable-based comparison.")); 229 230 cl::opt<bool> EnableVTableProfileUse( 231 "enable-vtable-profile-use", cl::init(false), 232 cl::desc("If ThinLTO and WPD is enabled and this option is true, vtable " 233 "profiles will be used by ICP pass for more efficient indirect " 234 "call sequence. If false, type profiles won't be used.")); 235 236 std::string getInstrProfSectionName(InstrProfSectKind IPSK, 237 Triple::ObjectFormatType OF, 238 bool AddSegmentInfo) { 239 std::string SectName; 240 241 if (OF == Triple::MachO && AddSegmentInfo) 242 SectName = InstrProfSectNamePrefix[IPSK]; 243 244 if (OF == Triple::COFF) 245 SectName += InstrProfSectNameCoff[IPSK]; 246 else 247 SectName += InstrProfSectNameCommon[IPSK]; 248 249 if (OF == Triple::MachO && IPSK == IPSK_data && AddSegmentInfo) 250 SectName += ",regular,live_support"; 251 252 return SectName; 253 } 254 255 std::string InstrProfError::message() const { 256 return getInstrProfErrString(Err, Msg); 257 } 258 259 char InstrProfError::ID = 0; 260 261 std::string getPGOFuncName(StringRef Name, GlobalValue::LinkageTypes Linkage, 262 StringRef FileName, 263 uint64_t Version LLVM_ATTRIBUTE_UNUSED) { 264 // Value names may be prefixed with a binary '1' to indicate 265 // that the backend should not modify the symbols due to any platform 266 // naming convention. Do not include that '1' in the PGO profile name. 267 if (Name[0] == '\1') 268 Name = Name.substr(1); 269 270 std::string NewName = std::string(Name); 271 if (llvm::GlobalValue::isLocalLinkage(Linkage)) { 272 // For local symbols, prepend the main file name to distinguish them. 273 // Do not include the full path in the file name since there's no guarantee 274 // that it will stay the same, e.g., if the files are checked out from 275 // version control in different locations. 276 if (FileName.empty()) 277 NewName = NewName.insert(0, "<unknown>:"); 278 else 279 NewName = NewName.insert(0, FileName.str() + ":"); 280 } 281 return NewName; 282 } 283 284 // Strip NumPrefix level of directory name from PathNameStr. If the number of 285 // directory separators is less than NumPrefix, strip all the directories and 286 // leave base file name only. 287 static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) { 288 uint32_t Count = NumPrefix; 289 uint32_t Pos = 0, LastPos = 0; 290 for (const auto &CI : PathNameStr) { 291 ++Pos; 292 if (llvm::sys::path::is_separator(CI)) { 293 LastPos = Pos; 294 --Count; 295 } 296 if (Count == 0) 297 break; 298 } 299 return PathNameStr.substr(LastPos); 300 } 301 302 static StringRef getStrippedSourceFileName(const GlobalObject &GO) { 303 StringRef FileName(GO.getParent()->getSourceFileName()); 304 uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1; 305 if (StripLevel < StaticFuncStripDirNamePrefix) 306 StripLevel = StaticFuncStripDirNamePrefix; 307 if (StripLevel) 308 FileName = stripDirPrefix(FileName, StripLevel); 309 return FileName; 310 } 311 312 // The PGO name has the format [<filepath>;]<mangled-name> where <filepath>; is 313 // provided if linkage is local and is used to discriminate possibly identical 314 // mangled names. ";" is used because it is unlikely to be found in either 315 // <filepath> or <mangled-name>. 316 // 317 // Older compilers used getPGOFuncName() which has the format 318 // [<filepath>:]<mangled-name>. This caused trouble for Objective-C functions 319 // which commonly have :'s in their names. We still need to compute this name to 320 // lookup functions from profiles built by older compilers. 321 static std::string 322 getIRPGONameForGlobalObject(const GlobalObject &GO, 323 GlobalValue::LinkageTypes Linkage, 324 StringRef FileName) { 325 return GlobalValue::getGlobalIdentifier(GO.getName(), Linkage, FileName); 326 } 327 328 static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) { 329 if (MD != nullptr) { 330 StringRef S = cast<MDString>(MD->getOperand(0))->getString(); 331 return S.str(); 332 } 333 return {}; 334 } 335 336 // Returns the PGO object name. This function has some special handling 337 // when called in LTO optimization. The following only applies when calling in 338 // LTO passes (when \c InLTO is true): LTO's internalization privatizes many 339 // global linkage symbols. This happens after value profile annotation, but 340 // those internal linkage functions should not have a source prefix. 341 // Additionally, for ThinLTO mode, exported internal functions are promoted 342 // and renamed. We need to ensure that the original internal PGO name is 343 // used when computing the GUID that is compared against the profiled GUIDs. 344 // To differentiate compiler generated internal symbols from original ones, 345 // PGOFuncName meta data are created and attached to the original internal 346 // symbols in the value profile annotation step 347 // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta 348 // data, its original linkage must be non-internal. 349 static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO, 350 MDNode *PGONameMetadata) { 351 if (!InLTO) { 352 auto FileName = getStrippedSourceFileName(GO); 353 return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName); 354 } 355 356 // In LTO mode (when InLTO is true), first check if there is a meta data. 357 if (auto IRPGOFuncName = lookupPGONameFromMetadata(PGONameMetadata)) 358 return *IRPGOFuncName; 359 360 // If there is no meta data, the function must be a global before the value 361 // profile annotation pass. Its current linkage may be internal if it is 362 // internalized in LTO mode. 363 return getIRPGONameForGlobalObject(GO, GlobalValue::ExternalLinkage, ""); 364 } 365 366 // Returns the IRPGO function name and does special handling when called 367 // in LTO optimization. See the comments of `getIRPGOObjectName` for details. 368 std::string getIRPGOFuncName(const Function &F, bool InLTO) { 369 return getIRPGOObjectName(F, InLTO, getPGOFuncNameMetadata(F)); 370 } 371 372 // Please use getIRPGOFuncName for LLVM IR instrumentation. This function is 373 // for front-end (Clang, etc) instrumentation. 374 // The implementation is kept for profile matching from older profiles. 375 // This is similar to `getIRPGOFuncName` except that this function calls 376 // 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls 377 // 'getIRPGONameForGlobalObject'. See the difference between two callees in the 378 // comments of `getIRPGONameForGlobalObject`. 379 std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { 380 if (!InLTO) { 381 auto FileName = getStrippedSourceFileName(F); 382 return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version); 383 } 384 385 // In LTO mode (when InLTO is true), first check if there is a meta data. 386 if (auto PGOFuncName = lookupPGONameFromMetadata(getPGOFuncNameMetadata(F))) 387 return *PGOFuncName; 388 389 // If there is no meta data, the function must be a global before the value 390 // profile annotation pass. Its current linkage may be internal if it is 391 // internalized in LTO mode. 392 return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, ""); 393 } 394 395 std::string getPGOName(const GlobalVariable &V, bool InLTO) { 396 // PGONameMetadata should be set by compiler at profile use time 397 // and read by symtab creation to look up symbols corresponding to 398 // a MD5 hash. 399 return getIRPGOObjectName(V, InLTO, V.getMetadata(getPGONameMetadataName())); 400 } 401 402 // See getIRPGOObjectName() for a discription of the format. 403 std::pair<StringRef, StringRef> getParsedIRPGOName(StringRef IRPGOName) { 404 auto [FileName, MangledName] = IRPGOName.split(GlobalIdentifierDelimiter); 405 if (MangledName.empty()) 406 return std::make_pair(StringRef(), IRPGOName); 407 return std::make_pair(FileName, MangledName); 408 } 409 410 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { 411 if (FileName.empty()) 412 return PGOFuncName; 413 // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as 414 // well. 415 if (PGOFuncName.starts_with(FileName)) 416 PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); 417 return PGOFuncName; 418 } 419 420 // \p FuncName is the string used as profile lookup key for the function. A 421 // symbol is created to hold the name. Return the legalized symbol name. 422 std::string getPGOFuncNameVarName(StringRef FuncName, 423 GlobalValue::LinkageTypes Linkage) { 424 std::string VarName = std::string(getInstrProfNameVarPrefix()); 425 VarName += FuncName; 426 427 if (!GlobalValue::isLocalLinkage(Linkage)) 428 return VarName; 429 430 // Now fix up illegal chars in local VarName that may upset the assembler. 431 const char InvalidChars[] = "-:;<>/\"'"; 432 size_t FoundPos = VarName.find_first_of(InvalidChars); 433 while (FoundPos != std::string::npos) { 434 VarName[FoundPos] = '_'; 435 FoundPos = VarName.find_first_of(InvalidChars, FoundPos + 1); 436 } 437 return VarName; 438 } 439 440 GlobalVariable *createPGOFuncNameVar(Module &M, 441 GlobalValue::LinkageTypes Linkage, 442 StringRef PGOFuncName) { 443 // We generally want to match the function's linkage, but available_externally 444 // and extern_weak both have the wrong semantics, and anything that doesn't 445 // need to link across compilation units doesn't need to be visible at all. 446 if (Linkage == GlobalValue::ExternalWeakLinkage) 447 Linkage = GlobalValue::LinkOnceAnyLinkage; 448 else if (Linkage == GlobalValue::AvailableExternallyLinkage) 449 Linkage = GlobalValue::LinkOnceODRLinkage; 450 else if (Linkage == GlobalValue::InternalLinkage || 451 Linkage == GlobalValue::ExternalLinkage) 452 Linkage = GlobalValue::PrivateLinkage; 453 454 auto *Value = 455 ConstantDataArray::getString(M.getContext(), PGOFuncName, false); 456 auto *FuncNameVar = 457 new GlobalVariable(M, Value->getType(), true, Linkage, Value, 458 getPGOFuncNameVarName(PGOFuncName, Linkage)); 459 460 // Hide the symbol so that we correctly get a copy for each executable. 461 if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) 462 FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); 463 464 return FuncNameVar; 465 } 466 467 GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName) { 468 return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), PGOFuncName); 469 } 470 471 Error InstrProfSymtab::create(Module &M, bool InLTO) { 472 for (Function &F : M) { 473 // Function may not have a name: like using asm("") to overwrite the name. 474 // Ignore in this case. 475 if (!F.hasName()) 476 continue; 477 if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO))) 478 return E; 479 // Also use getPGOFuncName() so that we can find records from older profiles 480 if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO))) 481 return E; 482 } 483 484 SmallVector<MDNode *, 2> Types; 485 for (GlobalVariable &G : M.globals()) { 486 if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type)) 487 continue; 488 if (Error E = addVTableWithName(G, getPGOName(G, InLTO))) 489 return E; 490 } 491 492 Sorted = false; 493 finalizeSymtab(); 494 return Error::success(); 495 } 496 497 Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable, 498 StringRef VTablePGOName) { 499 auto NameToGUIDMap = [&](StringRef Name) -> Error { 500 if (Error E = addSymbolName(Name)) 501 return E; 502 503 bool Inserted = true; 504 std::tie(std::ignore, Inserted) = 505 MD5VTableMap.try_emplace(GlobalValue::getGUID(Name), &VTable); 506 if (!Inserted) 507 LLVM_DEBUG(dbgs() << "GUID conflict within one module"); 508 return Error::success(); 509 }; 510 if (Error E = NameToGUIDMap(VTablePGOName)) 511 return E; 512 513 StringRef CanonicalName = getCanonicalName(VTablePGOName); 514 if (CanonicalName != VTablePGOName) 515 return NameToGUIDMap(CanonicalName); 516 517 return Error::success(); 518 } 519 520 /// \c NameStrings is a string composed of one of more possibly encoded 521 /// sub-strings. The substrings are separated by 0 or more zero bytes. This 522 /// method decodes the string and calls `NameCallback` for each substring. 523 static Error 524 readAndDecodeStrings(StringRef NameStrings, 525 std::function<Error(StringRef)> NameCallback) { 526 const uint8_t *P = NameStrings.bytes_begin(); 527 const uint8_t *EndP = NameStrings.bytes_end(); 528 while (P < EndP) { 529 uint32_t N; 530 uint64_t UncompressedSize = decodeULEB128(P, &N); 531 P += N; 532 uint64_t CompressedSize = decodeULEB128(P, &N); 533 P += N; 534 const bool IsCompressed = (CompressedSize != 0); 535 SmallVector<uint8_t, 128> UncompressedNameStrings; 536 StringRef NameStrings; 537 if (IsCompressed) { 538 if (!llvm::compression::zlib::isAvailable()) 539 return make_error<InstrProfError>(instrprof_error::zlib_unavailable); 540 541 if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize), 542 UncompressedNameStrings, 543 UncompressedSize)) { 544 consumeError(std::move(E)); 545 return make_error<InstrProfError>(instrprof_error::uncompress_failed); 546 } 547 P += CompressedSize; 548 NameStrings = toStringRef(UncompressedNameStrings); 549 } else { 550 NameStrings = 551 StringRef(reinterpret_cast<const char *>(P), UncompressedSize); 552 P += UncompressedSize; 553 } 554 // Now parse the name strings. 555 SmallVector<StringRef, 0> Names; 556 NameStrings.split(Names, getInstrProfNameSeparator()); 557 for (StringRef &Name : Names) 558 if (Error E = NameCallback(Name)) 559 return E; 560 561 while (P < EndP && *P == 0) 562 P++; 563 } 564 return Error::success(); 565 } 566 567 Error InstrProfSymtab::create(StringRef NameStrings) { 568 return readAndDecodeStrings( 569 NameStrings, 570 std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1)); 571 } 572 573 Error InstrProfSymtab::create(StringRef FuncNameStrings, 574 StringRef VTableNameStrings) { 575 if (Error E = readAndDecodeStrings(FuncNameStrings, 576 std::bind(&InstrProfSymtab::addFuncName, 577 this, std::placeholders::_1))) 578 return E; 579 580 return readAndDecodeStrings( 581 VTableNameStrings, 582 std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); 583 } 584 585 Error InstrProfSymtab::initVTableNamesFromCompressedStrings( 586 StringRef CompressedVTableStrings) { 587 return readAndDecodeStrings( 588 CompressedVTableStrings, 589 std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); 590 } 591 592 StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) { 593 // In ThinLTO, local function may have been promoted to global and have 594 // suffix ".llvm." added to the function name. We need to add the 595 // stripped function name to the symbol table so that we can find a match 596 // from profile. 597 // 598 // ".__uniq." suffix is used to differentiate internal linkage functions in 599 // different modules and should be kept. This is the only suffix with the 600 // pattern ".xxx" which is kept before matching, other suffixes similar as 601 // ".llvm." will be stripped. 602 const std::string UniqSuffix = ".__uniq."; 603 size_t Pos = PGOName.find(UniqSuffix); 604 if (Pos != StringRef::npos) 605 Pos += UniqSuffix.length(); 606 else 607 Pos = 0; 608 609 // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from 610 // the beginning. 611 Pos = PGOName.find('.', Pos); 612 if (Pos != StringRef::npos && Pos != 0) 613 return PGOName.substr(0, Pos); 614 615 return PGOName; 616 } 617 618 Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { 619 auto NameToGUIDMap = [&](StringRef Name) -> Error { 620 if (Error E = addFuncName(Name)) 621 return E; 622 MD5FuncMap.emplace_back(Function::getGUID(Name), &F); 623 return Error::success(); 624 }; 625 if (Error E = NameToGUIDMap(PGOFuncName)) 626 return E; 627 628 StringRef CanonicalFuncName = getCanonicalName(PGOFuncName); 629 if (CanonicalFuncName != PGOFuncName) 630 return NameToGUIDMap(CanonicalFuncName); 631 632 return Error::success(); 633 } 634 635 uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) { 636 // Given a runtime address, look up the hash value in the interval map, and 637 // fallback to value 0 if a hash value is not found. 638 return VTableAddrMap.lookup(Address, 0); 639 } 640 641 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) { 642 finalizeSymtab(); 643 auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) { 644 return A.first < Address; 645 }); 646 // Raw function pointer collected by value profiler may be from 647 // external functions that are not instrumented. They won't have 648 // mapping data to be used by the deserializer. Force the value to 649 // be 0 in this case. 650 if (It != AddrToMD5Map.end() && It->first == Address) 651 return (uint64_t)It->second; 652 return 0; 653 } 654 655 void InstrProfSymtab::dumpNames(raw_ostream &OS) const { 656 SmallVector<StringRef, 0> Sorted(NameTab.keys()); 657 llvm::sort(Sorted); 658 for (StringRef S : Sorted) 659 OS << S << '\n'; 660 } 661 662 Error collectGlobalObjectNameStrings(ArrayRef<std::string> NameStrs, 663 bool DoCompression, std::string &Result) { 664 assert(!NameStrs.empty() && "No name data to emit"); 665 666 uint8_t Header[20], *P = Header; 667 std::string UncompressedNameStrings = 668 join(NameStrs.begin(), NameStrs.end(), getInstrProfNameSeparator()); 669 670 assert(StringRef(UncompressedNameStrings) 671 .count(getInstrProfNameSeparator()) == (NameStrs.size() - 1) && 672 "PGO name is invalid (contains separator token)"); 673 674 unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P); 675 P += EncLen; 676 677 auto WriteStringToResult = [&](size_t CompressedLen, StringRef InputStr) { 678 EncLen = encodeULEB128(CompressedLen, P); 679 P += EncLen; 680 char *HeaderStr = reinterpret_cast<char *>(&Header[0]); 681 unsigned HeaderLen = P - &Header[0]; 682 Result.append(HeaderStr, HeaderLen); 683 Result += InputStr; 684 return Error::success(); 685 }; 686 687 if (!DoCompression) { 688 return WriteStringToResult(0, UncompressedNameStrings); 689 } 690 691 SmallVector<uint8_t, 128> CompressedNameStrings; 692 compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings), 693 CompressedNameStrings, 694 compression::zlib::BestSizeCompression); 695 696 return WriteStringToResult(CompressedNameStrings.size(), 697 toStringRef(CompressedNameStrings)); 698 } 699 700 StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { 701 auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer()); 702 StringRef NameStr = 703 Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); 704 return NameStr; 705 } 706 707 Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars, 708 std::string &Result, bool DoCompression) { 709 std::vector<std::string> NameStrs; 710 for (auto *NameVar : NameVars) { 711 NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar))); 712 } 713 return collectGlobalObjectNameStrings( 714 NameStrs, compression::zlib::isAvailable() && DoCompression, Result); 715 } 716 717 Error collectVTableStrings(ArrayRef<GlobalVariable *> VTables, 718 std::string &Result, bool DoCompression) { 719 std::vector<std::string> VTableNameStrs; 720 for (auto *VTable : VTables) 721 VTableNameStrs.push_back(getPGOName(*VTable)); 722 return collectGlobalObjectNameStrings( 723 VTableNameStrs, compression::zlib::isAvailable() && DoCompression, 724 Result); 725 } 726 727 void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { 728 uint64_t FuncSum = 0; 729 Sum.NumEntries += Counts.size(); 730 for (uint64_t Count : Counts) 731 FuncSum += Count; 732 Sum.CountSum += FuncSum; 733 734 for (uint32_t VK = IPVK_First; VK <= IPVK_Last; ++VK) { 735 uint64_t KindSum = 0; 736 uint32_t NumValueSites = getNumValueSites(VK); 737 for (size_t I = 0; I < NumValueSites; ++I) { 738 for (const auto &V : getValueArrayForSite(VK, I)) 739 KindSum += V.Count; 740 } 741 Sum.ValueCounts[VK] += KindSum; 742 } 743 } 744 745 void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord &Input, 746 uint32_t ValueKind, 747 OverlapStats &Overlap, 748 OverlapStats &FuncLevelOverlap) { 749 this->sortByTargetValues(); 750 Input.sortByTargetValues(); 751 double Score = 0.0f, FuncLevelScore = 0.0f; 752 auto I = ValueData.begin(); 753 auto IE = ValueData.end(); 754 auto J = Input.ValueData.begin(); 755 auto JE = Input.ValueData.end(); 756 while (I != IE && J != JE) { 757 if (I->Value == J->Value) { 758 Score += OverlapStats::score(I->Count, J->Count, 759 Overlap.Base.ValueCounts[ValueKind], 760 Overlap.Test.ValueCounts[ValueKind]); 761 FuncLevelScore += OverlapStats::score( 762 I->Count, J->Count, FuncLevelOverlap.Base.ValueCounts[ValueKind], 763 FuncLevelOverlap.Test.ValueCounts[ValueKind]); 764 ++I; 765 } else if (I->Value < J->Value) { 766 ++I; 767 continue; 768 } 769 ++J; 770 } 771 Overlap.Overlap.ValueCounts[ValueKind] += Score; 772 FuncLevelOverlap.Overlap.ValueCounts[ValueKind] += FuncLevelScore; 773 } 774 775 // Return false on mismatch. 776 void InstrProfRecord::overlapValueProfData(uint32_t ValueKind, 777 InstrProfRecord &Other, 778 OverlapStats &Overlap, 779 OverlapStats &FuncLevelOverlap) { 780 uint32_t ThisNumValueSites = getNumValueSites(ValueKind); 781 assert(ThisNumValueSites == Other.getNumValueSites(ValueKind)); 782 if (!ThisNumValueSites) 783 return; 784 785 std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = 786 getOrCreateValueSitesForKind(ValueKind); 787 MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords = 788 Other.getValueSitesForKind(ValueKind); 789 for (uint32_t I = 0; I < ThisNumValueSites; I++) 790 ThisSiteRecords[I].overlap(OtherSiteRecords[I], ValueKind, Overlap, 791 FuncLevelOverlap); 792 } 793 794 void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap, 795 OverlapStats &FuncLevelOverlap, 796 uint64_t ValueCutoff) { 797 // FuncLevel CountSum for other should already computed and nonzero. 798 assert(FuncLevelOverlap.Test.CountSum >= 1.0f); 799 accumulateCounts(FuncLevelOverlap.Base); 800 bool Mismatch = (Counts.size() != Other.Counts.size()); 801 802 // Check if the value profiles mismatch. 803 if (!Mismatch) { 804 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) { 805 uint32_t ThisNumValueSites = getNumValueSites(Kind); 806 uint32_t OtherNumValueSites = Other.getNumValueSites(Kind); 807 if (ThisNumValueSites != OtherNumValueSites) { 808 Mismatch = true; 809 break; 810 } 811 } 812 } 813 if (Mismatch) { 814 Overlap.addOneMismatch(FuncLevelOverlap.Test); 815 return; 816 } 817 818 // Compute overlap for value counts. 819 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 820 overlapValueProfData(Kind, Other, Overlap, FuncLevelOverlap); 821 822 double Score = 0.0; 823 uint64_t MaxCount = 0; 824 // Compute overlap for edge counts. 825 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { 826 Score += OverlapStats::score(Counts[I], Other.Counts[I], 827 Overlap.Base.CountSum, Overlap.Test.CountSum); 828 MaxCount = std::max(Other.Counts[I], MaxCount); 829 } 830 Overlap.Overlap.CountSum += Score; 831 Overlap.Overlap.NumEntries += 1; 832 833 if (MaxCount >= ValueCutoff) { 834 double FuncScore = 0.0; 835 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) 836 FuncScore += OverlapStats::score(Counts[I], Other.Counts[I], 837 FuncLevelOverlap.Base.CountSum, 838 FuncLevelOverlap.Test.CountSum); 839 FuncLevelOverlap.Overlap.CountSum = FuncScore; 840 FuncLevelOverlap.Overlap.NumEntries = Other.Counts.size(); 841 FuncLevelOverlap.Valid = true; 842 } 843 } 844 845 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, 846 uint64_t Weight, 847 function_ref<void(instrprof_error)> Warn) { 848 this->sortByTargetValues(); 849 Input.sortByTargetValues(); 850 auto I = ValueData.begin(); 851 auto IE = ValueData.end(); 852 std::vector<InstrProfValueData> Merged; 853 Merged.reserve(std::max(ValueData.size(), Input.ValueData.size())); 854 for (const InstrProfValueData &J : Input.ValueData) { 855 while (I != IE && I->Value < J.Value) { 856 Merged.push_back(*I); 857 ++I; 858 } 859 if (I != IE && I->Value == J.Value) { 860 bool Overflowed; 861 I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed); 862 if (Overflowed) 863 Warn(instrprof_error::counter_overflow); 864 Merged.push_back(*I); 865 ++I; 866 continue; 867 } 868 Merged.push_back(J); 869 } 870 Merged.insert(Merged.end(), I, IE); 871 ValueData = std::move(Merged); 872 } 873 874 void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D, 875 function_ref<void(instrprof_error)> Warn) { 876 for (InstrProfValueData &I : ValueData) { 877 bool Overflowed; 878 I.Count = SaturatingMultiply(I.Count, N, &Overflowed) / D; 879 if (Overflowed) 880 Warn(instrprof_error::counter_overflow); 881 } 882 } 883 884 // Merge Value Profile data from Src record to this record for ValueKind. 885 // Scale merged value counts by \p Weight. 886 void InstrProfRecord::mergeValueProfData( 887 uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight, 888 function_ref<void(instrprof_error)> Warn) { 889 uint32_t ThisNumValueSites = getNumValueSites(ValueKind); 890 uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); 891 if (ThisNumValueSites != OtherNumValueSites) { 892 Warn(instrprof_error::value_site_count_mismatch); 893 return; 894 } 895 if (!ThisNumValueSites) 896 return; 897 std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = 898 getOrCreateValueSitesForKind(ValueKind); 899 MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords = 900 Src.getValueSitesForKind(ValueKind); 901 for (uint32_t I = 0; I < ThisNumValueSites; I++) 902 ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn); 903 } 904 905 void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, 906 function_ref<void(instrprof_error)> Warn) { 907 // If the number of counters doesn't match we either have bad data 908 // or a hash collision. 909 if (Counts.size() != Other.Counts.size()) { 910 Warn(instrprof_error::count_mismatch); 911 return; 912 } 913 914 // Special handling of the first count as the PseudoCount. 915 CountPseudoKind OtherKind = Other.getCountPseudoKind(); 916 CountPseudoKind ThisKind = getCountPseudoKind(); 917 if (OtherKind != NotPseudo || ThisKind != NotPseudo) { 918 // We don't allow the merge of a profile with pseudo counts and 919 // a normal profile (i.e. without pesudo counts). 920 // Profile supplimenation should be done after the profile merge. 921 if (OtherKind == NotPseudo || ThisKind == NotPseudo) { 922 Warn(instrprof_error::count_mismatch); 923 return; 924 } 925 if (OtherKind == PseudoHot || ThisKind == PseudoHot) 926 setPseudoCount(PseudoHot); 927 else 928 setPseudoCount(PseudoWarm); 929 return; 930 } 931 932 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { 933 bool Overflowed; 934 uint64_t Value = 935 SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); 936 if (Value > getInstrMaxCountValue()) { 937 Value = getInstrMaxCountValue(); 938 Overflowed = true; 939 } 940 Counts[I] = Value; 941 if (Overflowed) 942 Warn(instrprof_error::counter_overflow); 943 } 944 945 // If the number of bitmap bytes doesn't match we either have bad data 946 // or a hash collision. 947 if (BitmapBytes.size() != Other.BitmapBytes.size()) { 948 Warn(instrprof_error::bitmap_mismatch); 949 return; 950 } 951 952 // Bitmap bytes are merged by simply ORing them together. 953 for (size_t I = 0, E = Other.BitmapBytes.size(); I < E; ++I) { 954 BitmapBytes[I] = Other.BitmapBytes[I] | BitmapBytes[I]; 955 } 956 957 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 958 mergeValueProfData(Kind, Other, Weight, Warn); 959 } 960 961 void InstrProfRecord::scaleValueProfData( 962 uint32_t ValueKind, uint64_t N, uint64_t D, 963 function_ref<void(instrprof_error)> Warn) { 964 for (auto &R : getValueSitesForKind(ValueKind)) 965 R.scale(N, D, Warn); 966 } 967 968 void InstrProfRecord::scale(uint64_t N, uint64_t D, 969 function_ref<void(instrprof_error)> Warn) { 970 assert(D != 0 && "D cannot be 0"); 971 for (auto &Count : this->Counts) { 972 bool Overflowed; 973 Count = SaturatingMultiply(Count, N, &Overflowed) / D; 974 if (Count > getInstrMaxCountValue()) { 975 Count = getInstrMaxCountValue(); 976 Overflowed = true; 977 } 978 if (Overflowed) 979 Warn(instrprof_error::counter_overflow); 980 } 981 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 982 scaleValueProfData(Kind, N, D, Warn); 983 } 984 985 // Map indirect call target name hash to name string. 986 uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, 987 InstrProfSymtab *SymTab) { 988 if (!SymTab) 989 return Value; 990 991 if (ValueKind == IPVK_IndirectCallTarget) 992 return SymTab->getFunctionHashFromAddress(Value); 993 994 if (ValueKind == IPVK_VTableTarget) 995 return SymTab->getVTableHashFromAddress(Value); 996 997 return Value; 998 } 999 1000 void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, 1001 ArrayRef<InstrProfValueData> VData, 1002 InstrProfSymtab *ValueMap) { 1003 // Remap values. 1004 std::vector<InstrProfValueData> RemappedVD; 1005 RemappedVD.reserve(VData.size()); 1006 for (const auto &V : VData) { 1007 uint64_t NewValue = remapValue(V.Value, ValueKind, ValueMap); 1008 RemappedVD.push_back({NewValue, V.Count}); 1009 } 1010 1011 std::vector<InstrProfValueSiteRecord> &ValueSites = 1012 getOrCreateValueSitesForKind(ValueKind); 1013 assert(ValueSites.size() == Site); 1014 1015 // Add a new value site with remapped value profiling data. 1016 ValueSites.emplace_back(std::move(RemappedVD)); 1017 } 1018 1019 void TemporalProfTraceTy::createBPFunctionNodes( 1020 ArrayRef<TemporalProfTraceTy> Traces, std::vector<BPFunctionNode> &Nodes, 1021 bool RemoveOutlierUNs) { 1022 using IDT = BPFunctionNode::IDT; 1023 using UtilityNodeT = BPFunctionNode::UtilityNodeT; 1024 UtilityNodeT MaxUN = 0; 1025 DenseMap<IDT, size_t> IdToFirstTimestamp; 1026 DenseMap<IDT, UtilityNodeT> IdToFirstUN; 1027 DenseMap<IDT, SmallVector<UtilityNodeT>> IdToUNs; 1028 // TODO: We need to use the Trace.Weight field to give more weight to more 1029 // important utilities 1030 for (auto &Trace : Traces) { 1031 size_t CutoffTimestamp = 1; 1032 for (size_t Timestamp = 0; Timestamp < Trace.FunctionNameRefs.size(); 1033 Timestamp++) { 1034 IDT Id = Trace.FunctionNameRefs[Timestamp]; 1035 auto [It, WasInserted] = IdToFirstTimestamp.try_emplace(Id, Timestamp); 1036 if (!WasInserted) 1037 It->getSecond() = std::min<size_t>(It->getSecond(), Timestamp); 1038 if (Timestamp >= CutoffTimestamp) { 1039 ++MaxUN; 1040 CutoffTimestamp = 2 * Timestamp; 1041 } 1042 IdToFirstUN.try_emplace(Id, MaxUN); 1043 } 1044 for (auto &[Id, FirstUN] : IdToFirstUN) 1045 for (auto UN = FirstUN; UN <= MaxUN; ++UN) 1046 IdToUNs[Id].push_back(UN); 1047 ++MaxUN; 1048 IdToFirstUN.clear(); 1049 } 1050 1051 if (RemoveOutlierUNs) { 1052 DenseMap<UtilityNodeT, unsigned> UNFrequency; 1053 for (auto &[Id, UNs] : IdToUNs) 1054 for (auto &UN : UNs) 1055 ++UNFrequency[UN]; 1056 // Filter out utility nodes that are too infrequent or too prevalent to make 1057 // BalancedPartitioning more effective. 1058 for (auto &[Id, UNs] : IdToUNs) 1059 llvm::erase_if(UNs, [&](auto &UN) { 1060 return UNFrequency[UN] <= 1 || 2 * UNFrequency[UN] > IdToUNs.size(); 1061 }); 1062 } 1063 1064 for (auto &[Id, UNs] : IdToUNs) 1065 Nodes.emplace_back(Id, UNs); 1066 1067 // Since BalancedPartitioning is sensitive to the initial order, we explicitly 1068 // order nodes by their earliest timestamp. 1069 llvm::sort(Nodes, [&](auto &L, auto &R) { 1070 return std::make_pair(IdToFirstTimestamp[L.Id], L.Id) < 1071 std::make_pair(IdToFirstTimestamp[R.Id], R.Id); 1072 }); 1073 } 1074 1075 #define INSTR_PROF_COMMON_API_IMPL 1076 #include "llvm/ProfileData/InstrProfData.inc" 1077 1078 /*! 1079 * ValueProfRecordClosure Interface implementation for InstrProfRecord 1080 * class. These C wrappers are used as adaptors so that C++ code can be 1081 * invoked as callbacks. 1082 */ 1083 uint32_t getNumValueKindsInstrProf(const void *Record) { 1084 return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds(); 1085 } 1086 1087 uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) { 1088 return reinterpret_cast<const InstrProfRecord *>(Record) 1089 ->getNumValueSites(VKind); 1090 } 1091 1092 uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { 1093 return reinterpret_cast<const InstrProfRecord *>(Record) 1094 ->getNumValueData(VKind); 1095 } 1096 1097 uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, 1098 uint32_t S) { 1099 const auto *IPR = reinterpret_cast<const InstrProfRecord *>(R); 1100 return IPR->getValueArrayForSite(VK, S).size(); 1101 } 1102 1103 void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, 1104 uint32_t K, uint32_t S) { 1105 const auto *IPR = reinterpret_cast<const InstrProfRecord *>(R); 1106 llvm::copy(IPR->getValueArrayForSite(K, S), Dst); 1107 } 1108 1109 ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { 1110 ValueProfData *VD = 1111 (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData()); 1112 memset(VD, 0, TotalSizeInBytes); 1113 return VD; 1114 } 1115 1116 static ValueProfRecordClosure InstrProfRecordClosure = { 1117 nullptr, 1118 getNumValueKindsInstrProf, 1119 getNumValueSitesInstrProf, 1120 getNumValueDataInstrProf, 1121 getNumValueDataForSiteInstrProf, 1122 nullptr, 1123 getValueForSiteInstrProf, 1124 allocValueProfDataInstrProf}; 1125 1126 // Wrapper implementation using the closure mechanism. 1127 uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { 1128 auto Closure = InstrProfRecordClosure; 1129 Closure.Record = &Record; 1130 return getValueProfDataSize(&Closure); 1131 } 1132 1133 // Wrapper implementation using the closure mechanism. 1134 std::unique_ptr<ValueProfData> 1135 ValueProfData::serializeFrom(const InstrProfRecord &Record) { 1136 InstrProfRecordClosure.Record = &Record; 1137 1138 std::unique_ptr<ValueProfData> VPD( 1139 serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr)); 1140 return VPD; 1141 } 1142 1143 void ValueProfRecord::deserializeTo(InstrProfRecord &Record, 1144 InstrProfSymtab *SymTab) { 1145 Record.reserveSites(Kind, NumValueSites); 1146 1147 InstrProfValueData *ValueData = getValueProfRecordValueData(this); 1148 for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) { 1149 uint8_t ValueDataCount = this->SiteCountArray[VSite]; 1150 ArrayRef<InstrProfValueData> VDs(ValueData, ValueDataCount); 1151 Record.addValueData(Kind, VSite, VDs, SymTab); 1152 ValueData += ValueDataCount; 1153 } 1154 } 1155 1156 // For writing/serializing, Old is the host endianness, and New is 1157 // byte order intended on disk. For Reading/deserialization, Old 1158 // is the on-disk source endianness, and New is the host endianness. 1159 void ValueProfRecord::swapBytes(llvm::endianness Old, llvm::endianness New) { 1160 using namespace support; 1161 1162 if (Old == New) 1163 return; 1164 1165 if (llvm::endianness::native != Old) { 1166 sys::swapByteOrder<uint32_t>(NumValueSites); 1167 sys::swapByteOrder<uint32_t>(Kind); 1168 } 1169 uint32_t ND = getValueProfRecordNumValueData(this); 1170 InstrProfValueData *VD = getValueProfRecordValueData(this); 1171 1172 // No need to swap byte array: SiteCountArrray. 1173 for (uint32_t I = 0; I < ND; I++) { 1174 sys::swapByteOrder<uint64_t>(VD[I].Value); 1175 sys::swapByteOrder<uint64_t>(VD[I].Count); 1176 } 1177 if (llvm::endianness::native == Old) { 1178 sys::swapByteOrder<uint32_t>(NumValueSites); 1179 sys::swapByteOrder<uint32_t>(Kind); 1180 } 1181 } 1182 1183 void ValueProfData::deserializeTo(InstrProfRecord &Record, 1184 InstrProfSymtab *SymTab) { 1185 if (NumValueKinds == 0) 1186 return; 1187 1188 ValueProfRecord *VR = getFirstValueProfRecord(this); 1189 for (uint32_t K = 0; K < NumValueKinds; K++) { 1190 VR->deserializeTo(Record, SymTab); 1191 VR = getValueProfRecordNext(VR); 1192 } 1193 } 1194 1195 static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) { 1196 return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize)) 1197 ValueProfData()); 1198 } 1199 1200 Error ValueProfData::checkIntegrity() { 1201 if (NumValueKinds > IPVK_Last + 1) 1202 return make_error<InstrProfError>( 1203 instrprof_error::malformed, "number of value profile kinds is invalid"); 1204 // Total size needs to be multiple of quadword size. 1205 if (TotalSize % sizeof(uint64_t)) 1206 return make_error<InstrProfError>( 1207 instrprof_error::malformed, "total size is not multiples of quardword"); 1208 1209 ValueProfRecord *VR = getFirstValueProfRecord(this); 1210 for (uint32_t K = 0; K < this->NumValueKinds; K++) { 1211 if (VR->Kind > IPVK_Last) 1212 return make_error<InstrProfError>(instrprof_error::malformed, 1213 "value kind is invalid"); 1214 VR = getValueProfRecordNext(VR); 1215 if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize) 1216 return make_error<InstrProfError>( 1217 instrprof_error::malformed, 1218 "value profile address is greater than total size"); 1219 } 1220 return Error::success(); 1221 } 1222 1223 Expected<std::unique_ptr<ValueProfData>> 1224 ValueProfData::getValueProfData(const unsigned char *D, 1225 const unsigned char *const BufferEnd, 1226 llvm::endianness Endianness) { 1227 using namespace support; 1228 1229 if (D + sizeof(ValueProfData) > BufferEnd) 1230 return make_error<InstrProfError>(instrprof_error::truncated); 1231 1232 const unsigned char *Header = D; 1233 uint32_t TotalSize = endian::readNext<uint32_t>(Header, Endianness); 1234 1235 if (D + TotalSize > BufferEnd) 1236 return make_error<InstrProfError>(instrprof_error::too_large); 1237 1238 std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize); 1239 memcpy(VPD.get(), D, TotalSize); 1240 // Byte swap. 1241 VPD->swapBytesToHost(Endianness); 1242 1243 Error E = VPD->checkIntegrity(); 1244 if (E) 1245 return std::move(E); 1246 1247 return std::move(VPD); 1248 } 1249 1250 void ValueProfData::swapBytesToHost(llvm::endianness Endianness) { 1251 using namespace support; 1252 1253 if (Endianness == llvm::endianness::native) 1254 return; 1255 1256 sys::swapByteOrder<uint32_t>(TotalSize); 1257 sys::swapByteOrder<uint32_t>(NumValueKinds); 1258 1259 ValueProfRecord *VR = getFirstValueProfRecord(this); 1260 for (uint32_t K = 0; K < NumValueKinds; K++) { 1261 VR->swapBytes(Endianness, llvm::endianness::native); 1262 VR = getValueProfRecordNext(VR); 1263 } 1264 } 1265 1266 void ValueProfData::swapBytesFromHost(llvm::endianness Endianness) { 1267 using namespace support; 1268 1269 if (Endianness == llvm::endianness::native) 1270 return; 1271 1272 ValueProfRecord *VR = getFirstValueProfRecord(this); 1273 for (uint32_t K = 0; K < NumValueKinds; K++) { 1274 ValueProfRecord *NVR = getValueProfRecordNext(VR); 1275 VR->swapBytes(llvm::endianness::native, Endianness); 1276 VR = NVR; 1277 } 1278 sys::swapByteOrder<uint32_t>(TotalSize); 1279 sys::swapByteOrder<uint32_t>(NumValueKinds); 1280 } 1281 1282 void annotateValueSite(Module &M, Instruction &Inst, 1283 const InstrProfRecord &InstrProfR, 1284 InstrProfValueKind ValueKind, uint32_t SiteIdx, 1285 uint32_t MaxMDCount) { 1286 auto VDs = InstrProfR.getValueArrayForSite(ValueKind, SiteIdx); 1287 if (VDs.empty()) 1288 return; 1289 uint64_t Sum = 0; 1290 for (const InstrProfValueData &V : VDs) 1291 Sum = SaturatingAdd(Sum, V.Count); 1292 annotateValueSite(M, Inst, VDs, Sum, ValueKind, MaxMDCount); 1293 } 1294 1295 void annotateValueSite(Module &M, Instruction &Inst, 1296 ArrayRef<InstrProfValueData> VDs, 1297 uint64_t Sum, InstrProfValueKind ValueKind, 1298 uint32_t MaxMDCount) { 1299 if (VDs.empty()) 1300 return; 1301 LLVMContext &Ctx = M.getContext(); 1302 MDBuilder MDHelper(Ctx); 1303 SmallVector<Metadata *, 3> Vals; 1304 // Tag 1305 Vals.push_back(MDHelper.createString("VP")); 1306 // Value Kind 1307 Vals.push_back(MDHelper.createConstant( 1308 ConstantInt::get(Type::getInt32Ty(Ctx), ValueKind))); 1309 // Total Count 1310 Vals.push_back( 1311 MDHelper.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Sum))); 1312 1313 // Value Profile Data 1314 uint32_t MDCount = MaxMDCount; 1315 for (const auto &VD : VDs) { 1316 Vals.push_back(MDHelper.createConstant( 1317 ConstantInt::get(Type::getInt64Ty(Ctx), VD.Value))); 1318 Vals.push_back(MDHelper.createConstant( 1319 ConstantInt::get(Type::getInt64Ty(Ctx), VD.Count))); 1320 if (--MDCount == 0) 1321 break; 1322 } 1323 Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); 1324 } 1325 1326 MDNode *mayHaveValueProfileOfKind(const Instruction &Inst, 1327 InstrProfValueKind ValueKind) { 1328 MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); 1329 if (!MD) 1330 return nullptr; 1331 1332 if (MD->getNumOperands() < 5) 1333 return nullptr; 1334 1335 MDString *Tag = cast<MDString>(MD->getOperand(0)); 1336 if (!Tag || Tag->getString() != "VP") 1337 return nullptr; 1338 1339 // Now check kind: 1340 ConstantInt *KindInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1)); 1341 if (!KindInt) 1342 return nullptr; 1343 if (KindInt->getZExtValue() != ValueKind) 1344 return nullptr; 1345 1346 return MD; 1347 } 1348 1349 SmallVector<InstrProfValueData, 4> 1350 getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, 1351 uint32_t MaxNumValueData, uint64_t &TotalC, 1352 bool GetNoICPValue) { 1353 // Four inline elements seem to work well in practice. With MaxNumValueData, 1354 // this array won't grow very big anyway. 1355 SmallVector<InstrProfValueData, 4> ValueData; 1356 MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind); 1357 if (!MD) 1358 return ValueData; 1359 const unsigned NOps = MD->getNumOperands(); 1360 // Get total count 1361 ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2)); 1362 if (!TotalCInt) 1363 return ValueData; 1364 TotalC = TotalCInt->getZExtValue(); 1365 1366 ValueData.reserve((NOps - 3) / 2); 1367 for (unsigned I = 3; I < NOps; I += 2) { 1368 if (ValueData.size() >= MaxNumValueData) 1369 break; 1370 ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I)); 1371 ConstantInt *Count = 1372 mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1)); 1373 if (!Value || !Count) { 1374 ValueData.clear(); 1375 return ValueData; 1376 } 1377 uint64_t CntValue = Count->getZExtValue(); 1378 if (!GetNoICPValue && (CntValue == NOMORE_ICP_MAGICNUM)) 1379 continue; 1380 InstrProfValueData V; 1381 V.Value = Value->getZExtValue(); 1382 V.Count = CntValue; 1383 ValueData.push_back(V); 1384 } 1385 return ValueData; 1386 } 1387 1388 MDNode *getPGOFuncNameMetadata(const Function &F) { 1389 return F.getMetadata(getPGOFuncNameMetadataName()); 1390 } 1391 1392 static void createPGONameMetadata(GlobalObject &GO, StringRef MetadataName, 1393 StringRef PGOName) { 1394 // Only for internal linkage functions or global variables. The name is not 1395 // the same as PGO name for these global objects. 1396 if (GO.getName() == PGOName) 1397 return; 1398 1399 // Don't create duplicated metadata. 1400 if (GO.getMetadata(MetadataName)) 1401 return; 1402 1403 LLVMContext &C = GO.getContext(); 1404 MDNode *N = MDNode::get(C, MDString::get(C, PGOName)); 1405 GO.setMetadata(MetadataName, N); 1406 } 1407 1408 void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { 1409 return createPGONameMetadata(F, getPGOFuncNameMetadataName(), PGOFuncName); 1410 } 1411 1412 void createPGONameMetadata(GlobalObject &GO, StringRef PGOName) { 1413 return createPGONameMetadata(GO, getPGONameMetadataName(), PGOName); 1414 } 1415 1416 bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { 1417 if (GO.hasComdat()) 1418 return true; 1419 1420 if (!Triple(M.getTargetTriple()).supportsCOMDAT()) 1421 return false; 1422 1423 // See createPGOFuncNameVar for more details. To avoid link errors, profile 1424 // counters for function with available_externally linkage needs to be changed 1425 // to linkonce linkage. On ELF based systems, this leads to weak symbols to be 1426 // created. Without using comdat, duplicate entries won't be removed by the 1427 // linker leading to increased data segement size and raw profile size. Even 1428 // worse, since the referenced counter from profile per-function data object 1429 // will be resolved to the common strong definition, the profile counts for 1430 // available_externally functions will end up being duplicated in raw profile 1431 // data. This can result in distorted profile as the counts of those dups 1432 // will be accumulated by the profile merger. 1433 GlobalValue::LinkageTypes Linkage = GO.getLinkage(); 1434 if (Linkage != GlobalValue::ExternalWeakLinkage && 1435 Linkage != GlobalValue::AvailableExternallyLinkage) 1436 return false; 1437 1438 return true; 1439 } 1440 1441 // Check if INSTR_PROF_RAW_VERSION_VAR is defined. 1442 bool isIRPGOFlagSet(const Module *M) { 1443 const GlobalVariable *IRInstrVar = 1444 M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); 1445 if (!IRInstrVar || IRInstrVar->hasLocalLinkage()) 1446 return false; 1447 1448 // For CSPGO+LTO, this variable might be marked as non-prevailing and we only 1449 // have the decl. 1450 if (IRInstrVar->isDeclaration()) 1451 return true; 1452 1453 // Check if the flag is set. 1454 if (!IRInstrVar->hasInitializer()) 1455 return false; 1456 1457 auto *InitVal = dyn_cast_or_null<ConstantInt>(IRInstrVar->getInitializer()); 1458 if (!InitVal) 1459 return false; 1460 return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0; 1461 } 1462 1463 // Check if we can safely rename this Comdat function. 1464 bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { 1465 if (F.getName().empty()) 1466 return false; 1467 if (!needsComdatForCounter(F, *(F.getParent()))) 1468 return false; 1469 // Unsafe to rename the address-taken function (which can be used in 1470 // function comparison). 1471 if (CheckAddressTaken && F.hasAddressTaken()) 1472 return false; 1473 // Only safe to do if this function may be discarded if it is not used 1474 // in the compilation unit. 1475 if (!GlobalValue::isDiscardableIfUnused(F.getLinkage())) 1476 return false; 1477 1478 // For AvailableExternallyLinkage functions. 1479 if (!F.hasComdat()) { 1480 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage); 1481 return true; 1482 } 1483 return true; 1484 } 1485 1486 // Create the variable for the profile file name. 1487 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { 1488 if (InstrProfileOutput.empty()) 1489 return; 1490 Constant *ProfileNameConst = 1491 ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true); 1492 GlobalVariable *ProfileNameVar = new GlobalVariable( 1493 M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, 1494 ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); 1495 ProfileNameVar->setVisibility(GlobalValue::HiddenVisibility); 1496 Triple TT(M.getTargetTriple()); 1497 if (TT.supportsCOMDAT()) { 1498 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); 1499 ProfileNameVar->setComdat(M.getOrInsertComdat( 1500 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)))); 1501 } 1502 } 1503 1504 Error OverlapStats::accumulateCounts(const std::string &BaseFilename, 1505 const std::string &TestFilename, 1506 bool IsCS) { 1507 auto GetProfileSum = [IsCS](const std::string &Filename, 1508 CountSumOrPercent &Sum) -> Error { 1509 // This function is only used from llvm-profdata that doesn't use any kind 1510 // of VFS. Just create a default RealFileSystem to read profiles. 1511 auto FS = vfs::getRealFileSystem(); 1512 auto ReaderOrErr = InstrProfReader::create(Filename, *FS); 1513 if (Error E = ReaderOrErr.takeError()) { 1514 return E; 1515 } 1516 auto Reader = std::move(ReaderOrErr.get()); 1517 Reader->accumulateCounts(Sum, IsCS); 1518 return Error::success(); 1519 }; 1520 auto Ret = GetProfileSum(BaseFilename, Base); 1521 if (Ret) 1522 return Ret; 1523 Ret = GetProfileSum(TestFilename, Test); 1524 if (Ret) 1525 return Ret; 1526 this->BaseFilename = &BaseFilename; 1527 this->TestFilename = &TestFilename; 1528 Valid = true; 1529 return Error::success(); 1530 } 1531 1532 void OverlapStats::addOneMismatch(const CountSumOrPercent &MismatchFunc) { 1533 Mismatch.NumEntries += 1; 1534 Mismatch.CountSum += MismatchFunc.CountSum / Test.CountSum; 1535 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { 1536 if (Test.ValueCounts[I] >= 1.0f) 1537 Mismatch.ValueCounts[I] += 1538 MismatchFunc.ValueCounts[I] / Test.ValueCounts[I]; 1539 } 1540 } 1541 1542 void OverlapStats::addOneUnique(const CountSumOrPercent &UniqueFunc) { 1543 Unique.NumEntries += 1; 1544 Unique.CountSum += UniqueFunc.CountSum / Test.CountSum; 1545 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { 1546 if (Test.ValueCounts[I] >= 1.0f) 1547 Unique.ValueCounts[I] += UniqueFunc.ValueCounts[I] / Test.ValueCounts[I]; 1548 } 1549 } 1550 1551 void OverlapStats::dump(raw_fd_ostream &OS) const { 1552 if (!Valid) 1553 return; 1554 1555 const char *EntryName = 1556 (Level == ProgramLevel ? "functions" : "edge counters"); 1557 if (Level == ProgramLevel) { 1558 OS << "Profile overlap infomation for base_profile: " << *BaseFilename 1559 << " and test_profile: " << *TestFilename << "\nProgram level:\n"; 1560 } else { 1561 OS << "Function level:\n" 1562 << " Function: " << FuncName << " (Hash=" << FuncHash << ")\n"; 1563 } 1564 1565 OS << " # of " << EntryName << " overlap: " << Overlap.NumEntries << "\n"; 1566 if (Mismatch.NumEntries) 1567 OS << " # of " << EntryName << " mismatch: " << Mismatch.NumEntries 1568 << "\n"; 1569 if (Unique.NumEntries) 1570 OS << " # of " << EntryName 1571 << " only in test_profile: " << Unique.NumEntries << "\n"; 1572 1573 OS << " Edge profile overlap: " << format("%.3f%%", Overlap.CountSum * 100) 1574 << "\n"; 1575 if (Mismatch.NumEntries) 1576 OS << " Mismatched count percentage (Edge): " 1577 << format("%.3f%%", Mismatch.CountSum * 100) << "\n"; 1578 if (Unique.NumEntries) 1579 OS << " Percentage of Edge profile only in test_profile: " 1580 << format("%.3f%%", Unique.CountSum * 100) << "\n"; 1581 OS << " Edge profile base count sum: " << format("%.0f", Base.CountSum) 1582 << "\n" 1583 << " Edge profile test count sum: " << format("%.0f", Test.CountSum) 1584 << "\n"; 1585 1586 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { 1587 if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f) 1588 continue; 1589 char ProfileKindName[20] = {0}; 1590 switch (I) { 1591 case IPVK_IndirectCallTarget: 1592 strncpy(ProfileKindName, "IndirectCall", 19); 1593 break; 1594 case IPVK_MemOPSize: 1595 strncpy(ProfileKindName, "MemOP", 19); 1596 break; 1597 case IPVK_VTableTarget: 1598 strncpy(ProfileKindName, "VTable", 19); 1599 break; 1600 default: 1601 snprintf(ProfileKindName, 19, "VP[%d]", I); 1602 break; 1603 } 1604 OS << " " << ProfileKindName 1605 << " profile overlap: " << format("%.3f%%", Overlap.ValueCounts[I] * 100) 1606 << "\n"; 1607 if (Mismatch.NumEntries) 1608 OS << " Mismatched count percentage (" << ProfileKindName 1609 << "): " << format("%.3f%%", Mismatch.ValueCounts[I] * 100) << "\n"; 1610 if (Unique.NumEntries) 1611 OS << " Percentage of " << ProfileKindName 1612 << " profile only in test_profile: " 1613 << format("%.3f%%", Unique.ValueCounts[I] * 100) << "\n"; 1614 OS << " " << ProfileKindName 1615 << " profile base count sum: " << format("%.0f", Base.ValueCounts[I]) 1616 << "\n" 1617 << " " << ProfileKindName 1618 << " profile test count sum: " << format("%.0f", Test.ValueCounts[I]) 1619 << "\n"; 1620 } 1621 } 1622 1623 namespace IndexedInstrProf { 1624 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) { 1625 using namespace support; 1626 static_assert(std::is_standard_layout_v<Header>, 1627 "Use standard layout for Header for simplicity"); 1628 Header H; 1629 1630 H.Magic = endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1631 // Check the magic number. 1632 if (H.Magic != IndexedInstrProf::Magic) 1633 return make_error<InstrProfError>(instrprof_error::bad_magic); 1634 1635 // Read the version. 1636 H.Version = endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1637 if (H.getIndexedProfileVersion() > 1638 IndexedInstrProf::ProfVersion::CurrentVersion) 1639 return make_error<InstrProfError>(instrprof_error::unsupported_version); 1640 1641 static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12, 1642 "Please update the reader as needed when a new field is added " 1643 "or when indexed profile version gets bumped."); 1644 1645 Buffer += sizeof(uint64_t); // Skip Header.Unused field. 1646 H.HashType = endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1647 H.HashOffset = endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1648 if (H.getIndexedProfileVersion() >= 8) 1649 H.MemProfOffset = 1650 endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1651 if (H.getIndexedProfileVersion() >= 9) 1652 H.BinaryIdOffset = 1653 endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1654 // Version 11 is handled by this condition. 1655 if (H.getIndexedProfileVersion() >= 10) 1656 H.TemporalProfTracesOffset = 1657 endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1658 if (H.getIndexedProfileVersion() >= 12) 1659 H.VTableNamesOffset = 1660 endian::readNext<uint64_t, llvm::endianness::little>(Buffer); 1661 return H; 1662 } 1663 1664 uint64_t Header::getIndexedProfileVersion() const { 1665 return GET_VERSION(Version); 1666 } 1667 1668 size_t Header::size() const { 1669 switch (getIndexedProfileVersion()) { 1670 // To retain backward compatibility, new fields must be appended to the end 1671 // of the header, and byte offset of existing fields shouldn't change when 1672 // indexed profile version gets incremented. 1673 static_assert( 1674 IndexedInstrProf::ProfVersion::CurrentVersion == Version12, 1675 "Please update the size computation below if a new field has " 1676 "been added to the header; for a version bump without new " 1677 "fields, add a case statement to fall through to the latest version."); 1678 case 12ull: 1679 return 72; 1680 case 11ull: 1681 [[fallthrough]]; 1682 case 10ull: 1683 return 64; 1684 case 9ull: 1685 return 56; 1686 case 8ull: 1687 return 48; 1688 default: // Version7 (when the backwards compatible header was introduced). 1689 return 40; 1690 } 1691 } 1692 1693 } // namespace IndexedInstrProf 1694 1695 } // end namespace llvm 1696