//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
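
// With the defaults above (granularity 64, scale 3), the shadow mapping
//   Shadow = ((Mem & ~63) >> 3) + DynamicShadowOffset
// gives each 64-byte chunk of memory its own 8-byte shadow counter: e.g.
// addresses 0x1000-0x103f all share one 64-bit counter, and 0x1040 maps to
// the counter 8 bytes further on (64 >> 3 == 8).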

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
// Shadow = ((Mem & mask) >> scale) + offset

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));
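
// ClDebugMin/ClDebugMax bound which of the collected memory operations (by
// visitation index) actually get instrumented; e.g. -memprof-debug-min=0
// -memprof-debug-max=9 instruments only the first ten, which helps bisect
// instrumentation problems.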

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

extern cl::opt<bool> MemProfReportHintedSizes;

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // This array is indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {

  assert((!ClHistogram || ClUseCalls) &&
         "Cannot use -memprof-histogram without callbacks. Set "
         "-memprof-use-callbacks");

  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}
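
// For example, the rewrite above turns (illustrative IR, default callback
// prefix):
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
// into
//   call ptr @__memprof_memcpy(ptr %dst, ptr %src, i64 %n)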

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }
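    // (On ELF, for example, the counters section checked above is named
    // __llvm_prf_cnts.)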

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Create an inline sequence to compute shadow location, and increment the
  // value by one.
  Type *ShadowTy = Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}
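
// The inline sequence above amounts to (illustrative IR, with the default
// 64-byte granularity and scale of 3):
//   %a = ptrtoint ptr %addr to i64
//   %b = and i64 %a, -64                  ; Mapping.Mask
//   %c = lshr i64 %b, 3                   ; Mapping.Scale
//   %d = add i64 %c, %dyn_shadow_offset   ; loaded at function entry
//   %e = inttoptr i64 %d to ptr
//   %f = load i64, ptr %e
//   %g = add i64 %f, 1
//   store i64 %g, ptr %e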

// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in IR. This makes it
// accessible to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

bool ModuleMemProfiler::instrumentModule(Module &M) {

  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  createMemprofHistogramFlagVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}
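
// With the default prefix the loop above declares __memprof_load and
// __memprof_store (or __memprof_hist_load and __memprof_hist_store when
// -memprof-histogram is enabled), alongside __memprof_memmove,
// __memprof_memcpy and __memprof_memset.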

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;
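
  // Gather the candidates first and instrument afterwards: instrumentation
  // inserts new instructions (and instrumentMemIntrinsic erases the original
  // call), which would otherwise invalidate the iteration.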
  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t
computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  uint64_t TotalSize = 0;
  if (MemProfReportHintedSizes) {
    TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
  }
  AllocTrie.addCallStack(AllocType, StackIds, TotalSize);
  return AllocType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static bool isNewWithHotColdVariant(Function *Callee,
                                    const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}

struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. If F has local linkage,
  // getIRPGOFuncName() returns FuncName with a 'FileName;' prefix. But
  // llvm-profdata uses the FuncName in DWARF to create the GUID, which does
  // not contain the FileName prefix, so local linkage functions could not
  // find their MemProfRecord. Hence we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
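  // For example, a static function foo() defined in a.c would be named
  // "a.c;foo" by getIRPGOFuncName(), whereas the DWARF-derived GUID hashes
  // plain "foo".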
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };
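
  // For example, with a subprogram starting at line 100, a call at line 110
  // column 5 hashes as (FuncGUID, LineOffset=10, Column=5), or Column=0 when
  // the profile was collected without column info.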

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations via new, to reduce unnecessary metadata,
        // since those are the only allocations that will be targeted initially.
        if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            auto AllocType = addCallStack(AllocTrie, AllocInfo);
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              auto FullStackId = computeFullStackId(AllocInfo->CallStack);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }
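
      // A matched allocation ends up looking roughly like this (illustrative
      // IR; stack ids and allocation types vary):
      //   %call = call ptr @_Znwm(i64 8), !memprof !0, !callsite !5
      //   !0 = !{!1, !3}
      //   !1 = !{!2, !"cold"}
      //   !2 = !{i64 123, i64 456}
      //   !3 = !{!4, !"notcold"}
      //   !4 = !{i64 123, i64 789}
      //   !5 = !{i64 123}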

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}