1 //===- MemProfiler.cpp - memory allocation and access profiler ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of MemProfiler. Memory accesses are instrumented 10 // to increment the access count held in a shadow memory location, or 11 // alternatively to call into the runtime. Memory intrinsic calls (memmove, 12 // memcpy, memset) are changed to call the memory profiling runtime version 13 // instead. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "llvm/Transforms/Instrumentation/MemProfiler.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/Analysis/MemoryBuiltins.h" 22 #include "llvm/Analysis/MemoryProfileInfo.h" 23 #include "llvm/Analysis/ValueTracking.h" 24 #include "llvm/IR/Constant.h" 25 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/DiagnosticInfo.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/GlobalValue.h" 29 #include "llvm/IR/IRBuilder.h" 30 #include "llvm/IR/Instruction.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/ProfileData/InstrProf.h" 36 #include "llvm/ProfileData/InstrProfReader.h" 37 #include "llvm/Support/BLAKE3.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/HashBuilder.h" 41 #include "llvm/Support/VirtualFileSystem.h" 42 #include "llvm/TargetParser/Triple.h" 43 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 44 #include "llvm/Transforms/Utils/ModuleUtils.h" 45 #include <map> 46 #include <set> 47 48 using namespace llvm; 49 using namespace llvm::memprof; 50 51 #define DEBUG_TYPE "memprof" 52 53 namespace llvm { 54 extern cl::opt<bool> PGOWarnMissing; 55 extern cl::opt<bool> NoPGOWarnMismatch; 56 extern cl::opt<bool> NoPGOWarnMismatchComdatWeak; 57 } // namespace llvm 58 59 constexpr int LLVM_MEM_PROFILER_VERSION = 1; 60 61 // Size of memory mapped to a single shadow location. 62 constexpr uint64_t DefaultShadowGranularity = 64; 63 64 // Scale from granularity down to shadow size. 65 constexpr uint64_t DefaultShadowScale = 3; 66 67 constexpr char MemProfModuleCtorName[] = "memprof.module_ctor"; 68 constexpr uint64_t MemProfCtorAndDtorPriority = 1; 69 // On Emscripten, the system needs more than one priorities for constructors. 70 constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50; 71 constexpr char MemProfInitName[] = "__memprof_init"; 72 constexpr char MemProfVersionCheckNamePrefix[] = 73 "__memprof_version_mismatch_check_v"; 74 75 constexpr char MemProfShadowMemoryDynamicAddress[] = 76 "__memprof_shadow_memory_dynamic_address"; 77 78 constexpr char MemProfFilenameVar[] = "__memprof_profile_filename"; 79 80 // Command-line flags. 81 82 static cl::opt<bool> ClInsertVersionCheck( 83 "memprof-guard-against-version-mismatch", 84 cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden, 85 cl::init(true)); 86 87 // This flag may need to be replaced with -f[no-]memprof-reads. 88 static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads", 89 cl::desc("instrument read instructions"), 90 cl::Hidden, cl::init(true)); 91 92 static cl::opt<bool> 93 ClInstrumentWrites("memprof-instrument-writes", 94 cl::desc("instrument write instructions"), cl::Hidden, 95 cl::init(true)); 96 97 static cl::opt<bool> ClInstrumentAtomics( 98 "memprof-instrument-atomics", 99 cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, 100 cl::init(true)); 101 102 static cl::opt<bool> ClUseCalls( 103 "memprof-use-callbacks", 104 cl::desc("Use callbacks instead of inline instrumentation sequences."), 105 cl::Hidden, cl::init(false)); 106 107 static cl::opt<std::string> 108 ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix", 109 cl::desc("Prefix for memory access callbacks"), 110 cl::Hidden, cl::init("__memprof_")); 111 112 // These flags allow to change the shadow mapping. 113 // The shadow mapping looks like 114 // Shadow = ((Mem & mask) >> scale) + offset 115 116 static cl::opt<int> ClMappingScale("memprof-mapping-scale", 117 cl::desc("scale of memprof shadow mapping"), 118 cl::Hidden, cl::init(DefaultShadowScale)); 119 120 static cl::opt<int> 121 ClMappingGranularity("memprof-mapping-granularity", 122 cl::desc("granularity of memprof shadow mapping"), 123 cl::Hidden, cl::init(DefaultShadowGranularity)); 124 125 static cl::opt<bool> ClStack("memprof-instrument-stack", 126 cl::desc("Instrument scalar stack variables"), 127 cl::Hidden, cl::init(false)); 128 129 // Debug flags. 130 131 static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden, 132 cl::init(0)); 133 134 static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden, 135 cl::desc("Debug func")); 136 137 static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"), 138 cl::Hidden, cl::init(-1)); 139 140 static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"), 141 cl::Hidden, cl::init(-1)); 142 143 STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); 144 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); 145 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads"); 146 STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes"); 147 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile."); 148 149 namespace { 150 151 /// This struct defines the shadow mapping using the rule: 152 /// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset. 153 struct ShadowMapping { 154 ShadowMapping() { 155 Scale = ClMappingScale; 156 Granularity = ClMappingGranularity; 157 Mask = ~(Granularity - 1); 158 } 159 160 int Scale; 161 int Granularity; 162 uint64_t Mask; // Computed as ~(Granularity-1) 163 }; 164 165 static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) { 166 return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority 167 : MemProfCtorAndDtorPriority; 168 } 169 170 struct InterestingMemoryAccess { 171 Value *Addr = nullptr; 172 bool IsWrite; 173 Type *AccessTy; 174 uint64_t TypeSize; 175 Value *MaybeMask = nullptr; 176 }; 177 178 /// Instrument the code in module to profile memory accesses. 179 class MemProfiler { 180 public: 181 MemProfiler(Module &M) { 182 C = &(M.getContext()); 183 LongSize = M.getDataLayout().getPointerSizeInBits(); 184 IntptrTy = Type::getIntNTy(*C, LongSize); 185 PtrTy = PointerType::getUnqual(*C); 186 } 187 188 /// If it is an interesting memory access, populate information 189 /// about the access and return a InterestingMemoryAccess struct. 190 /// Otherwise return std::nullopt. 191 std::optional<InterestingMemoryAccess> 192 isInterestingMemoryAccess(Instruction *I) const; 193 194 void instrumentMop(Instruction *I, const DataLayout &DL, 195 InterestingMemoryAccess &Access); 196 void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore, 197 Value *Addr, uint32_t TypeSize, bool IsWrite); 198 void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 199 Instruction *I, Value *Addr, Type *AccessTy, 200 bool IsWrite); 201 void instrumentMemIntrinsic(MemIntrinsic *MI); 202 Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); 203 bool instrumentFunction(Function &F); 204 bool maybeInsertMemProfInitAtFunctionEntry(Function &F); 205 bool insertDynamicShadowAtFunctionEntry(Function &F); 206 207 private: 208 void initializeCallbacks(Module &M); 209 210 LLVMContext *C; 211 int LongSize; 212 Type *IntptrTy; 213 PointerType *PtrTy; 214 ShadowMapping Mapping; 215 216 // These arrays is indexed by AccessIsWrite 217 FunctionCallee MemProfMemoryAccessCallback[2]; 218 FunctionCallee MemProfMemoryAccessCallbackSized[2]; 219 220 FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset; 221 Value *DynamicShadowOffset = nullptr; 222 }; 223 224 class ModuleMemProfiler { 225 public: 226 ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); } 227 228 bool instrumentModule(Module &); 229 230 private: 231 Triple TargetTriple; 232 ShadowMapping Mapping; 233 Function *MemProfCtorFunction = nullptr; 234 }; 235 236 } // end anonymous namespace 237 238 MemProfilerPass::MemProfilerPass() = default; 239 240 PreservedAnalyses MemProfilerPass::run(Function &F, 241 AnalysisManager<Function> &AM) { 242 Module &M = *F.getParent(); 243 MemProfiler Profiler(M); 244 if (Profiler.instrumentFunction(F)) 245 return PreservedAnalyses::none(); 246 return PreservedAnalyses::all(); 247 } 248 249 ModuleMemProfilerPass::ModuleMemProfilerPass() = default; 250 251 PreservedAnalyses ModuleMemProfilerPass::run(Module &M, 252 AnalysisManager<Module> &AM) { 253 ModuleMemProfiler Profiler(M); 254 if (Profiler.instrumentModule(M)) 255 return PreservedAnalyses::none(); 256 return PreservedAnalyses::all(); 257 } 258 259 Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) { 260 // (Shadow & mask) >> scale 261 Shadow = IRB.CreateAnd(Shadow, Mapping.Mask); 262 Shadow = IRB.CreateLShr(Shadow, Mapping.Scale); 263 // (Shadow >> scale) | offset 264 assert(DynamicShadowOffset); 265 return IRB.CreateAdd(Shadow, DynamicShadowOffset); 266 } 267 268 // Instrument memset/memmove/memcpy 269 void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) { 270 IRBuilder<> IRB(MI); 271 if (isa<MemTransferInst>(MI)) { 272 IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy, 273 {MI->getOperand(0), MI->getOperand(1), 274 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 275 } else if (isa<MemSetInst>(MI)) { 276 IRB.CreateCall( 277 MemProfMemset, 278 {MI->getOperand(0), 279 IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false), 280 IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)}); 281 } 282 MI->eraseFromParent(); 283 } 284 285 std::optional<InterestingMemoryAccess> 286 MemProfiler::isInterestingMemoryAccess(Instruction *I) const { 287 // Do not instrument the load fetching the dynamic shadow address. 288 if (DynamicShadowOffset == I) 289 return std::nullopt; 290 291 InterestingMemoryAccess Access; 292 293 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 294 if (!ClInstrumentReads) 295 return std::nullopt; 296 Access.IsWrite = false; 297 Access.AccessTy = LI->getType(); 298 Access.Addr = LI->getPointerOperand(); 299 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 300 if (!ClInstrumentWrites) 301 return std::nullopt; 302 Access.IsWrite = true; 303 Access.AccessTy = SI->getValueOperand()->getType(); 304 Access.Addr = SI->getPointerOperand(); 305 } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) { 306 if (!ClInstrumentAtomics) 307 return std::nullopt; 308 Access.IsWrite = true; 309 Access.AccessTy = RMW->getValOperand()->getType(); 310 Access.Addr = RMW->getPointerOperand(); 311 } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) { 312 if (!ClInstrumentAtomics) 313 return std::nullopt; 314 Access.IsWrite = true; 315 Access.AccessTy = XCHG->getCompareOperand()->getType(); 316 Access.Addr = XCHG->getPointerOperand(); 317 } else if (auto *CI = dyn_cast<CallInst>(I)) { 318 auto *F = CI->getCalledFunction(); 319 if (F && (F->getIntrinsicID() == Intrinsic::masked_load || 320 F->getIntrinsicID() == Intrinsic::masked_store)) { 321 unsigned OpOffset = 0; 322 if (F->getIntrinsicID() == Intrinsic::masked_store) { 323 if (!ClInstrumentWrites) 324 return std::nullopt; 325 // Masked store has an initial operand for the value. 326 OpOffset = 1; 327 Access.AccessTy = CI->getArgOperand(0)->getType(); 328 Access.IsWrite = true; 329 } else { 330 if (!ClInstrumentReads) 331 return std::nullopt; 332 Access.AccessTy = CI->getType(); 333 Access.IsWrite = false; 334 } 335 336 auto *BasePtr = CI->getOperand(0 + OpOffset); 337 Access.MaybeMask = CI->getOperand(2 + OpOffset); 338 Access.Addr = BasePtr; 339 } 340 } 341 342 if (!Access.Addr) 343 return std::nullopt; 344 345 // Do not instrument accesses from different address spaces; we cannot deal 346 // with them. 347 Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType()); 348 if (PtrTy->getPointerAddressSpace() != 0) 349 return std::nullopt; 350 351 // Ignore swifterror addresses. 352 // swifterror memory addresses are mem2reg promoted by instruction 353 // selection. As such they cannot have regular uses like an instrumentation 354 // function and it makes no sense to track them as memory. 355 if (Access.Addr->isSwiftError()) 356 return std::nullopt; 357 358 // Peel off GEPs and BitCasts. 359 auto *Addr = Access.Addr->stripInBoundsOffsets(); 360 361 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { 362 // Do not instrument PGO counter updates. 363 if (GV->hasSection()) { 364 StringRef SectionName = GV->getSection(); 365 // Check if the global is in the PGO counters section. 366 auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat(); 367 if (SectionName.ends_with( 368 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false))) 369 return std::nullopt; 370 } 371 372 // Do not instrument accesses to LLVM internal variables. 373 if (GV->getName().starts_with("__llvm")) 374 return std::nullopt; 375 } 376 377 const DataLayout &DL = I->getModule()->getDataLayout(); 378 Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy); 379 return Access; 380 } 381 382 void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask, 383 Instruction *I, Value *Addr, 384 Type *AccessTy, bool IsWrite) { 385 auto *VTy = cast<FixedVectorType>(AccessTy); 386 uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType()); 387 unsigned Num = VTy->getNumElements(); 388 auto *Zero = ConstantInt::get(IntptrTy, 0); 389 for (unsigned Idx = 0; Idx < Num; ++Idx) { 390 Value *InstrumentedAddress = nullptr; 391 Instruction *InsertBefore = I; 392 if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { 393 // dyn_cast as we might get UndefValue 394 if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { 395 if (Masked->isZero()) 396 // Mask is constant false, so no instrumentation needed. 397 continue; 398 // If we have a true or undef value, fall through to instrumentAddress. 399 // with InsertBefore == I 400 } 401 } else { 402 IRBuilder<> IRB(I); 403 Value *MaskElem = IRB.CreateExtractElement(Mask, Idx); 404 Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false); 405 InsertBefore = ThenTerm; 406 } 407 408 IRBuilder<> IRB(InsertBefore); 409 InstrumentedAddress = 410 IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)}); 411 instrumentAddress(I, InsertBefore, InstrumentedAddress, ElemTypeSize, 412 IsWrite); 413 } 414 } 415 416 void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL, 417 InterestingMemoryAccess &Access) { 418 // Skip instrumentation of stack accesses unless requested. 419 if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) { 420 if (Access.IsWrite) 421 ++NumSkippedStackWrites; 422 else 423 ++NumSkippedStackReads; 424 return; 425 } 426 427 if (Access.IsWrite) 428 NumInstrumentedWrites++; 429 else 430 NumInstrumentedReads++; 431 432 if (Access.MaybeMask) { 433 instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr, 434 Access.AccessTy, Access.IsWrite); 435 } else { 436 // Since the access counts will be accumulated across the entire allocation, 437 // we only update the shadow access count for the first location and thus 438 // don't need to worry about alignment and type size. 439 instrumentAddress(I, I, Access.Addr, Access.TypeSize, Access.IsWrite); 440 } 441 } 442 443 void MemProfiler::instrumentAddress(Instruction *OrigIns, 444 Instruction *InsertBefore, Value *Addr, 445 uint32_t TypeSize, bool IsWrite) { 446 IRBuilder<> IRB(InsertBefore); 447 Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); 448 449 if (ClUseCalls) { 450 IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong); 451 return; 452 } 453 454 // Create an inline sequence to compute shadow location, and increment the 455 // value by one. 456 Type *ShadowTy = Type::getInt64Ty(*C); 457 Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); 458 Value *ShadowPtr = memToShadow(AddrLong, IRB); 459 Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy); 460 Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr); 461 Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1); 462 ShadowValue = IRB.CreateAdd(ShadowValue, Inc); 463 IRB.CreateStore(ShadowValue, ShadowAddr); 464 } 465 466 // Create the variable for the profile file name. 467 void createProfileFileNameVar(Module &M) { 468 const MDString *MemProfFilename = 469 dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename")); 470 if (!MemProfFilename) 471 return; 472 assert(!MemProfFilename->getString().empty() && 473 "Unexpected MemProfProfileFilename metadata with empty string"); 474 Constant *ProfileNameConst = ConstantDataArray::getString( 475 M.getContext(), MemProfFilename->getString(), true); 476 GlobalVariable *ProfileNameVar = new GlobalVariable( 477 M, ProfileNameConst->getType(), /*isConstant=*/true, 478 GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar); 479 Triple TT(M.getTargetTriple()); 480 if (TT.supportsCOMDAT()) { 481 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); 482 ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar)); 483 } 484 } 485 486 bool ModuleMemProfiler::instrumentModule(Module &M) { 487 // Create a module constructor. 488 std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION); 489 std::string VersionCheckName = 490 ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion) 491 : ""; 492 std::tie(MemProfCtorFunction, std::ignore) = 493 createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName, 494 MemProfInitName, /*InitArgTypes=*/{}, 495 /*InitArgs=*/{}, VersionCheckName); 496 497 const uint64_t Priority = getCtorAndDtorPriority(TargetTriple); 498 appendToGlobalCtors(M, MemProfCtorFunction, Priority); 499 500 createProfileFileNameVar(M); 501 502 return true; 503 } 504 505 void MemProfiler::initializeCallbacks(Module &M) { 506 IRBuilder<> IRB(*C); 507 508 for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) { 509 const std::string TypeStr = AccessIsWrite ? "store" : "load"; 510 511 SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy}; 512 SmallVector<Type *, 2> Args1{1, IntptrTy}; 513 MemProfMemoryAccessCallbackSized[AccessIsWrite] = 514 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + "N", 515 FunctionType::get(IRB.getVoidTy(), Args2, false)); 516 517 MemProfMemoryAccessCallback[AccessIsWrite] = 518 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr, 519 FunctionType::get(IRB.getVoidTy(), Args1, false)); 520 } 521 MemProfMemmove = M.getOrInsertFunction( 522 ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy); 523 MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy", 524 PtrTy, PtrTy, PtrTy, IntptrTy); 525 MemProfMemset = 526 M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy, 527 PtrTy, IRB.getInt32Ty(), IntptrTy); 528 } 529 530 bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) { 531 // For each NSObject descendant having a +load method, this method is invoked 532 // by the ObjC runtime before any of the static constructors is called. 533 // Therefore we need to instrument such methods with a call to __memprof_init 534 // at the beginning in order to initialize our runtime before any access to 535 // the shadow memory. 536 // We cannot just ignore these methods, because they may call other 537 // instrumented functions. 538 if (F.getName().contains(" load]")) { 539 FunctionCallee MemProfInitFunction = 540 declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {}); 541 IRBuilder<> IRB(&F.front(), F.front().begin()); 542 IRB.CreateCall(MemProfInitFunction, {}); 543 return true; 544 } 545 return false; 546 } 547 548 bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) { 549 IRBuilder<> IRB(&F.front().front()); 550 Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal( 551 MemProfShadowMemoryDynamicAddress, IntptrTy); 552 if (F.getParent()->getPICLevel() == PICLevel::NotPIC) 553 cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true); 554 DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress); 555 return true; 556 } 557 558 bool MemProfiler::instrumentFunction(Function &F) { 559 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) 560 return false; 561 if (ClDebugFunc == F.getName()) 562 return false; 563 if (F.getName().starts_with("__memprof_")) 564 return false; 565 566 bool FunctionModified = false; 567 568 // If needed, insert __memprof_init. 569 // This function needs to be called even if the function body is not 570 // instrumented. 571 if (maybeInsertMemProfInitAtFunctionEntry(F)) 572 FunctionModified = true; 573 574 LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n"); 575 576 initializeCallbacks(*F.getParent()); 577 578 SmallVector<Instruction *, 16> ToInstrument; 579 580 // Fill the set of memory operations to instrument. 581 for (auto &BB : F) { 582 for (auto &Inst : BB) { 583 if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst)) 584 ToInstrument.push_back(&Inst); 585 } 586 } 587 588 if (ToInstrument.empty()) { 589 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified 590 << " " << F << "\n"); 591 592 return FunctionModified; 593 } 594 595 FunctionModified |= insertDynamicShadowAtFunctionEntry(F); 596 597 int NumInstrumented = 0; 598 for (auto *Inst : ToInstrument) { 599 if (ClDebugMin < 0 || ClDebugMax < 0 || 600 (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { 601 std::optional<InterestingMemoryAccess> Access = 602 isInterestingMemoryAccess(Inst); 603 if (Access) 604 instrumentMop(Inst, F.getParent()->getDataLayout(), *Access); 605 else 606 instrumentMemIntrinsic(cast<MemIntrinsic>(Inst)); 607 } 608 NumInstrumented++; 609 } 610 611 if (NumInstrumented > 0) 612 FunctionModified = true; 613 614 LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " " 615 << F << "\n"); 616 617 return FunctionModified; 618 } 619 620 static void addCallsiteMetadata(Instruction &I, 621 std::vector<uint64_t> &InlinedCallStack, 622 LLVMContext &Ctx) { 623 I.setMetadata(LLVMContext::MD_callsite, 624 buildCallstackMetadata(InlinedCallStack, Ctx)); 625 } 626 627 static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset, 628 uint32_t Column) { 629 llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> 630 HashBuilder; 631 HashBuilder.add(Function, LineOffset, Column); 632 llvm::BLAKE3Result<8> Hash = HashBuilder.final(); 633 uint64_t Id; 634 std::memcpy(&Id, Hash.data(), sizeof(Hash)); 635 return Id; 636 } 637 638 static uint64_t computeStackId(const memprof::Frame &Frame) { 639 return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column); 640 } 641 642 static void addCallStack(CallStackTrie &AllocTrie, 643 const AllocationInfo *AllocInfo) { 644 SmallVector<uint64_t> StackIds; 645 for (const auto &StackFrame : AllocInfo->CallStack) 646 StackIds.push_back(computeStackId(StackFrame)); 647 auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(), 648 AllocInfo->Info.getAllocCount(), 649 AllocInfo->Info.getTotalLifetime()); 650 AllocTrie.addCallStack(AllocType, StackIds); 651 } 652 653 // Helper to compare the InlinedCallStack computed from an instruction's debug 654 // info to a list of Frames from profile data (either the allocation data or a 655 // callsite). For callsites, the StartIndex to use in the Frame array may be 656 // non-zero. 657 static bool 658 stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack, 659 ArrayRef<uint64_t> InlinedCallStack, 660 unsigned StartIndex = 0) { 661 auto StackFrame = ProfileCallStack.begin() + StartIndex; 662 auto InlCallStackIter = InlinedCallStack.begin(); 663 for (; StackFrame != ProfileCallStack.end() && 664 InlCallStackIter != InlinedCallStack.end(); 665 ++StackFrame, ++InlCallStackIter) { 666 uint64_t StackId = computeStackId(*StackFrame); 667 if (StackId != *InlCallStackIter) 668 return false; 669 } 670 // Return true if we found and matched all stack ids from the call 671 // instruction. 672 return InlCallStackIter == InlinedCallStack.end(); 673 } 674 675 static void readMemprof(Module &M, Function &F, 676 IndexedInstrProfReader *MemProfReader, 677 const TargetLibraryInfo &TLI) { 678 auto &Ctx = M.getContext(); 679 // Previously we used getIRPGOFuncName() here. If F is local linkage, 680 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But 681 // llvm-profdata uses FuncName in dwarf to create GUID which doesn't 682 // contain FileName's prefix. It caused local linkage function can't 683 // find MemProfRecord. So we use getName() now. 684 // 'unique-internal-linkage-names' can make MemProf work better for local 685 // linkage function. 686 auto FuncName = F.getName(); 687 auto FuncGUID = Function::getGUID(FuncName); 688 std::optional<memprof::MemProfRecord> MemProfRec; 689 auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec); 690 if (Err) { 691 handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) { 692 auto Err = IPE.get(); 693 bool SkipWarning = false; 694 LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName 695 << ": "); 696 if (Err == instrprof_error::unknown_function) { 697 NumOfMemProfMissing++; 698 SkipWarning = !PGOWarnMissing; 699 LLVM_DEBUG(dbgs() << "unknown function"); 700 } else if (Err == instrprof_error::hash_mismatch) { 701 SkipWarning = 702 NoPGOWarnMismatch || 703 (NoPGOWarnMismatchComdatWeak && 704 (F.hasComdat() || 705 F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); 706 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); 707 } 708 709 if (SkipWarning) 710 return; 711 712 std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() + 713 Twine(" Hash = ") + std::to_string(FuncGUID)) 714 .str(); 715 716 Ctx.diagnose( 717 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning)); 718 }); 719 return; 720 } 721 722 // Detect if there are non-zero column numbers in the profile. If not, 723 // treat all column numbers as 0 when matching (i.e. ignore any non-zero 724 // columns in the IR). The profiled binary might have been built with 725 // column numbers disabled, for example. 726 bool ProfileHasColumns = false; 727 728 // Build maps of the location hash to all profile data with that leaf location 729 // (allocation info and the callsites). 730 std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo; 731 // For the callsites we need to record the index of the associated frame in 732 // the frame array (see comments below where the map entries are added). 733 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>> 734 LocHashToCallSites; 735 for (auto &AI : MemProfRec->AllocSites) { 736 // Associate the allocation info with the leaf frame. The later matching 737 // code will match any inlined call sequences in the IR with a longer prefix 738 // of call stack frames. 739 uint64_t StackId = computeStackId(AI.CallStack[0]); 740 LocHashToAllocInfo[StackId].insert(&AI); 741 ProfileHasColumns |= AI.CallStack[0].Column; 742 } 743 for (auto &CS : MemProfRec->CallSites) { 744 // Need to record all frames from leaf up to and including this function, 745 // as any of these may or may not have been inlined at this point. 746 unsigned Idx = 0; 747 for (auto &StackFrame : CS) { 748 uint64_t StackId = computeStackId(StackFrame); 749 LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++)); 750 ProfileHasColumns |= StackFrame.Column; 751 // Once we find this function, we can stop recording. 752 if (StackFrame.Function == FuncGUID) 753 break; 754 } 755 assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID); 756 } 757 758 auto GetOffset = [](const DILocation *DIL) { 759 return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & 760 0xffff; 761 }; 762 763 // Now walk the instructions, looking up the associated profile data using 764 // dbug locations. 765 for (auto &BB : F) { 766 for (auto &I : BB) { 767 if (I.isDebugOrPseudoInst()) 768 continue; 769 // We are only interested in calls (allocation or interior call stack 770 // context calls). 771 auto *CI = dyn_cast<CallBase>(&I); 772 if (!CI) 773 continue; 774 auto *CalledFunction = CI->getCalledFunction(); 775 if (CalledFunction && CalledFunction->isIntrinsic()) 776 continue; 777 // List of call stack ids computed from the location hashes on debug 778 // locations (leaf to inlined at root). 779 std::vector<uint64_t> InlinedCallStack; 780 // Was the leaf location found in one of the profile maps? 781 bool LeafFound = false; 782 // If leaf was found in a map, iterators pointing to its location in both 783 // of the maps. It might exist in neither, one, or both (the latter case 784 // can happen because we don't currently have discriminators to 785 // distinguish the case when a single line/col maps to both an allocation 786 // and another callsite). 787 std::map<uint64_t, std::set<const AllocationInfo *>>::iterator 788 AllocInfoIter; 789 std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, 790 unsigned>>>::iterator CallSitesIter; 791 for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr; 792 DIL = DIL->getInlinedAt()) { 793 // Use C++ linkage name if possible. Need to compile with 794 // -fdebug-info-for-profiling to get linkage name. 795 StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); 796 if (Name.empty()) 797 Name = DIL->getScope()->getSubprogram()->getName(); 798 auto CalleeGUID = Function::getGUID(Name); 799 auto StackId = computeStackId(CalleeGUID, GetOffset(DIL), 800 ProfileHasColumns ? DIL->getColumn() : 0); 801 // Check if we have found the profile's leaf frame. If yes, collect 802 // the rest of the call's inlined context starting here. If not, see if 803 // we find a match further up the inlined context (in case the profile 804 // was missing debug frames at the leaf). 805 if (!LeafFound) { 806 AllocInfoIter = LocHashToAllocInfo.find(StackId); 807 CallSitesIter = LocHashToCallSites.find(StackId); 808 if (AllocInfoIter != LocHashToAllocInfo.end() || 809 CallSitesIter != LocHashToCallSites.end()) 810 LeafFound = true; 811 } 812 if (LeafFound) 813 InlinedCallStack.push_back(StackId); 814 } 815 // If leaf not in either of the maps, skip inst. 816 if (!LeafFound) 817 continue; 818 819 // First add !memprof metadata from allocation info, if we found the 820 // instruction's leaf location in that map, and if the rest of the 821 // instruction's locations match the prefix Frame locations on an 822 // allocation context with the same leaf. 823 if (AllocInfoIter != LocHashToAllocInfo.end()) { 824 // Only consider allocations via new, to reduce unnecessary metadata, 825 // since those are the only allocations that will be targeted initially. 826 if (!isNewLikeFn(CI, &TLI)) 827 continue; 828 // We may match this instruction's location list to multiple MIB 829 // contexts. Add them to a Trie specialized for trimming the contexts to 830 // the minimal needed to disambiguate contexts with unique behavior. 831 CallStackTrie AllocTrie; 832 for (auto *AllocInfo : AllocInfoIter->second) { 833 // Check the full inlined call stack against this one. 834 // If we found and thus matched all frames on the call, include 835 // this MIB. 836 if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack, 837 InlinedCallStack)) 838 addCallStack(AllocTrie, AllocInfo); 839 } 840 // We might not have matched any to the full inlined call stack. 841 // But if we did, create and attach metadata, or a function attribute if 842 // all contexts have identical profiled behavior. 843 if (!AllocTrie.empty()) { 844 // MemprofMDAttached will be false if a function attribute was 845 // attached. 846 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI); 847 assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof)); 848 if (MemprofMDAttached) { 849 // Add callsite metadata for the instruction's location list so that 850 // it simpler later on to identify which part of the MIB contexts 851 // are from this particular instruction (including during inlining, 852 // when the callsite metdata will be updated appropriately). 853 // FIXME: can this be changed to strip out the matching stack 854 // context ids from the MIB contexts and not add any callsite 855 // metadata here to save space? 856 addCallsiteMetadata(I, InlinedCallStack, Ctx); 857 } 858 } 859 continue; 860 } 861 862 // Otherwise, add callsite metadata. If we reach here then we found the 863 // instruction's leaf location in the callsites map and not the allocation 864 // map. 865 assert(CallSitesIter != LocHashToCallSites.end()); 866 for (auto CallStackIdx : CallSitesIter->second) { 867 // If we found and thus matched all frames on the call, create and 868 // attach call stack metadata. 869 if (stackFrameIncludesInlinedCallStack( 870 *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) { 871 addCallsiteMetadata(I, InlinedCallStack, Ctx); 872 // Only need to find one with a matching call stack and add a single 873 // callsite metadata. 874 break; 875 } 876 } 877 } 878 } 879 } 880 881 MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile, 882 IntrusiveRefCntPtr<vfs::FileSystem> FS) 883 : MemoryProfileFileName(MemoryProfileFile), FS(FS) { 884 if (!FS) 885 this->FS = vfs::getRealFileSystem(); 886 } 887 888 PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { 889 LLVM_DEBUG(dbgs() << "Read in memory profile:"); 890 auto &Ctx = M.getContext(); 891 auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS); 892 if (Error E = ReaderOrErr.takeError()) { 893 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { 894 Ctx.diagnose( 895 DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message())); 896 }); 897 return PreservedAnalyses::all(); 898 } 899 900 std::unique_ptr<IndexedInstrProfReader> MemProfReader = 901 std::move(ReaderOrErr.get()); 902 if (!MemProfReader) { 903 Ctx.diagnose(DiagnosticInfoPGOProfile( 904 MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader"))); 905 return PreservedAnalyses::all(); 906 } 907 908 if (!MemProfReader->hasMemoryProfile()) { 909 Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), 910 "Not a memory profile")); 911 return PreservedAnalyses::all(); 912 } 913 914 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 915 916 for (auto &F : M) { 917 if (F.isDeclaration()) 918 continue; 919 920 const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 921 readMemprof(M, F, MemProfReader.get(), TLI); 922 } 923 924 return PreservedAnalyses::none(); 925 } 926