//===- MemProfiler.cpp - memory allocation and access profiler ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <map>
#include <set>

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultShadowGranularity = 64;

// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset
// With the default granularity of 64 bytes and scale of 3, each 64-byte
// granule of memory maps to one 8-byte shadow counter.

static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultShadowGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  uint64_t TypeSize;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, uint32_t TypeSize, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  ShadowMapping Mapping;

  // These arrays are indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];
  FunctionCallee MemProfMemoryAccessCallbackSized[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {
  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
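  // That load is created by insertDynamicShadowAtFunctionEntry; without this
  // check it would be treated as an interesting access itself.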
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.endswith(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().startswith("__llvm"))
      return std::nullopt;
  }

  const DataLayout &DL = I->getModule()->getDataLayout();
  Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy);
  return Access;
}

void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, ElemTypeSize,
                      IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.TypeSize, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    uint32_t TypeSize, bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  // Create an inline sequence to compute shadow location, and increment the
  // value by one.
  Type *ShadowTy = Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}

// Create the variable for the profile file name.
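// The name comes from the MemProfProfileFilename module flag; the memprof
// runtime is expected to read this weak symbol to determine where to write
// the profile.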
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

bool ModuleMemProfiler::instrumentModule(Module &M) {
  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";

    SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallbackSized[AccessIsWrite] =
        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + "N",
                              FunctionType::get(IRB.getVoidTy(), Args2, false));

    MemProfMemoryAccessCallback[AccessIsWrite] =
        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr,
                              FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                                        IRB.getInt8PtrTy(), IntptrTy);
  MemProfMemset = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset",
                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                                        IRB.getInt32Ty(), IntptrTy);
}

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is invoked
  // by the ObjC runtime before any of the static constructors is called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
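  // ObjC +load methods have names of the form "+[ClassName load]", hence the
  // substring match below.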
  if (F.getName().find(" load]") != std::string::npos) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().startswith("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getParent()->getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                std::vector<uint64_t> &InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

static void addCallStack(CallStackTrie &AllocTrie,
                         const AllocationInfo *AllocInfo) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  AllocTrie.addCallStack(AllocType, StackIds);
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the StartIndex to use in the Frame array may be
// non-zero.
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack,
                                   unsigned StartIndex = 0) {
  auto StackFrame = ProfileCallStack.begin() + StartIndex;
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static void readMemprof(Module &M, Function &F,
                        IndexedInstrProfReader *MemProfReader,
                        const TargetLibraryInfo &TLI) {
  auto &Ctx = M.getContext();

  auto FuncName = getPGOFuncName(F);
  auto FuncGUID = Function::getGUID(FuncName);
  Expected<memprof::MemProfRecord> MemProfResult =
      MemProfReader->getMemProfRecord(FuncGUID);
  if (Error E = MemProfResult.takeError()) {
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // For the callsites we need to record the index of the associated frame in
  // the frame array (see comments below where the map entries are added).
  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
      LocHashToCallSites;
  const auto MemProfRec = std::move(MemProfResult.get());
  for (auto &AI : MemProfRec.AllocSites) {
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
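    // For example, a profiled context [F3, F2, F1] (leaf first, names
    // illustrative) is keyed on F3's stack id; an instruction whose inlined
    // stack computes the ids [F3, F2] matches it as a prefix.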
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
  }
  for (auto &CS : MemProfRec.CallSites) {
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      std::vector<uint64_t> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
          AllocInfoIter;
      std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
                                            unsigned>>>::iterator CallSitesIter;
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId =
            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
        // LeafFound will only be false on the first iteration, since we either
        // set it true or break out of the loop below.
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          // Check if the leaf is in one of the maps. If not, no need to look
          // further at this call.
          if (AllocInfoIter == LocHashToAllocInfo.end() &&
              CallSitesIter == LocHashToCallSites.end())
            break;
          LeafFound = true;
        }
        InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations via new, to reduce unnecessary metadata,
        // since those are the only allocations that will be targeted initially.
        if (!isNewLikeFn(CI, &TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack))
            addCallStack(AllocTrie, AllocInfo);
        }
        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(
                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI);
  }

  return PreservedAnalyses::none();
}