1 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains routines that help determine which pointers are captured. 10 // A pointer value is captured if the function makes a copy of any part of the 11 // pointer that outlives the call. Not being captured means, more or less, that 12 // the pointer is only dereferenced and not stored in a global. Returning part 13 // of the pointer as the function return value may or may not count as capturing 14 // the pointer, depending on the context. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Analysis/CaptureTracking.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/Analysis/CFG.h" 24 #include "llvm/Analysis/ValueTracking.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/IntrinsicInst.h" 29 #include "llvm/Support/CommandLine.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "capture-tracking" 34 35 STATISTIC(NumCaptured, "Number of pointers maybe captured"); 36 STATISTIC(NumNotCaptured, "Number of pointers not captured"); 37 STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before"); 38 STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before"); 39 40 /// The default value for MaxUsesToExplore argument. It's relatively small to 41 /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis, 42 /// where the results can't be cached. 43 /// TODO: we should probably introduce a caching CaptureTracking analysis and 44 /// use it where possible. The caching version can use much higher limit or 45 /// don't have this cap at all. 46 static cl::opt<unsigned> 47 DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden, 48 cl::desc("Maximal number of uses to explore."), 49 cl::init(100)); 50 51 unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() { 52 return DefaultMaxUsesToExplore; 53 } 54 55 CaptureTracker::~CaptureTracker() = default; 56 57 bool CaptureTracker::shouldExplore(const Use *U) { return true; } 58 59 namespace { 60 struct SimpleCaptureTracker : public CaptureTracker { 61 explicit SimpleCaptureTracker(bool ReturnCaptures, CaptureComponents Mask, 62 function_ref<bool(CaptureComponents)> StopFn) 63 : ReturnCaptures(ReturnCaptures), Mask(Mask), StopFn(StopFn) {} 64 65 void tooManyUses() override { 66 LLVM_DEBUG(dbgs() << "Captured due to too many uses\n"); 67 CC = Mask; 68 } 69 70 Action captured(const Use *U, UseCaptureInfo CI) override { 71 if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) 72 return ContinueIgnoringReturn; 73 74 if (capturesNothing(CI.UseCC & Mask)) 75 return Continue; 76 77 LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n"); 78 CC |= CI.UseCC & Mask; 79 return StopFn(CC) ? Stop : Continue; 80 } 81 82 bool ReturnCaptures; 83 CaptureComponents Mask; 84 function_ref<bool(CaptureComponents)> StopFn; 85 86 CaptureComponents CC = CaptureComponents::None; 87 }; 88 89 /// Only find pointer captures which happen before the given instruction. Uses 90 /// the dominator tree to determine whether one instruction is before another. 91 /// Only support the case where the Value is defined in the same basic block 92 /// as the given instruction and the use. 93 struct CapturesBefore : public CaptureTracker { 94 95 CapturesBefore(bool ReturnCaptures, const Instruction *I, 96 const DominatorTree *DT, bool IncludeI, const LoopInfo *LI, 97 CaptureComponents Mask, 98 function_ref<bool(CaptureComponents)> StopFn) 99 : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), 100 IncludeI(IncludeI), LI(LI), Mask(Mask), StopFn(StopFn) {} 101 102 void tooManyUses() override { CC = Mask; } 103 104 bool isSafeToPrune(Instruction *I) { 105 if (BeforeHere == I) 106 return !IncludeI; 107 108 // We explore this usage only if the usage can reach "BeforeHere". 109 // If use is not reachable from entry, there is no need to explore. 110 if (!DT->isReachableFromEntry(I->getParent())) 111 return true; 112 113 // Check whether there is a path from I to BeforeHere. 114 return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI); 115 } 116 117 Action captured(const Use *U, UseCaptureInfo CI) override { 118 Instruction *I = cast<Instruction>(U->getUser()); 119 if (isa<ReturnInst>(I) && !ReturnCaptures) 120 return ContinueIgnoringReturn; 121 122 // Check isSafeToPrune() here rather than in shouldExplore() to avoid 123 // an expensive reachability query for every instruction we look at. 124 // Instead we only do one for actual capturing candidates. 125 if (isSafeToPrune(I)) 126 // If the use is not reachable, the instruction result isn't either. 127 return ContinueIgnoringReturn; 128 129 if (capturesNothing(CI.UseCC & Mask)) 130 return Continue; 131 132 CC |= CI.UseCC & Mask; 133 return StopFn(CC) ? Stop : Continue; 134 } 135 136 const Instruction *BeforeHere; 137 const DominatorTree *DT; 138 139 bool ReturnCaptures; 140 bool IncludeI; 141 142 CaptureComponents CC = CaptureComponents::None; 143 144 const LoopInfo *LI; 145 CaptureComponents Mask; 146 function_ref<bool(CaptureComponents)> StopFn; 147 }; 148 149 /// Find the 'earliest' instruction before which the pointer is known not to 150 /// be captured. Here an instruction A is considered earlier than instruction 151 /// B, if A dominates B. If 2 escapes do not dominate each other, the 152 /// terminator of the common dominator is chosen. If not all uses cannot be 153 /// analyzed, the earliest escape is set to the first instruction in the 154 /// function entry block. 155 // NOTE: Users have to make sure instructions compared against the earliest 156 // escape are not in a cycle. 157 struct EarliestCaptures : public CaptureTracker { 158 159 EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT, 160 CaptureComponents Mask) 161 : DT(DT), ReturnCaptures(ReturnCaptures), F(F), Mask(Mask) {} 162 163 void tooManyUses() override { 164 CC = Mask; 165 EarliestCapture = &*F.getEntryBlock().begin(); 166 } 167 168 Action captured(const Use *U, UseCaptureInfo CI) override { 169 Instruction *I = cast<Instruction>(U->getUser()); 170 if (isa<ReturnInst>(I) && !ReturnCaptures) 171 return ContinueIgnoringReturn; 172 173 if (capturesAnything(CI.UseCC & Mask)) { 174 if (!EarliestCapture) 175 EarliestCapture = I; 176 else 177 EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I); 178 CC |= CI.UseCC & Mask; 179 } 180 181 // Continue analysis, as we need to see all potential captures. 182 return Continue; 183 } 184 185 const DominatorTree &DT; 186 bool ReturnCaptures; 187 Function &F; 188 CaptureComponents Mask; 189 190 Instruction *EarliestCapture = nullptr; 191 CaptureComponents CC = CaptureComponents::None; 192 }; 193 } // namespace 194 195 CaptureComponents llvm::PointerMayBeCaptured( 196 const Value *V, bool ReturnCaptures, CaptureComponents Mask, 197 function_ref<bool(CaptureComponents)> StopFn, unsigned MaxUsesToExplore) { 198 assert(!isa<GlobalValue>(V) && 199 "It doesn't make sense to ask whether a global is captured."); 200 201 LLVM_DEBUG(dbgs() << "Captured?: " << *V << " = "); 202 203 SimpleCaptureTracker SCT(ReturnCaptures, Mask, StopFn); 204 PointerMayBeCaptured(V, &SCT, MaxUsesToExplore); 205 if (capturesAnything(SCT.CC)) 206 ++NumCaptured; 207 else { 208 ++NumNotCaptured; 209 LLVM_DEBUG(dbgs() << "not captured\n"); 210 } 211 return SCT.CC; 212 } 213 214 bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, 215 unsigned MaxUsesToExplore) { 216 return capturesAnything( 217 PointerMayBeCaptured(V, ReturnCaptures, CaptureComponents::All, 218 capturesAnything, MaxUsesToExplore)); 219 } 220 221 CaptureComponents llvm::PointerMayBeCapturedBefore( 222 const Value *V, bool ReturnCaptures, const Instruction *I, 223 const DominatorTree *DT, bool IncludeI, CaptureComponents Mask, 224 function_ref<bool(CaptureComponents)> StopFn, const LoopInfo *LI, 225 unsigned MaxUsesToExplore) { 226 assert(!isa<GlobalValue>(V) && 227 "It doesn't make sense to ask whether a global is captured."); 228 229 if (!DT) 230 return PointerMayBeCaptured(V, ReturnCaptures, Mask, StopFn, 231 MaxUsesToExplore); 232 233 CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI, Mask, StopFn); 234 PointerMayBeCaptured(V, &CB, MaxUsesToExplore); 235 if (capturesAnything(CB.CC)) 236 ++NumCapturedBefore; 237 else 238 ++NumNotCapturedBefore; 239 return CB.CC; 240 } 241 242 bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, 243 const Instruction *I, 244 const DominatorTree *DT, bool IncludeI, 245 unsigned MaxUsesToExplore, 246 const LoopInfo *LI) { 247 return capturesAnything(PointerMayBeCapturedBefore( 248 V, ReturnCaptures, I, DT, IncludeI, CaptureComponents::All, 249 capturesAnything, LI, MaxUsesToExplore)); 250 } 251 252 std::pair<Instruction *, CaptureComponents> 253 llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures, 254 const DominatorTree &DT, CaptureComponents Mask, 255 unsigned MaxUsesToExplore) { 256 assert(!isa<GlobalValue>(V) && 257 "It doesn't make sense to ask whether a global is captured."); 258 259 EarliestCaptures CB(ReturnCaptures, F, DT, Mask); 260 PointerMayBeCaptured(V, &CB, MaxUsesToExplore); 261 if (capturesAnything(CB.CC)) 262 ++NumCapturedBefore; 263 else 264 ++NumNotCapturedBefore; 265 return {CB.EarliestCapture, CB.CC}; 266 } 267 268 UseCaptureInfo llvm::DetermineUseCaptureKind(const Use &U, const Value *Base) { 269 Instruction *I = dyn_cast<Instruction>(U.getUser()); 270 271 // TODO: Investigate non-instruction uses. 272 if (!I) 273 return CaptureComponents::All; 274 275 switch (I->getOpcode()) { 276 case Instruction::Call: 277 case Instruction::Invoke: { 278 auto *Call = cast<CallBase>(I); 279 // Not captured if the callee is readonly, doesn't return a copy through 280 // its return value and doesn't unwind or diverge (a readonly function can 281 // leak bits by throwing an exception or not depending on the input value). 282 if (Call->onlyReadsMemory() && Call->doesNotThrow() && Call->willReturn() && 283 Call->getType()->isVoidTy()) 284 return CaptureComponents::None; 285 286 // The pointer is not captured if returned pointer is not captured. 287 // NOTE: CaptureTracking users should not assume that only functions 288 // marked with nocapture do not capture. This means that places like 289 // getUnderlyingObject in ValueTracking or DecomposeGEPExpression 290 // in BasicAA also need to know about this property. 291 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true)) 292 return UseCaptureInfo::passthrough(); 293 294 // Volatile operations effectively capture the memory location that they 295 // load and store to. 296 if (auto *MI = dyn_cast<MemIntrinsic>(Call)) 297 if (MI->isVolatile()) 298 return CaptureComponents::All; 299 300 // Calling a function pointer does not in itself cause the pointer to 301 // be captured. This is a subtle point considering that (for example) 302 // the callee might return its own address. It is analogous to saying 303 // that loading a value from a pointer does not cause the pointer to be 304 // captured, even though the loaded value might be the pointer itself 305 // (think of self-referential objects). 306 if (Call->isCallee(&U)) 307 return CaptureComponents::None; 308 309 // Not captured if only passed via 'nocapture' arguments. 310 assert(Call->isDataOperand(&U) && "Non-callee must be data operand"); 311 CaptureInfo CI = Call->getCaptureInfo(Call->getDataOperandNo(&U)); 312 return UseCaptureInfo(CI.getOtherComponents(), CI.getRetComponents()); 313 } 314 case Instruction::Load: 315 // Volatile loads make the address observable. 316 if (cast<LoadInst>(I)->isVolatile()) 317 return CaptureComponents::All; 318 return CaptureComponents::None; 319 case Instruction::VAArg: 320 // "va-arg" from a pointer does not cause it to be captured. 321 return CaptureComponents::None; 322 case Instruction::Store: 323 // Stored the pointer - conservatively assume it may be captured. 324 // Volatile stores make the address observable. 325 if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile()) 326 return CaptureComponents::All; 327 return CaptureComponents::None; 328 case Instruction::AtomicRMW: { 329 // atomicrmw conceptually includes both a load and store from 330 // the same location. 331 // As with a store, the location being accessed is not captured, 332 // but the value being stored is. 333 // Volatile stores make the address observable. 334 auto *ARMWI = cast<AtomicRMWInst>(I); 335 if (U.getOperandNo() == 1 || ARMWI->isVolatile()) 336 return CaptureComponents::All; 337 return CaptureComponents::None; 338 } 339 case Instruction::AtomicCmpXchg: { 340 // cmpxchg conceptually includes both a load and store from 341 // the same location. 342 // As with a store, the location being accessed is not captured, 343 // but the value being stored is. 344 // Volatile stores make the address observable. 345 auto *ACXI = cast<AtomicCmpXchgInst>(I); 346 if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile()) 347 return CaptureComponents::All; 348 return CaptureComponents::None; 349 } 350 case Instruction::GetElementPtr: 351 // AA does not support pointers of vectors, so GEP vector splats need to 352 // be considered as captures. 353 if (I->getType()->isVectorTy()) 354 return CaptureComponents::All; 355 return UseCaptureInfo::passthrough(); 356 case Instruction::BitCast: 357 case Instruction::PHI: 358 case Instruction::Select: 359 case Instruction::AddrSpaceCast: 360 // The original value is not captured via this if the new value isn't. 361 return UseCaptureInfo::passthrough(); 362 case Instruction::ICmp: { 363 unsigned Idx = U.getOperandNo(); 364 unsigned OtherIdx = 1 - Idx; 365 if (isa<ConstantPointerNull>(I->getOperand(OtherIdx)) && 366 cast<ICmpInst>(I)->isEquality()) { 367 // TODO(captures): Remove these special cases once we make use of 368 // captures(address_is_null). 369 370 // Don't count comparisons of a no-alias return value against null as 371 // captures. This allows us to ignore comparisons of malloc results 372 // with null, for example. 373 if (U->getType()->getPointerAddressSpace() == 0) 374 if (isNoAliasCall(U.get()->stripPointerCasts())) 375 return CaptureComponents::None; 376 377 // Check whether this is a comparison of the base pointer against 378 // null. 379 if (U.get() == Base) 380 return CaptureComponents::AddressIsNull; 381 } 382 383 // Otherwise, be conservative. There are crazy ways to capture pointers 384 // using comparisons. However, only the address is captured, not the 385 // provenance. 386 return CaptureComponents::Address; 387 } 388 default: 389 // Something else - be conservative and say it is captured. 390 return CaptureComponents::All; 391 } 392 } 393 394 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, 395 unsigned MaxUsesToExplore) { 396 assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); 397 if (MaxUsesToExplore == 0) 398 MaxUsesToExplore = DefaultMaxUsesToExplore; 399 400 SmallVector<const Use *, 20> Worklist; 401 Worklist.reserve(getDefaultMaxUsesToExploreForCaptureTracking()); 402 SmallSet<const Use *, 20> Visited; 403 404 auto AddUses = [&](const Value *V) { 405 for (const Use &U : V->uses()) { 406 // If there are lots of uses, conservatively say that the value 407 // is captured to avoid taking too much compile time. 408 if (Visited.size() >= MaxUsesToExplore) { 409 Tracker->tooManyUses(); 410 return false; 411 } 412 if (!Visited.insert(&U).second) 413 continue; 414 if (!Tracker->shouldExplore(&U)) 415 continue; 416 Worklist.push_back(&U); 417 } 418 return true; 419 }; 420 if (!AddUses(V)) 421 return; 422 423 while (!Worklist.empty()) { 424 const Use *U = Worklist.pop_back_val(); 425 UseCaptureInfo CI = DetermineUseCaptureKind(*U, V); 426 if (capturesAnything(CI.UseCC)) { 427 switch (Tracker->captured(U, CI)) { 428 case CaptureTracker::Stop: 429 return; 430 case CaptureTracker::ContinueIgnoringReturn: 431 continue; 432 case CaptureTracker::Continue: 433 // Fall through to passthrough handling, but only if ResultCC contains 434 // additional components that UseCC does not. We assume that a 435 // capture at this point will be strictly more constraining than a 436 // later capture from following the return value. 437 if (capturesNothing(CI.ResultCC & ~CI.UseCC)) 438 continue; 439 break; 440 } 441 } 442 // TODO(captures): We could keep track of ResultCC for the users. 443 if (capturesAnything(CI.ResultCC) && !AddUses(U->getUser())) 444 return; 445 } 446 447 // All uses examined. 448 } 449