1 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains routines that help determine which pointers are captured. 10 // A pointer value is captured if the function makes a copy of any part of the 11 // pointer that outlives the call. Not being captured means, more or less, that 12 // the pointer is only dereferenced and not stored in a global. Returning part 13 // of the pointer as the function return value may or may not count as capturing 14 // the pointer, depending on the context. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Analysis/CaptureTracking.h" 19 #include "llvm/ADT/SmallSet.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/Analysis/CFG.h" 24 #include "llvm/Analysis/ValueTracking.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/IntrinsicInst.h" 29 #include "llvm/Support/CommandLine.h" 30 31 using namespace llvm; 32 33 #define DEBUG_TYPE "capture-tracking" 34 35 STATISTIC(NumCaptured, "Number of pointers maybe captured"); 36 STATISTIC(NumNotCaptured, "Number of pointers not captured"); 37 STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before"); 38 STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before"); 39 40 /// The default value for MaxUsesToExplore argument. It's relatively small to 41 /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis, 42 /// where the results can't be cached. 43 /// TODO: we should probably introduce a caching CaptureTracking analysis and 44 /// use it where possible. The caching version can use much higher limit or 45 /// don't have this cap at all. 46 static cl::opt<unsigned> 47 DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden, 48 cl::desc("Maximal number of uses to explore."), 49 cl::init(100)); 50 51 unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() { 52 return DefaultMaxUsesToExplore; 53 } 54 55 CaptureTracker::~CaptureTracker() = default; 56 57 bool CaptureTracker::shouldExplore(const Use *U) { return true; } 58 59 bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) { 60 // We want comparisons to null pointers to not be considered capturing, 61 // but need to guard against cases like gep(p, -ptrtoint(p2)) == null, 62 // which are equivalent to p == p2 and would capture the pointer. 63 // 64 // A dereferenceable pointer is a case where this is known to be safe, 65 // because the pointer resulting from such a construction would not be 66 // dereferenceable. 67 // 68 // It is not sufficient to check for inbounds GEP here, because GEP with 69 // zero offset is always inbounds. 70 bool CanBeNull, CanBeFreed; 71 return O->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); 72 } 73 74 namespace { 75 struct SimpleCaptureTracker : public CaptureTracker { 76 explicit SimpleCaptureTracker(bool ReturnCaptures) 77 : ReturnCaptures(ReturnCaptures) {} 78 79 void tooManyUses() override { 80 LLVM_DEBUG(dbgs() << "Captured due to too many uses\n"); 81 Captured = true; 82 } 83 84 bool captured(const Use *U) override { 85 if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures) 86 return false; 87 88 LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n"); 89 90 Captured = true; 91 return true; 92 } 93 94 bool ReturnCaptures; 95 96 bool Captured = false; 97 }; 98 99 /// Only find pointer captures which happen before the given instruction. Uses 100 /// the dominator tree to determine whether one instruction is before another. 101 /// Only support the case where the Value is defined in the same basic block 102 /// as the given instruction and the use. 103 struct CapturesBefore : public CaptureTracker { 104 105 CapturesBefore(bool ReturnCaptures, const Instruction *I, 106 const DominatorTree *DT, bool IncludeI, const LoopInfo *LI) 107 : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), 108 IncludeI(IncludeI), LI(LI) {} 109 110 void tooManyUses() override { Captured = true; } 111 112 bool isSafeToPrune(Instruction *I) { 113 if (BeforeHere == I) 114 return !IncludeI; 115 116 // We explore this usage only if the usage can reach "BeforeHere". 117 // If use is not reachable from entry, there is no need to explore. 118 if (!DT->isReachableFromEntry(I->getParent())) 119 return true; 120 121 // Check whether there is a path from I to BeforeHere. 122 return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI); 123 } 124 125 bool captured(const Use *U) override { 126 Instruction *I = cast<Instruction>(U->getUser()); 127 if (isa<ReturnInst>(I) && !ReturnCaptures) 128 return false; 129 130 // Check isSafeToPrune() here rather than in shouldExplore() to avoid 131 // an expensive reachability query for every instruction we look at. 132 // Instead we only do one for actual capturing candidates. 133 if (isSafeToPrune(I)) 134 return false; 135 136 Captured = true; 137 return true; 138 } 139 140 const Instruction *BeforeHere; 141 const DominatorTree *DT; 142 143 bool ReturnCaptures; 144 bool IncludeI; 145 146 bool Captured = false; 147 148 const LoopInfo *LI; 149 }; 150 151 /// Find the 'earliest' instruction before which the pointer is known not to 152 /// be captured. Here an instruction A is considered earlier than instruction 153 /// B, if A dominates B. If 2 escapes do not dominate each other, the 154 /// terminator of the common dominator is chosen. If not all uses cannot be 155 /// analyzed, the earliest escape is set to the first instruction in the 156 /// function entry block. 157 // NOTE: Users have to make sure instructions compared against the earliest 158 // escape are not in a cycle. 159 struct EarliestCaptures : public CaptureTracker { 160 161 EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT) 162 : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {} 163 164 void tooManyUses() override { 165 Captured = true; 166 EarliestCapture = &*F.getEntryBlock().begin(); 167 } 168 169 bool captured(const Use *U) override { 170 Instruction *I = cast<Instruction>(U->getUser()); 171 if (isa<ReturnInst>(I) && !ReturnCaptures) 172 return false; 173 174 if (!EarliestCapture) 175 EarliestCapture = I; 176 else 177 EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I); 178 Captured = true; 179 180 // Return false to continue analysis; we need to see all potential 181 // captures. 182 return false; 183 } 184 185 Instruction *EarliestCapture = nullptr; 186 187 const DominatorTree &DT; 188 189 bool ReturnCaptures; 190 191 bool Captured = false; 192 193 Function &F; 194 }; 195 } // namespace 196 197 /// PointerMayBeCaptured - Return true if this pointer value may be captured 198 /// by the enclosing function (which is required to exist). This routine can 199 /// be expensive, so consider caching the results. The boolean ReturnCaptures 200 /// specifies whether returning the value (or part of it) from the function 201 /// counts as capturing it or not. The boolean StoreCaptures specified whether 202 /// storing the value (or part of it) into memory anywhere automatically 203 /// counts as capturing it or not. 204 bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, 205 bool StoreCaptures, unsigned MaxUsesToExplore) { 206 assert(!isa<GlobalValue>(V) && 207 "It doesn't make sense to ask whether a global is captured."); 208 209 // TODO: If StoreCaptures is not true, we could do Fancy analysis 210 // to determine whether this store is not actually an escape point. 211 // In that case, BasicAliasAnalysis should be updated as well to 212 // take advantage of this. 213 (void)StoreCaptures; 214 215 LLVM_DEBUG(dbgs() << "Captured?: " << *V << " = "); 216 217 SimpleCaptureTracker SCT(ReturnCaptures); 218 PointerMayBeCaptured(V, &SCT, MaxUsesToExplore); 219 if (SCT.Captured) 220 ++NumCaptured; 221 else { 222 ++NumNotCaptured; 223 LLVM_DEBUG(dbgs() << "not captured\n"); 224 } 225 return SCT.Captured; 226 } 227 228 /// PointerMayBeCapturedBefore - Return true if this pointer value may be 229 /// captured by the enclosing function (which is required to exist). If a 230 /// DominatorTree is provided, only captures which happen before the given 231 /// instruction are considered. This routine can be expensive, so consider 232 /// caching the results. The boolean ReturnCaptures specifies whether 233 /// returning the value (or part of it) from the function counts as capturing 234 /// it or not. The boolean StoreCaptures specified whether storing the value 235 /// (or part of it) into memory anywhere automatically counts as capturing it 236 /// or not. 237 bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, 238 bool StoreCaptures, const Instruction *I, 239 const DominatorTree *DT, bool IncludeI, 240 unsigned MaxUsesToExplore, 241 const LoopInfo *LI) { 242 assert(!isa<GlobalValue>(V) && 243 "It doesn't make sense to ask whether a global is captured."); 244 245 if (!DT) 246 return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, 247 MaxUsesToExplore); 248 249 // TODO: See comment in PointerMayBeCaptured regarding what could be done 250 // with StoreCaptures. 251 252 CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI); 253 PointerMayBeCaptured(V, &CB, MaxUsesToExplore); 254 if (CB.Captured) 255 ++NumCapturedBefore; 256 else 257 ++NumNotCapturedBefore; 258 return CB.Captured; 259 } 260 261 Instruction *llvm::FindEarliestCapture(const Value *V, Function &F, 262 bool ReturnCaptures, bool StoreCaptures, 263 const DominatorTree &DT, 264 unsigned MaxUsesToExplore) { 265 assert(!isa<GlobalValue>(V) && 266 "It doesn't make sense to ask whether a global is captured."); 267 268 EarliestCaptures CB(ReturnCaptures, F, DT); 269 PointerMayBeCaptured(V, &CB, MaxUsesToExplore); 270 if (CB.Captured) 271 ++NumCapturedBefore; 272 else 273 ++NumNotCapturedBefore; 274 return CB.EarliestCapture; 275 } 276 277 UseCaptureKind llvm::DetermineUseCaptureKind( 278 const Use &U, 279 function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) { 280 Instruction *I = dyn_cast<Instruction>(U.getUser()); 281 282 // TODO: Investigate non-instruction uses. 283 if (!I) 284 return UseCaptureKind::MAY_CAPTURE; 285 286 switch (I->getOpcode()) { 287 case Instruction::Call: 288 case Instruction::Invoke: { 289 auto *Call = cast<CallBase>(I); 290 // Not captured if the callee is readonly, doesn't return a copy through 291 // its return value and doesn't unwind (a readonly function can leak bits 292 // by throwing an exception or not depending on the input value). 293 if (Call->onlyReadsMemory() && Call->doesNotThrow() && 294 Call->getType()->isVoidTy()) 295 return UseCaptureKind::NO_CAPTURE; 296 297 // The pointer is not captured if returned pointer is not captured. 298 // NOTE: CaptureTracking users should not assume that only functions 299 // marked with nocapture do not capture. This means that places like 300 // getUnderlyingObject in ValueTracking or DecomposeGEPExpression 301 // in BasicAA also need to know about this property. 302 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true)) 303 return UseCaptureKind::PASSTHROUGH; 304 305 // Volatile operations effectively capture the memory location that they 306 // load and store to. 307 if (auto *MI = dyn_cast<MemIntrinsic>(Call)) 308 if (MI->isVolatile()) 309 return UseCaptureKind::MAY_CAPTURE; 310 311 // Calling a function pointer does not in itself cause the pointer to 312 // be captured. This is a subtle point considering that (for example) 313 // the callee might return its own address. It is analogous to saying 314 // that loading a value from a pointer does not cause the pointer to be 315 // captured, even though the loaded value might be the pointer itself 316 // (think of self-referential objects). 317 if (Call->isCallee(&U)) 318 return UseCaptureKind::NO_CAPTURE; 319 320 // Not captured if only passed via 'nocapture' arguments. 321 if (Call->isDataOperand(&U) && 322 !Call->doesNotCapture(Call->getDataOperandNo(&U))) { 323 // The parameter is not marked 'nocapture' - captured. 324 return UseCaptureKind::MAY_CAPTURE; 325 } 326 return UseCaptureKind::NO_CAPTURE; 327 } 328 case Instruction::Load: 329 // Volatile loads make the address observable. 330 if (cast<LoadInst>(I)->isVolatile()) 331 return UseCaptureKind::MAY_CAPTURE; 332 return UseCaptureKind::NO_CAPTURE; 333 case Instruction::VAArg: 334 // "va-arg" from a pointer does not cause it to be captured. 335 return UseCaptureKind::NO_CAPTURE; 336 case Instruction::Store: 337 // Stored the pointer - conservatively assume it may be captured. 338 // Volatile stores make the address observable. 339 if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile()) 340 return UseCaptureKind::MAY_CAPTURE; 341 return UseCaptureKind::NO_CAPTURE; 342 case Instruction::AtomicRMW: { 343 // atomicrmw conceptually includes both a load and store from 344 // the same location. 345 // As with a store, the location being accessed is not captured, 346 // but the value being stored is. 347 // Volatile stores make the address observable. 348 auto *ARMWI = cast<AtomicRMWInst>(I); 349 if (U.getOperandNo() == 1 || ARMWI->isVolatile()) 350 return UseCaptureKind::MAY_CAPTURE; 351 return UseCaptureKind::NO_CAPTURE; 352 } 353 case Instruction::AtomicCmpXchg: { 354 // cmpxchg conceptually includes both a load and store from 355 // the same location. 356 // As with a store, the location being accessed is not captured, 357 // but the value being stored is. 358 // Volatile stores make the address observable. 359 auto *ACXI = cast<AtomicCmpXchgInst>(I); 360 if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile()) 361 return UseCaptureKind::MAY_CAPTURE; 362 return UseCaptureKind::NO_CAPTURE; 363 } 364 case Instruction::GetElementPtr: 365 // AA does not support pointers of vectors, so GEP vector splats need to 366 // be considered as captures. 367 if (I->getType()->isVectorTy()) 368 return UseCaptureKind::MAY_CAPTURE; 369 return UseCaptureKind::PASSTHROUGH; 370 case Instruction::BitCast: 371 case Instruction::PHI: 372 case Instruction::Select: 373 case Instruction::AddrSpaceCast: 374 // The original value is not captured via this if the new value isn't. 375 return UseCaptureKind::PASSTHROUGH; 376 case Instruction::ICmp: { 377 unsigned Idx = U.getOperandNo(); 378 unsigned OtherIdx = 1 - Idx; 379 if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) { 380 // Don't count comparisons of a no-alias return value against null as 381 // captures. This allows us to ignore comparisons of malloc results 382 // with null, for example. 383 if (CPN->getType()->getAddressSpace() == 0) 384 if (isNoAliasCall(U.get()->stripPointerCasts())) 385 return UseCaptureKind::NO_CAPTURE; 386 if (!I->getFunction()->nullPointerIsDefined()) { 387 auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation(); 388 // Comparing a dereferenceable_or_null pointer against null cannot 389 // lead to pointer escapes, because if it is not null it must be a 390 // valid (in-bounds) pointer. 391 const DataLayout &DL = I->getDataLayout(); 392 if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL)) 393 return UseCaptureKind::NO_CAPTURE; 394 } 395 } 396 397 // Otherwise, be conservative. There are crazy ways to capture pointers 398 // using comparisons. 399 return UseCaptureKind::MAY_CAPTURE; 400 } 401 default: 402 // Something else - be conservative and say it is captured. 403 return UseCaptureKind::MAY_CAPTURE; 404 } 405 } 406 407 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, 408 unsigned MaxUsesToExplore) { 409 assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); 410 if (MaxUsesToExplore == 0) 411 MaxUsesToExplore = DefaultMaxUsesToExplore; 412 413 SmallVector<const Use *, 20> Worklist; 414 Worklist.reserve(getDefaultMaxUsesToExploreForCaptureTracking()); 415 SmallSet<const Use *, 20> Visited; 416 417 auto AddUses = [&](const Value *V) { 418 for (const Use &U : V->uses()) { 419 // If there are lots of uses, conservatively say that the value 420 // is captured to avoid taking too much compile time. 421 if (Visited.size() >= MaxUsesToExplore) { 422 Tracker->tooManyUses(); 423 return false; 424 } 425 if (!Visited.insert(&U).second) 426 continue; 427 if (!Tracker->shouldExplore(&U)) 428 continue; 429 Worklist.push_back(&U); 430 } 431 return true; 432 }; 433 if (!AddUses(V)) 434 return; 435 436 auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) { 437 return Tracker->isDereferenceableOrNull(V, DL); 438 }; 439 while (!Worklist.empty()) { 440 const Use *U = Worklist.pop_back_val(); 441 switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) { 442 case UseCaptureKind::NO_CAPTURE: 443 continue; 444 case UseCaptureKind::MAY_CAPTURE: 445 if (Tracker->captured(U)) 446 return; 447 continue; 448 case UseCaptureKind::PASSTHROUGH: 449 if (!AddUses(U->getUser())) 450 return; 451 continue; 452 } 453 } 454 455 // All uses examined. 456 } 457 458 bool llvm::isNonEscapingLocalObject( 459 const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) { 460 SmallDenseMap<const Value *, bool, 8>::iterator CacheIt; 461 if (IsCapturedCache) { 462 bool Inserted; 463 std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false}); 464 if (!Inserted) 465 // Found cached result, return it! 466 return CacheIt->second; 467 } 468 469 // If this is an identified function-local object, check to see if it escapes. 470 if (isIdentifiedFunctionLocal(V)) { 471 // Set StoreCaptures to True so that we can assume in our callers that the 472 // pointer is not the result of a load instruction. Currently 473 // PointerMayBeCaptured doesn't have any special analysis for the 474 // StoreCaptures=false case; if it did, our callers could be refined to be 475 // more precise. 476 auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); 477 if (IsCapturedCache) 478 CacheIt->second = Ret; 479 return Ret; 480 } 481 482 return false; 483 } 484