xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/CaptureTracking.cpp (revision f5f40dd63bc7acbb5312b26ac1ea1103c12352a6)
1 //===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help determine which pointers are captured.
10 // A pointer value is captured if the function makes a copy of any part of the
11 // pointer that outlives the call.  Not being captured means, more or less, that
12 // the pointer is only dereferenced and not stored in a global.  Returning part
13 // of the pointer as the function return value may or may not count as capturing
14 // the pointer, depending on the context.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Analysis/CaptureTracking.h"
19 #include "llvm/ADT/SmallSet.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/Analysis/AliasAnalysis.h"
23 #include "llvm/Analysis/CFG.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/IR/Dominators.h"
27 #include "llvm/IR/Instructions.h"
28 #include "llvm/IR/IntrinsicInst.h"
29 #include "llvm/Support/CommandLine.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "capture-tracking"
34 
35 STATISTIC(NumCaptured,          "Number of pointers maybe captured");
36 STATISTIC(NumNotCaptured,       "Number of pointers not captured");
37 STATISTIC(NumCapturedBefore,    "Number of pointers maybe captured before");
38 STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before");
39 
40 /// The default value for MaxUsesToExplore argument. It's relatively small to
41 /// keep the cost of analysis reasonable for clients like BasicAliasAnalysis,
42 /// where the results can't be cached.
43 /// TODO: we should probably introduce a caching CaptureTracking analysis and
44 /// use it where possible. The caching version can use much higher limit or
45 /// don't have this cap at all.
46 static cl::opt<unsigned>
47     DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
48                             cl::desc("Maximal number of uses to explore."),
49                             cl::init(100));
50 
51 unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() {
52   return DefaultMaxUsesToExplore;
53 }
54 
55 CaptureTracker::~CaptureTracker() = default;
56 
57 bool CaptureTracker::shouldExplore(const Use *U) { return true; }
58 
59 bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
60   // We want comparisons to null pointers to not be considered capturing,
61   // but need to guard against cases like gep(p, -ptrtoint(p2)) == null,
62   // which are equivalent to p == p2 and would capture the pointer.
63   //
64   // A dereferenceable pointer is a case where this is known to be safe,
65   // because the pointer resulting from such a construction would not be
66   // dereferenceable.
67   //
68   // It is not sufficient to check for inbounds GEP here, because GEP with
69   // zero offset is always inbounds.
70   bool CanBeNull, CanBeFreed;
71   return O->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
72 }
73 
74 namespace {
75   struct SimpleCaptureTracker : public CaptureTracker {
76     explicit SimpleCaptureTracker(bool ReturnCaptures)
77         : ReturnCaptures(ReturnCaptures) {}
78 
79     void tooManyUses() override {
80       LLVM_DEBUG(dbgs() << "Captured due to too many uses\n");
81       Captured = true;
82     }
83 
84     bool captured(const Use *U) override {
85       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
86         return false;
87 
88       LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n");
89 
90       Captured = true;
91       return true;
92     }
93 
94     bool ReturnCaptures;
95 
96     bool Captured = false;
97   };
98 
99   /// Only find pointer captures which happen before the given instruction. Uses
100   /// the dominator tree to determine whether one instruction is before another.
101   /// Only support the case where the Value is defined in the same basic block
102   /// as the given instruction and the use.
103   struct CapturesBefore : public CaptureTracker {
104 
105     CapturesBefore(bool ReturnCaptures, const Instruction *I,
106                    const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
107         : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
108           IncludeI(IncludeI), LI(LI) {}
109 
110     void tooManyUses() override { Captured = true; }
111 
112     bool isSafeToPrune(Instruction *I) {
113       if (BeforeHere == I)
114         return !IncludeI;
115 
116       // We explore this usage only if the usage can reach "BeforeHere".
117       // If use is not reachable from entry, there is no need to explore.
118       if (!DT->isReachableFromEntry(I->getParent()))
119         return true;
120 
121       // Check whether there is a path from I to BeforeHere.
122       return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
123     }
124 
125     bool captured(const Use *U) override {
126       Instruction *I = cast<Instruction>(U->getUser());
127       if (isa<ReturnInst>(I) && !ReturnCaptures)
128         return false;
129 
130       // Check isSafeToPrune() here rather than in shouldExplore() to avoid
131       // an expensive reachability query for every instruction we look at.
132       // Instead we only do one for actual capturing candidates.
133       if (isSafeToPrune(I))
134         return false;
135 
136       Captured = true;
137       return true;
138     }
139 
140     const Instruction *BeforeHere;
141     const DominatorTree *DT;
142 
143     bool ReturnCaptures;
144     bool IncludeI;
145 
146     bool Captured = false;
147 
148     const LoopInfo *LI;
149   };
150 
151   /// Find the 'earliest' instruction before which the pointer is known not to
152   /// be captured. Here an instruction A is considered earlier than instruction
153   /// B, if A dominates B. If 2 escapes do not dominate each other, the
154   /// terminator of the common dominator is chosen. If not all uses cannot be
155   /// analyzed, the earliest escape is set to the first instruction in the
156   /// function entry block.
157   // NOTE: Users have to make sure instructions compared against the earliest
158   // escape are not in a cycle.
159   struct EarliestCaptures : public CaptureTracker {
160 
161     EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
162         : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
163 
164     void tooManyUses() override {
165       Captured = true;
166       EarliestCapture = &*F.getEntryBlock().begin();
167     }
168 
169     bool captured(const Use *U) override {
170       Instruction *I = cast<Instruction>(U->getUser());
171       if (isa<ReturnInst>(I) && !ReturnCaptures)
172         return false;
173 
174       if (!EarliestCapture)
175         EarliestCapture = I;
176       else
177         EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I);
178       Captured = true;
179 
180       // Return false to continue analysis; we need to see all potential
181       // captures.
182       return false;
183     }
184 
185     Instruction *EarliestCapture = nullptr;
186 
187     const DominatorTree &DT;
188 
189     bool ReturnCaptures;
190 
191     bool Captured = false;
192 
193     Function &F;
194   };
195 }
196 
197 /// PointerMayBeCaptured - Return true if this pointer value may be captured
198 /// by the enclosing function (which is required to exist).  This routine can
199 /// be expensive, so consider caching the results.  The boolean ReturnCaptures
200 /// specifies whether returning the value (or part of it) from the function
201 /// counts as capturing it or not.  The boolean StoreCaptures specified whether
202 /// storing the value (or part of it) into memory anywhere automatically
203 /// counts as capturing it or not.
204 bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
205                                 bool StoreCaptures, unsigned MaxUsesToExplore) {
206   assert(!isa<GlobalValue>(V) &&
207          "It doesn't make sense to ask whether a global is captured.");
208 
209   // TODO: If StoreCaptures is not true, we could do Fancy analysis
210   // to determine whether this store is not actually an escape point.
211   // In that case, BasicAliasAnalysis should be updated as well to
212   // take advantage of this.
213   (void)StoreCaptures;
214 
215   LLVM_DEBUG(dbgs() << "Captured?: " << *V << " = ");
216 
217   SimpleCaptureTracker SCT(ReturnCaptures);
218   PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
219   if (SCT.Captured)
220     ++NumCaptured;
221   else {
222     ++NumNotCaptured;
223     LLVM_DEBUG(dbgs() << "not captured\n");
224   }
225   return SCT.Captured;
226 }
227 
228 /// PointerMayBeCapturedBefore - Return true if this pointer value may be
229 /// captured by the enclosing function (which is required to exist). If a
230 /// DominatorTree is provided, only captures which happen before the given
231 /// instruction are considered. This routine can be expensive, so consider
232 /// caching the results.  The boolean ReturnCaptures specifies whether
233 /// returning the value (or part of it) from the function counts as capturing
234 /// it or not.  The boolean StoreCaptures specified whether storing the value
235 /// (or part of it) into memory anywhere automatically counts as capturing it
236 /// or not.
237 bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
238                                       bool StoreCaptures, const Instruction *I,
239                                       const DominatorTree *DT, bool IncludeI,
240                                       unsigned MaxUsesToExplore,
241                                       const LoopInfo *LI) {
242   assert(!isa<GlobalValue>(V) &&
243          "It doesn't make sense to ask whether a global is captured.");
244 
245   if (!DT)
246     return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
247                                 MaxUsesToExplore);
248 
249   // TODO: See comment in PointerMayBeCaptured regarding what could be done
250   // with StoreCaptures.
251 
252   CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI);
253   PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
254   if (CB.Captured)
255     ++NumCapturedBefore;
256   else
257     ++NumNotCapturedBefore;
258   return CB.Captured;
259 }
260 
261 Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
262                                        bool ReturnCaptures, bool StoreCaptures,
263                                        const DominatorTree &DT,
264                                        unsigned MaxUsesToExplore) {
265   assert(!isa<GlobalValue>(V) &&
266          "It doesn't make sense to ask whether a global is captured.");
267 
268   EarliestCaptures CB(ReturnCaptures, F, DT);
269   PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
270   if (CB.Captured)
271     ++NumCapturedBefore;
272   else
273     ++NumNotCapturedBefore;
274   return CB.EarliestCapture;
275 }
276 
277 UseCaptureKind llvm::DetermineUseCaptureKind(
278     const Use &U,
279     function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) {
280   Instruction *I = dyn_cast<Instruction>(U.getUser());
281 
282   // TODO: Investigate non-instruction uses.
283   if (!I)
284     return UseCaptureKind::MAY_CAPTURE;
285 
286   switch (I->getOpcode()) {
287   case Instruction::Call:
288   case Instruction::Invoke: {
289     auto *Call = cast<CallBase>(I);
290     // Not captured if the callee is readonly, doesn't return a copy through
291     // its return value and doesn't unwind (a readonly function can leak bits
292     // by throwing an exception or not depending on the input value).
293     if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
294         Call->getType()->isVoidTy())
295       return UseCaptureKind::NO_CAPTURE;
296 
297     // The pointer is not captured if returned pointer is not captured.
298     // NOTE: CaptureTracking users should not assume that only functions
299     // marked with nocapture do not capture. This means that places like
300     // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
301     // in BasicAA also need to know about this property.
302     if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true))
303       return UseCaptureKind::PASSTHROUGH;
304 
305     // Volatile operations effectively capture the memory location that they
306     // load and store to.
307     if (auto *MI = dyn_cast<MemIntrinsic>(Call))
308       if (MI->isVolatile())
309         return UseCaptureKind::MAY_CAPTURE;
310 
311     // Calling a function pointer does not in itself cause the pointer to
312     // be captured.  This is a subtle point considering that (for example)
313     // the callee might return its own address.  It is analogous to saying
314     // that loading a value from a pointer does not cause the pointer to be
315     // captured, even though the loaded value might be the pointer itself
316     // (think of self-referential objects).
317     if (Call->isCallee(&U))
318       return UseCaptureKind::NO_CAPTURE;
319 
320     // Not captured if only passed via 'nocapture' arguments.
321     if (Call->isDataOperand(&U) &&
322         !Call->doesNotCapture(Call->getDataOperandNo(&U))) {
323       // The parameter is not marked 'nocapture' - captured.
324       return UseCaptureKind::MAY_CAPTURE;
325     }
326     return UseCaptureKind::NO_CAPTURE;
327   }
328   case Instruction::Load:
329     // Volatile loads make the address observable.
330     if (cast<LoadInst>(I)->isVolatile())
331       return UseCaptureKind::MAY_CAPTURE;
332     return UseCaptureKind::NO_CAPTURE;
333   case Instruction::VAArg:
334     // "va-arg" from a pointer does not cause it to be captured.
335     return UseCaptureKind::NO_CAPTURE;
336   case Instruction::Store:
337     // Stored the pointer - conservatively assume it may be captured.
338     // Volatile stores make the address observable.
339     if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
340       return UseCaptureKind::MAY_CAPTURE;
341     return UseCaptureKind::NO_CAPTURE;
342   case Instruction::AtomicRMW: {
343     // atomicrmw conceptually includes both a load and store from
344     // the same location.
345     // As with a store, the location being accessed is not captured,
346     // but the value being stored is.
347     // Volatile stores make the address observable.
348     auto *ARMWI = cast<AtomicRMWInst>(I);
349     if (U.getOperandNo() == 1 || ARMWI->isVolatile())
350       return UseCaptureKind::MAY_CAPTURE;
351     return UseCaptureKind::NO_CAPTURE;
352   }
353   case Instruction::AtomicCmpXchg: {
354     // cmpxchg conceptually includes both a load and store from
355     // the same location.
356     // As with a store, the location being accessed is not captured,
357     // but the value being stored is.
358     // Volatile stores make the address observable.
359     auto *ACXI = cast<AtomicCmpXchgInst>(I);
360     if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile())
361       return UseCaptureKind::MAY_CAPTURE;
362     return UseCaptureKind::NO_CAPTURE;
363   }
364   case Instruction::GetElementPtr:
365     // AA does not support pointers of vectors, so GEP vector splats need to
366     // be considered as captures.
367     if (I->getType()->isVectorTy())
368       return UseCaptureKind::MAY_CAPTURE;
369     return UseCaptureKind::PASSTHROUGH;
370   case Instruction::BitCast:
371   case Instruction::PHI:
372   case Instruction::Select:
373   case Instruction::AddrSpaceCast:
374     // The original value is not captured via this if the new value isn't.
375     return UseCaptureKind::PASSTHROUGH;
376   case Instruction::ICmp: {
377     unsigned Idx = U.getOperandNo();
378     unsigned OtherIdx = 1 - Idx;
379     if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
380       // Don't count comparisons of a no-alias return value against null as
381       // captures. This allows us to ignore comparisons of malloc results
382       // with null, for example.
383       if (CPN->getType()->getAddressSpace() == 0)
384         if (isNoAliasCall(U.get()->stripPointerCasts()))
385           return UseCaptureKind::NO_CAPTURE;
386       if (!I->getFunction()->nullPointerIsDefined()) {
387         auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
388         // Comparing a dereferenceable_or_null pointer against null cannot
389         // lead to pointer escapes, because if it is not null it must be a
390         // valid (in-bounds) pointer.
391         const DataLayout &DL = I->getModule()->getDataLayout();
392         if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL))
393           return UseCaptureKind::NO_CAPTURE;
394       }
395     }
396 
397     // Otherwise, be conservative. There are crazy ways to capture pointers
398     // using comparisons.
399     return UseCaptureKind::MAY_CAPTURE;
400   }
401   default:
402     // Something else - be conservative and say it is captured.
403     return UseCaptureKind::MAY_CAPTURE;
404   }
405 }
406 
407 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
408                                 unsigned MaxUsesToExplore) {
409   assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
410   if (MaxUsesToExplore == 0)
411     MaxUsesToExplore = DefaultMaxUsesToExplore;
412 
413   SmallVector<const Use *, 20> Worklist;
414   Worklist.reserve(getDefaultMaxUsesToExploreForCaptureTracking());
415   SmallSet<const Use *, 20> Visited;
416 
417   auto AddUses = [&](const Value *V) {
418     for (const Use &U : V->uses()) {
419       // If there are lots of uses, conservatively say that the value
420       // is captured to avoid taking too much compile time.
421       if (Visited.size()  >= MaxUsesToExplore) {
422         Tracker->tooManyUses();
423         return false;
424       }
425       if (!Visited.insert(&U).second)
426         continue;
427       if (!Tracker->shouldExplore(&U))
428         continue;
429       Worklist.push_back(&U);
430     }
431     return true;
432   };
433   if (!AddUses(V))
434     return;
435 
436   auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) {
437     return Tracker->isDereferenceableOrNull(V, DL);
438   };
439   while (!Worklist.empty()) {
440     const Use *U = Worklist.pop_back_val();
441     switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) {
442     case UseCaptureKind::NO_CAPTURE:
443       continue;
444     case UseCaptureKind::MAY_CAPTURE:
445       if (Tracker->captured(U))
446         return;
447       continue;
448     case UseCaptureKind::PASSTHROUGH:
449       if (!AddUses(U->getUser()))
450         return;
451       continue;
452     }
453   }
454 
455   // All uses examined.
456 }
457 
458 bool llvm::isNonEscapingLocalObject(
459     const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) {
460   SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
461   if (IsCapturedCache) {
462     bool Inserted;
463     std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
464     if (!Inserted)
465       // Found cached result, return it!
466       return CacheIt->second;
467   }
468 
469   // If this is an identified function-local object, check to see if it escapes.
470   if (isIdentifiedFunctionLocal(V)) {
471     // Set StoreCaptures to True so that we can assume in our callers that the
472     // pointer is not the result of a load instruction. Currently
473     // PointerMayBeCaptured doesn't have any special analysis for the
474     // StoreCaptures=false case; if it did, our callers could be refined to be
475     // more precise.
476     auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
477     if (IsCapturedCache)
478       CacheIt->second = Ret;
479     return Ret;
480   }
481 
482   return false;
483 }
484