xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/GlobalsModRef.cpp (revision be092bcde96bdcfde9013d60e442cca023bfbd1b)
1  //===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This simple pass provides alias and mod/ref information for global values
10  // that do not have their address taken, and keeps track of whether functions
11  // read or write memory (are "pure").  For this simple (but very common) case,
12  // we can provide pretty accurate and useful information.
13  //
14  //===----------------------------------------------------------------------===//
15  
16  #include "llvm/Analysis/GlobalsModRef.h"
17  #include "llvm/ADT/SCCIterator.h"
18  #include "llvm/ADT/SmallPtrSet.h"
19  #include "llvm/ADT/Statistic.h"
20  #include "llvm/Analysis/CallGraph.h"
21  #include "llvm/Analysis/MemoryBuiltins.h"
22  #include "llvm/Analysis/TargetLibraryInfo.h"
23  #include "llvm/Analysis/ValueTracking.h"
24  #include "llvm/IR/InstIterator.h"
25  #include "llvm/IR/Instructions.h"
26  #include "llvm/IR/Module.h"
27  #include "llvm/IR/PassManager.h"
28  #include "llvm/InitializePasses.h"
29  #include "llvm/Pass.h"
30  #include "llvm/Support/CommandLine.h"
31  
32  using namespace llvm;
33  
34  #define DEBUG_TYPE "globalsmodref-aa"
35  
36  STATISTIC(NumNonAddrTakenGlobalVars,
37            "Number of global vars without address taken");
38  STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
39  STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
40  STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
41  STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
42  
43  // An option to enable unsafe alias results from the GlobalsModRef analysis.
44  // When enabled, GlobalsModRef will provide no-alias results which in extremely
45  // rare cases may not be conservatively correct. In particular, in the face of
46  // transforms which cause asymmetry between how effective getUnderlyingObject
47  // is for two pointers, it may produce incorrect results.
48  //
49  // These unsafe results have been returned by GMR for many years without
50  // causing significant issues in the wild and so we provide a mechanism to
51  // re-enable them for users of LLVM that have a particular performance
52  // sensitivity and no known issues. The option also makes it easy to evaluate
53  // the performance impact of these results.
54  static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults(
55      "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden);
56  
57  /// The mod/ref information collected for a particular function.
58  ///
59  /// We collect information about mod/ref behavior of a function here, both in
60  /// general and as pertains to specific globals. We only have this detailed
61  /// information when we know *something* useful about the behavior. If we
62  /// saturate to fully general mod/ref, we remove the info for the function.
63  class GlobalsAAResult::FunctionInfo {
64    typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType;
65  
66    /// Build a wrapper struct that has 8-byte alignment. All heap allocations
67    /// should provide this much alignment at least, but this makes it clear we
68    /// specifically rely on this amount of alignment.
69    struct alignas(8) AlignedMap {
70      AlignedMap() = default;
71      AlignedMap(const AlignedMap &Arg) = default;
72      GlobalInfoMapType Map;
73    };
74  
75    /// Pointer traits for our aligned map.
76    struct AlignedMapPointerTraits {
77      static inline void *getAsVoidPointer(AlignedMap *P) { return P; }
78      static inline AlignedMap *getFromVoidPointer(void *P) {
79        return (AlignedMap *)P;
80      }
81      static constexpr int NumLowBitsAvailable = 3;
82      static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable),
83                    "AlignedMap insufficiently aligned to have enough low bits.");
84    };
85  
86    /// The bit that flags that this function may read any global. This is
87    /// chosen to mix together with ModRefInfo bits.
88    /// FIXME: This assumes ModRefInfo lattice will remain 4 bits!
89    /// FunctionInfo.getModRefInfo() masks out everything except ModRef so
90    /// this remains correct.
91    enum { MayReadAnyGlobal = 4 };
92  
93    /// Checks to document the invariants of the bit packing here.
94    static_assert((MayReadAnyGlobal & static_cast<int>(ModRefInfo::ModRef)) == 0,
95                  "ModRef and the MayReadAnyGlobal flag bits overlap.");
96    static_assert(((MayReadAnyGlobal | static_cast<int>(ModRefInfo::ModRef)) >>
97                   AlignedMapPointerTraits::NumLowBitsAvailable) == 0,
98                  "Insufficient low bits to store our flag and ModRef info.");
99  
100  public:
101    FunctionInfo() = default;
102    ~FunctionInfo() {
103      delete Info.getPointer();
104    }
105    // Spell out the copy ond move constructors and assignment operators to get
106    // deep copy semantics and correct move semantics in the face of the
107    // pointer-int pair.
108    FunctionInfo(const FunctionInfo &Arg)
109        : Info(nullptr, Arg.Info.getInt()) {
110      if (const auto *ArgPtr = Arg.Info.getPointer())
111        Info.setPointer(new AlignedMap(*ArgPtr));
112    }
113    FunctionInfo(FunctionInfo &&Arg)
114        : Info(Arg.Info.getPointer(), Arg.Info.getInt()) {
115      Arg.Info.setPointerAndInt(nullptr, 0);
116    }
117    FunctionInfo &operator=(const FunctionInfo &RHS) {
118      delete Info.getPointer();
119      Info.setPointerAndInt(nullptr, RHS.Info.getInt());
120      if (const auto *RHSPtr = RHS.Info.getPointer())
121        Info.setPointer(new AlignedMap(*RHSPtr));
122      return *this;
123    }
124    FunctionInfo &operator=(FunctionInfo &&RHS) {
125      delete Info.getPointer();
126      Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt());
127      RHS.Info.setPointerAndInt(nullptr, 0);
128      return *this;
129    }
130  
131    /// This method clears MayReadAnyGlobal bit added by GlobalsAAResult to return
132    /// the corresponding ModRefInfo.
133    ModRefInfo globalClearMayReadAnyGlobal(int I) const {
134      return ModRefInfo(I & static_cast<int>(ModRefInfo::ModRef));
135    }
136  
137    /// Returns the \c ModRefInfo info for this function.
138    ModRefInfo getModRefInfo() const {
139      return globalClearMayReadAnyGlobal(Info.getInt());
140    }
141  
142    /// Adds new \c ModRefInfo for this function to its state.
143    void addModRefInfo(ModRefInfo NewMRI) {
144      Info.setInt(Info.getInt() | static_cast<int>(NewMRI));
145    }
146  
147    /// Returns whether this function may read any global variable, and we don't
148    /// know which global.
149    bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; }
150  
151    /// Sets this function as potentially reading from any global.
152    void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); }
153  
154    /// Returns the \c ModRefInfo info for this function w.r.t. a particular
155    /// global, which may be more precise than the general information above.
156    ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const {
157      ModRefInfo GlobalMRI =
158          mayReadAnyGlobal() ? ModRefInfo::Ref : ModRefInfo::NoModRef;
159      if (AlignedMap *P = Info.getPointer()) {
160        auto I = P->Map.find(&GV);
161        if (I != P->Map.end())
162          GlobalMRI |= I->second;
163      }
164      return GlobalMRI;
165    }
166  
167    /// Add mod/ref info from another function into ours, saturating towards
168    /// ModRef.
169    void addFunctionInfo(const FunctionInfo &FI) {
170      addModRefInfo(FI.getModRefInfo());
171  
172      if (FI.mayReadAnyGlobal())
173        setMayReadAnyGlobal();
174  
175      if (AlignedMap *P = FI.Info.getPointer())
176        for (const auto &G : P->Map)
177          addModRefInfoForGlobal(*G.first, G.second);
178    }
179  
180    void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) {
181      AlignedMap *P = Info.getPointer();
182      if (!P) {
183        P = new AlignedMap();
184        Info.setPointer(P);
185      }
186      auto &GlobalMRI = P->Map[&GV];
187      GlobalMRI |= NewMRI;
188    }
189  
190    /// Clear a global's ModRef info. Should be used when a global is being
191    /// deleted.
192    void eraseModRefInfoForGlobal(const GlobalValue &GV) {
193      if (AlignedMap *P = Info.getPointer())
194        P->Map.erase(&GV);
195    }
196  
197  private:
198    /// All of the information is encoded into a single pointer, with a three bit
199    /// integer in the low three bits. The high bit provides a flag for when this
200    /// function may read any global. The low two bits are the ModRefInfo. And
201    /// the pointer, when non-null, points to a map from GlobalValue to
202    /// ModRefInfo specific to that GlobalValue.
203    PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info;
204  };
205  
206  void GlobalsAAResult::DeletionCallbackHandle::deleted() {
207    Value *V = getValPtr();
208    if (auto *F = dyn_cast<Function>(V))
209      GAR->FunctionInfos.erase(F);
210  
211    if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
212      if (GAR->NonAddressTakenGlobals.erase(GV)) {
213        // This global might be an indirect global.  If so, remove it and
214        // remove any AllocRelatedValues for it.
215        if (GAR->IndirectGlobals.erase(GV)) {
216          // Remove any entries in AllocsForIndirectGlobals for this global.
217          for (auto I = GAR->AllocsForIndirectGlobals.begin(),
218                    E = GAR->AllocsForIndirectGlobals.end();
219               I != E; ++I)
220            if (I->second == GV)
221              GAR->AllocsForIndirectGlobals.erase(I);
222        }
223  
224        // Scan the function info we have collected and remove this global
225        // from all of them.
226        for (auto &FIPair : GAR->FunctionInfos)
227          FIPair.second.eraseModRefInfoForGlobal(*GV);
228      }
229    }
230  
231    // If this is an allocation related to an indirect global, remove it.
232    GAR->AllocsForIndirectGlobals.erase(V);
233  
234    // And clear out the handle.
235    setValPtr(nullptr);
236    GAR->Handles.erase(I);
237    // This object is now destroyed!
238  }
239  
240  MemoryEffects GlobalsAAResult::getMemoryEffects(const Function *F) {
241    if (FunctionInfo *FI = getFunctionInfo(F))
242      return MemoryEffects(FI->getModRefInfo());
243  
244    return AAResultBase::getMemoryEffects(F);
245  }
246  
247  /// Returns the function info for the function, or null if we don't have
248  /// anything useful to say about it.
249  GlobalsAAResult::FunctionInfo *
250  GlobalsAAResult::getFunctionInfo(const Function *F) {
251    auto I = FunctionInfos.find(F);
252    if (I != FunctionInfos.end())
253      return &I->second;
254    return nullptr;
255  }
256  
257  /// AnalyzeGlobals - Scan through the users of all of the internal
258  /// GlobalValue's in the program.  If none of them have their "address taken"
259  /// (really, their address passed to something nontrivial), record this fact,
260  /// and record the functions that they are used directly in.
261  void GlobalsAAResult::AnalyzeGlobals(Module &M) {
262    SmallPtrSet<Function *, 32> TrackedFunctions;
263    for (Function &F : M)
264      if (F.hasLocalLinkage()) {
265        if (!AnalyzeUsesOfPointer(&F)) {
266          // Remember that we are tracking this global.
267          NonAddressTakenGlobals.insert(&F);
268          TrackedFunctions.insert(&F);
269          Handles.emplace_front(*this, &F);
270          Handles.front().I = Handles.begin();
271          ++NumNonAddrTakenFunctions;
272        } else
273          UnknownFunctionsWithLocalLinkage = true;
274      }
275  
276    SmallPtrSet<Function *, 16> Readers, Writers;
277    for (GlobalVariable &GV : M.globals())
278      if (GV.hasLocalLinkage()) {
279        if (!AnalyzeUsesOfPointer(&GV, &Readers,
280                                  GV.isConstant() ? nullptr : &Writers)) {
281          // Remember that we are tracking this global, and the mod/ref fns
282          NonAddressTakenGlobals.insert(&GV);
283          Handles.emplace_front(*this, &GV);
284          Handles.front().I = Handles.begin();
285  
286          for (Function *Reader : Readers) {
287            if (TrackedFunctions.insert(Reader).second) {
288              Handles.emplace_front(*this, Reader);
289              Handles.front().I = Handles.begin();
290            }
291            FunctionInfos[Reader].addModRefInfoForGlobal(GV, ModRefInfo::Ref);
292          }
293  
294          if (!GV.isConstant()) // No need to keep track of writers to constants
295            for (Function *Writer : Writers) {
296              if (TrackedFunctions.insert(Writer).second) {
297                Handles.emplace_front(*this, Writer);
298                Handles.front().I = Handles.begin();
299              }
300              FunctionInfos[Writer].addModRefInfoForGlobal(GV, ModRefInfo::Mod);
301            }
302          ++NumNonAddrTakenGlobalVars;
303  
304          // If this global holds a pointer type, see if it is an indirect global.
305          if (GV.getValueType()->isPointerTy() &&
306              AnalyzeIndirectGlobalMemory(&GV))
307            ++NumIndirectGlobalVars;
308        }
309        Readers.clear();
310        Writers.clear();
311      }
312  }
313  
314  /// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
315  /// If this is used by anything complex (i.e., the address escapes), return
316  /// true.  Also, while we are at it, keep track of those functions that read and
317  /// write to the value.
318  ///
319  /// If OkayStoreDest is non-null, stores into this global are allowed.
320  bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
321                                             SmallPtrSetImpl<Function *> *Readers,
322                                             SmallPtrSetImpl<Function *> *Writers,
323                                             GlobalValue *OkayStoreDest) {
324    if (!V->getType()->isPointerTy())
325      return true;
326  
327    for (Use &U : V->uses()) {
328      User *I = U.getUser();
329      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
330        if (Readers)
331          Readers->insert(LI->getParent()->getParent());
332      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
333        if (V == SI->getOperand(1)) {
334          if (Writers)
335            Writers->insert(SI->getParent()->getParent());
336        } else if (SI->getOperand(1) != OkayStoreDest) {
337          return true; // Storing the pointer
338        }
339      } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
340        if (AnalyzeUsesOfPointer(I, Readers, Writers))
341          return true;
342      } else if (Operator::getOpcode(I) == Instruction::BitCast ||
343                 Operator::getOpcode(I) == Instruction::AddrSpaceCast) {
344        if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
345          return true;
346      } else if (auto *Call = dyn_cast<CallBase>(I)) {
347        // Make sure that this is just the function being called, not that it is
348        // passing into the function.
349        if (Call->isDataOperand(&U)) {
350          // Detect calls to free.
351          if (Call->isArgOperand(&U) &&
352              getFreedOperand(Call, &GetTLI(*Call->getFunction())) == U) {
353            if (Writers)
354              Writers->insert(Call->getParent()->getParent());
355          } else {
356            // In general, we return true for unknown calls, but there are
357            // some simple checks that we can do for functions that
358            // will never call back into the module.
359            auto *F = Call->getCalledFunction();
360            // TODO: we should be able to remove isDeclaration() check
361            // and let the function body analysis check for captures,
362            // and collect the mod-ref effects. This information will
363            // be later propagated via the call graph.
364            if (!F || !F->isDeclaration())
365              return true;
366            // Note that the NoCallback check here is a little bit too
367            // conservative. If there are no captures of the global
368            // in the module, then this call may not be a capture even
369            // if it does not have NoCallback.
370            if (!Call->hasFnAttr(Attribute::NoCallback) ||
371                !Call->isArgOperand(&U) ||
372                !Call->doesNotCapture(Call->getArgOperandNo(&U)))
373              return true;
374  
375            // Conservatively, assume the call reads and writes the global.
376            // We could use memory attributes to make it more precise.
377            if (Readers)
378              Readers->insert(Call->getParent()->getParent());
379            if (Writers)
380              Writers->insert(Call->getParent()->getParent());
381          }
382        }
383      } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
384        if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
385          return true; // Allow comparison against null.
386      } else if (Constant *C = dyn_cast<Constant>(I)) {
387        // Ignore constants which don't have any live uses.
388        if (isa<GlobalValue>(C) || C->isConstantUsed())
389          return true;
390      } else {
391        return true;
392      }
393    }
394  
395    return false;
396  }
397  
398  /// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable
399  /// which holds a pointer type.  See if the global always points to non-aliased
400  /// heap memory: that is, all initializers of the globals store a value known
401  /// to be obtained via a noalias return function call which have no other use.
402  /// Further, all loads out of GV must directly use the memory, not store the
403  /// pointer somewhere.  If this is true, we consider the memory pointed to by
404  /// GV to be owned by GV and can disambiguate other pointers from it.
405  bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
406    // Keep track of values related to the allocation of the memory, f.e. the
407    // value produced by the noalias call and any casts.
408    std::vector<Value *> AllocRelatedValues;
409  
410    // If the initializer is a valid pointer, bail.
411    if (Constant *C = GV->getInitializer())
412      if (!C->isNullValue())
413        return false;
414  
415    // Walk the user list of the global.  If we find anything other than a direct
416    // load or store, bail out.
417    for (User *U : GV->users()) {
418      if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
419        // The pointer loaded from the global can only be used in simple ways:
420        // we allow addressing of it and loading storing to it.  We do *not* allow
421        // storing the loaded pointer somewhere else or passing to a function.
422        if (AnalyzeUsesOfPointer(LI))
423          return false; // Loaded pointer escapes.
424        // TODO: Could try some IP mod/ref of the loaded pointer.
425      } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
426        // Storing the global itself.
427        if (SI->getOperand(0) == GV)
428          return false;
429  
430        // If storing the null pointer, ignore it.
431        if (isa<ConstantPointerNull>(SI->getOperand(0)))
432          continue;
433  
434        // Check the value being stored.
435        Value *Ptr = getUnderlyingObject(SI->getOperand(0));
436  
437        if (!isNoAliasCall(Ptr))
438          return false; // Too hard to analyze.
439  
440        // Analyze all uses of the allocation.  If any of them are used in a
441        // non-simple way (e.g. stored to another global) bail out.
442        if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
443                                 GV))
444          return false; // Loaded pointer escapes.
445  
446        // Remember that this allocation is related to the indirect global.
447        AllocRelatedValues.push_back(Ptr);
448      } else {
449        // Something complex, bail out.
450        return false;
451      }
452    }
453  
454    // Okay, this is an indirect global.  Remember all of the allocations for
455    // this global in AllocsForIndirectGlobals.
456    while (!AllocRelatedValues.empty()) {
457      AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
458      Handles.emplace_front(*this, AllocRelatedValues.back());
459      Handles.front().I = Handles.begin();
460      AllocRelatedValues.pop_back();
461    }
462    IndirectGlobals.insert(GV);
463    Handles.emplace_front(*this, GV);
464    Handles.front().I = Handles.begin();
465    return true;
466  }
467  
468  void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
469    // We do a bottom-up SCC traversal of the call graph.  In other words, we
470    // visit all callees before callers (leaf-first).
471    unsigned SCCID = 0;
472    for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
473      const std::vector<CallGraphNode *> &SCC = *I;
474      assert(!SCC.empty() && "SCC with no functions?");
475  
476      for (auto *CGN : SCC)
477        if (Function *F = CGN->getFunction())
478          FunctionToSCCMap[F] = SCCID;
479      ++SCCID;
480    }
481  }
482  
483  /// AnalyzeCallGraph - At this point, we know the functions where globals are
484  /// immediately stored to and read from.  Propagate this information up the call
485  /// graph to all callers and compute the mod/ref info for all memory for each
486  /// function.
487  void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
488    // We do a bottom-up SCC traversal of the call graph.  In other words, we
489    // visit all callees before callers (leaf-first).
490    for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
491      const std::vector<CallGraphNode *> &SCC = *I;
492      assert(!SCC.empty() && "SCC with no functions?");
493  
494      Function *F = SCC[0]->getFunction();
495  
496      if (!F || !F->isDefinitionExact()) {
497        // Calls externally or not exact - can't say anything useful. Remove any
498        // existing function records (may have been created when scanning
499        // globals).
500        for (auto *Node : SCC)
501          FunctionInfos.erase(Node->getFunction());
502        continue;
503      }
504  
505      FunctionInfo &FI = FunctionInfos[F];
506      Handles.emplace_front(*this, F);
507      Handles.front().I = Handles.begin();
508      bool KnowNothing = false;
509  
510      // Intrinsics, like any other synchronizing function, can make effects
511      // of other threads visible. Without nosync we know nothing really.
512      // Similarly, if `nocallback` is missing the function, or intrinsic,
513      // can call into the module arbitrarily. If both are set the function
514      // has an effect but will not interact with accesses of internal
515      // globals inside the module. We are conservative here for optnone
516      // functions, might not be necessary.
517      auto MaySyncOrCallIntoModule = [](const Function &F) {
518        return !F.isDeclaration() || !F.hasNoSync() ||
519               !F.hasFnAttribute(Attribute::NoCallback);
520      };
521  
522      // Collect the mod/ref properties due to called functions.  We only compute
523      // one mod-ref set.
524      for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
525        if (!F) {
526          KnowNothing = true;
527          break;
528        }
529  
530        if (F->isDeclaration() || F->hasOptNone()) {
531          // Try to get mod/ref behaviour from function attributes.
532          if (F->doesNotAccessMemory()) {
533            // Can't do better than that!
534          } else if (F->onlyReadsMemory()) {
535            FI.addModRefInfo(ModRefInfo::Ref);
536            if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F))
537              // This function might call back into the module and read a global -
538              // consider every global as possibly being read by this function.
539              FI.setMayReadAnyGlobal();
540          } else {
541            FI.addModRefInfo(ModRefInfo::ModRef);
542            if (!F->onlyAccessesArgMemory())
543              FI.setMayReadAnyGlobal();
544            if (MaySyncOrCallIntoModule(*F)) {
545              KnowNothing = true;
546              break;
547            }
548          }
549          continue;
550        }
551  
552        for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
553             CI != E && !KnowNothing; ++CI)
554          if (Function *Callee = CI->second->getFunction()) {
555            if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
556              // Propagate function effect up.
557              FI.addFunctionInfo(*CalleeFI);
558            } else {
559              // Can't say anything about it.  However, if it is inside our SCC,
560              // then nothing needs to be done.
561              CallGraphNode *CalleeNode = CG[Callee];
562              if (!is_contained(SCC, CalleeNode))
563                KnowNothing = true;
564            }
565          } else {
566            KnowNothing = true;
567          }
568      }
569  
570      // If we can't say anything useful about this SCC, remove all SCC functions
571      // from the FunctionInfos map.
572      if (KnowNothing) {
573        for (auto *Node : SCC)
574          FunctionInfos.erase(Node->getFunction());
575        continue;
576      }
577  
578      // Scan the function bodies for explicit loads or stores.
579      for (auto *Node : SCC) {
580        if (isModAndRefSet(FI.getModRefInfo()))
581          break; // The mod/ref lattice saturates here.
582  
583        // Don't prove any properties based on the implementation of an optnone
584        // function. Function attributes were already used as a best approximation
585        // above.
586        if (Node->getFunction()->hasOptNone())
587          continue;
588  
589        for (Instruction &I : instructions(Node->getFunction())) {
590          if (isModAndRefSet(FI.getModRefInfo()))
591            break; // The mod/ref lattice saturates here.
592  
593          // We handle calls specially because the graph-relevant aspects are
594          // handled above.
595          if (isa<CallBase>(&I))
596            continue;
597  
598          // All non-call instructions we use the primary predicates for whether
599          // they read or write memory.
600          if (I.mayReadFromMemory())
601            FI.addModRefInfo(ModRefInfo::Ref);
602          if (I.mayWriteToMemory())
603            FI.addModRefInfo(ModRefInfo::Mod);
604        }
605      }
606  
607      if (!isModSet(FI.getModRefInfo()))
608        ++NumReadMemFunctions;
609      if (!isModOrRefSet(FI.getModRefInfo()))
610        ++NumNoMemFunctions;
611  
612      // Finally, now that we know the full effect on this SCC, clone the
613      // information to each function in the SCC.
614      // FI is a reference into FunctionInfos, so copy it now so that it doesn't
615      // get invalidated if DenseMap decides to re-hash.
616      FunctionInfo CachedFI = FI;
617      for (unsigned i = 1, e = SCC.size(); i != e; ++i)
618        FunctionInfos[SCC[i]->getFunction()] = CachedFI;
619    }
620  }
621  
622  // GV is a non-escaping global. V is a pointer address that has been loaded from.
623  // If we can prove that V must escape, we can conclude that a load from V cannot
624  // alias GV.
625  static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
626                                                 const Value *V,
627                                                 int &Depth,
628                                                 const DataLayout &DL) {
629    SmallPtrSet<const Value *, 8> Visited;
630    SmallVector<const Value *, 8> Inputs;
631    Visited.insert(V);
632    Inputs.push_back(V);
633    do {
634      const Value *Input = Inputs.pop_back_val();
635  
636      if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) ||
637          isa<InvokeInst>(Input))
638        // Arguments to functions or returns from functions are inherently
639        // escaping, so we can immediately classify those as not aliasing any
640        // non-addr-taken globals.
641        //
642        // (Transitive) loads from a global are also safe - if this aliased
643        // another global, its address would escape, so no alias.
644        continue;
645  
646      // Recurse through a limited number of selects, loads and PHIs. This is an
647      // arbitrary depth of 4, lower numbers could be used to fix compile time
648      // issues if needed, but this is generally expected to be only be important
649      // for small depths.
650      if (++Depth > 4)
651        return false;
652  
653      if (auto *LI = dyn_cast<LoadInst>(Input)) {
654        Inputs.push_back(getUnderlyingObject(LI->getPointerOperand()));
655        continue;
656      }
657      if (auto *SI = dyn_cast<SelectInst>(Input)) {
658        const Value *LHS = getUnderlyingObject(SI->getTrueValue());
659        const Value *RHS = getUnderlyingObject(SI->getFalseValue());
660        if (Visited.insert(LHS).second)
661          Inputs.push_back(LHS);
662        if (Visited.insert(RHS).second)
663          Inputs.push_back(RHS);
664        continue;
665      }
666      if (auto *PN = dyn_cast<PHINode>(Input)) {
667        for (const Value *Op : PN->incoming_values()) {
668          Op = getUnderlyingObject(Op);
669          if (Visited.insert(Op).second)
670            Inputs.push_back(Op);
671        }
672        continue;
673      }
674  
675      return false;
676    } while (!Inputs.empty());
677  
678    // All inputs were known to be no-alias.
679    return true;
680  }
681  
682  // There are particular cases where we can conclude no-alias between
683  // a non-addr-taken global and some other underlying object. Specifically,
684  // a non-addr-taken global is known to not be escaped from any function. It is
685  // also incorrect for a transformation to introduce an escape of a global in
686  // a way that is observable when it was not there previously. One function
687  // being transformed to introduce an escape which could possibly be observed
688  // (via loading from a global or the return value for example) within another
689  // function is never safe. If the observation is made through non-atomic
690  // operations on different threads, it is a data-race and UB. If the
691  // observation is well defined, by being observed the transformation would have
692  // changed program behavior by introducing the observed escape, making it an
693  // invalid transform.
694  //
695  // This property does require that transformations which *temporarily* escape
696  // a global that was not previously escaped, prior to restoring it, cannot rely
697  // on the results of GMR::alias. This seems a reasonable restriction, although
698  // currently there is no way to enforce it. There is also no realistic
699  // optimization pass that would make this mistake. The closest example is
700  // a transformation pass which does reg2mem of SSA values but stores them into
701  // global variables temporarily before restoring the global variable's value.
702  // This could be useful to expose "benign" races for example. However, it seems
703  // reasonable to require that a pass which introduces escapes of global
704  // variables in this way to either not trust AA results while the escape is
705  // active, or to be forced to operate as a module pass that cannot co-exist
706  // with an alias analysis such as GMR.
707  bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
708                                                   const Value *V) {
709    // In order to know that the underlying object cannot alias the
710    // non-addr-taken global, we must know that it would have to be an escape.
711    // Thus if the underlying object is a function argument, a load from
712    // a global, or the return of a function, it cannot alias. We can also
713    // recurse through PHI nodes and select nodes provided all of their inputs
714    // resolve to one of these known-escaping roots.
715    SmallPtrSet<const Value *, 8> Visited;
716    SmallVector<const Value *, 8> Inputs;
717    Visited.insert(V);
718    Inputs.push_back(V);
719    int Depth = 0;
720    do {
721      const Value *Input = Inputs.pop_back_val();
722  
723      if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
724        // If one input is the very global we're querying against, then we can't
725        // conclude no-alias.
726        if (InputGV == GV)
727          return false;
728  
729        // Distinct GlobalVariables never alias, unless overriden or zero-sized.
730        // FIXME: The condition can be refined, but be conservative for now.
731        auto *GVar = dyn_cast<GlobalVariable>(GV);
732        auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
733        if (GVar && InputGVar &&
734            !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
735            !GVar->isInterposable() && !InputGVar->isInterposable()) {
736          Type *GVType = GVar->getInitializer()->getType();
737          Type *InputGVType = InputGVar->getInitializer()->getType();
738          if (GVType->isSized() && InputGVType->isSized() &&
739              (DL.getTypeAllocSize(GVType) > 0) &&
740              (DL.getTypeAllocSize(InputGVType) > 0))
741            continue;
742        }
743  
744        // Conservatively return false, even though we could be smarter
745        // (e.g. look through GlobalAliases).
746        return false;
747      }
748  
749      if (isa<Argument>(Input) || isa<CallInst>(Input) ||
750          isa<InvokeInst>(Input)) {
751        // Arguments to functions or returns from functions are inherently
752        // escaping, so we can immediately classify those as not aliasing any
753        // non-addr-taken globals.
754        continue;
755      }
756  
757      // Recurse through a limited number of selects, loads and PHIs. This is an
758      // arbitrary depth of 4, lower numbers could be used to fix compile time
759      // issues if needed, but this is generally expected to be only be important
760      // for small depths.
761      if (++Depth > 4)
762        return false;
763  
764      if (auto *LI = dyn_cast<LoadInst>(Input)) {
765        // A pointer loaded from a global would have been captured, and we know
766        // that the global is non-escaping, so no alias.
767        const Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
768        if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
769          // The load does not alias with GV.
770          continue;
771        // Otherwise, a load could come from anywhere, so bail.
772        return false;
773      }
774      if (auto *SI = dyn_cast<SelectInst>(Input)) {
775        const Value *LHS = getUnderlyingObject(SI->getTrueValue());
776        const Value *RHS = getUnderlyingObject(SI->getFalseValue());
777        if (Visited.insert(LHS).second)
778          Inputs.push_back(LHS);
779        if (Visited.insert(RHS).second)
780          Inputs.push_back(RHS);
781        continue;
782      }
783      if (auto *PN = dyn_cast<PHINode>(Input)) {
784        for (const Value *Op : PN->incoming_values()) {
785          Op = getUnderlyingObject(Op);
786          if (Visited.insert(Op).second)
787            Inputs.push_back(Op);
788        }
789        continue;
790      }
791  
792      // FIXME: It would be good to handle other obvious no-alias cases here, but
793      // it isn't clear how to do so reasonably without building a small version
794      // of BasicAA into this code. We could recurse into AAResultBase::alias
795      // here but that seems likely to go poorly as we're inside the
796      // implementation of such a query. Until then, just conservatively return
797      // false.
798      return false;
799    } while (!Inputs.empty());
800  
801    // If all the inputs to V were definitively no-alias, then V is no-alias.
802    return true;
803  }
804  
805  bool GlobalsAAResult::invalidate(Module &, const PreservedAnalyses &PA,
806                                   ModuleAnalysisManager::Invalidator &) {
807    // Check whether the analysis has been explicitly invalidated. Otherwise, it's
808    // stateless and remains preserved.
809    auto PAC = PA.getChecker<GlobalsAA>();
810    return !PAC.preservedWhenStateless();
811  }
812  
813  /// alias - If one of the pointers is to a global that we are tracking, and the
814  /// other is some random pointer, we know there cannot be an alias, because the
815  /// address of the global isn't taken.
816  AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
817                                     const MemoryLocation &LocB,
818                                     AAQueryInfo &AAQI, const Instruction *) {
819    // Get the base object these pointers point to.
820    const Value *UV1 =
821        getUnderlyingObject(LocA.Ptr->stripPointerCastsForAliasAnalysis());
822    const Value *UV2 =
823        getUnderlyingObject(LocB.Ptr->stripPointerCastsForAliasAnalysis());
824  
825    // If either of the underlying values is a global, they may be non-addr-taken
826    // globals, which we can answer queries about.
827    const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
828    const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
829    if (GV1 || GV2) {
830      // If the global's address is taken, pretend we don't know it's a pointer to
831      // the global.
832      if (GV1 && !NonAddressTakenGlobals.count(GV1))
833        GV1 = nullptr;
834      if (GV2 && !NonAddressTakenGlobals.count(GV2))
835        GV2 = nullptr;
836  
837      // If the two pointers are derived from two different non-addr-taken
838      // globals we know these can't alias.
839      if (GV1 && GV2 && GV1 != GV2)
840        return AliasResult::NoAlias;
841  
842      // If one is and the other isn't, it isn't strictly safe but we can fake
843      // this result if necessary for performance. This does not appear to be
844      // a common problem in practice.
845      if (EnableUnsafeGlobalsModRefAliasResults)
846        if ((GV1 || GV2) && GV1 != GV2)
847          return AliasResult::NoAlias;
848  
849      // Check for a special case where a non-escaping global can be used to
850      // conclude no-alias.
851      if ((GV1 || GV2) && GV1 != GV2) {
852        const GlobalValue *GV = GV1 ? GV1 : GV2;
853        const Value *UV = GV1 ? UV2 : UV1;
854        if (isNonEscapingGlobalNoAlias(GV, UV))
855          return AliasResult::NoAlias;
856      }
857  
858      // Otherwise if they are both derived from the same addr-taken global, we
859      // can't know the two accesses don't overlap.
860    }
861  
862    // These pointers may be based on the memory owned by an indirect global.  If
863    // so, we may be able to handle this.  First check to see if the base pointer
864    // is a direct load from an indirect global.
865    GV1 = GV2 = nullptr;
866    if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
867      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
868        if (IndirectGlobals.count(GV))
869          GV1 = GV;
870    if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
871      if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
872        if (IndirectGlobals.count(GV))
873          GV2 = GV;
874  
875    // These pointers may also be from an allocation for the indirect global.  If
876    // so, also handle them.
877    if (!GV1)
878      GV1 = AllocsForIndirectGlobals.lookup(UV1);
879    if (!GV2)
880      GV2 = AllocsForIndirectGlobals.lookup(UV2);
881  
882    // Now that we know whether the two pointers are related to indirect globals,
883    // use this to disambiguate the pointers. If the pointers are based on
884    // different indirect globals they cannot alias.
885    if (GV1 && GV2 && GV1 != GV2)
886      return AliasResult::NoAlias;
887  
888    // If one is based on an indirect global and the other isn't, it isn't
889    // strictly safe but we can fake this result if necessary for performance.
890    // This does not appear to be a common problem in practice.
891    if (EnableUnsafeGlobalsModRefAliasResults)
892      if ((GV1 || GV2) && GV1 != GV2)
893        return AliasResult::NoAlias;
894  
895    return AAResultBase::alias(LocA, LocB, AAQI, nullptr);
896  }
897  
898  ModRefInfo GlobalsAAResult::getModRefInfoForArgument(const CallBase *Call,
899                                                       const GlobalValue *GV,
900                                                       AAQueryInfo &AAQI) {
901    if (Call->doesNotAccessMemory())
902      return ModRefInfo::NoModRef;
903    ModRefInfo ConservativeResult =
904        Call->onlyReadsMemory() ? ModRefInfo::Ref : ModRefInfo::ModRef;
905  
906    // Iterate through all the arguments to the called function. If any argument
907    // is based on GV, return the conservative result.
908    for (const auto &A : Call->args()) {
909      SmallVector<const Value*, 4> Objects;
910      getUnderlyingObjects(A, Objects);
911  
912      // All objects must be identified.
913      if (!all_of(Objects, isIdentifiedObject) &&
914          // Try ::alias to see if all objects are known not to alias GV.
915          !all_of(Objects, [&](const Value *V) {
916            return this->alias(MemoryLocation::getBeforeOrAfter(V),
917                               MemoryLocation::getBeforeOrAfter(GV), AAQI,
918                               nullptr) == AliasResult::NoAlias;
919          }))
920        return ConservativeResult;
921  
922      if (is_contained(Objects, GV))
923        return ConservativeResult;
924    }
925  
926    // We identified all objects in the argument list, and none of them were GV.
927    return ModRefInfo::NoModRef;
928  }
929  
930  ModRefInfo GlobalsAAResult::getModRefInfo(const CallBase *Call,
931                                            const MemoryLocation &Loc,
932                                            AAQueryInfo &AAQI) {
933    ModRefInfo Known = ModRefInfo::ModRef;
934  
935    // If we are asking for mod/ref info of a direct call with a pointer to a
936    // global we are tracking, return information if we have it.
937    if (const GlobalValue *GV =
938            dyn_cast<GlobalValue>(getUnderlyingObject(Loc.Ptr)))
939      // If GV is internal to this IR and there is no function with local linkage
940      // that has had their address taken, keep looking for a tighter ModRefInfo.
941      if (GV->hasLocalLinkage() && !UnknownFunctionsWithLocalLinkage)
942        if (const Function *F = Call->getCalledFunction())
943          if (NonAddressTakenGlobals.count(GV))
944            if (const FunctionInfo *FI = getFunctionInfo(F))
945              Known = FI->getModRefInfoForGlobal(*GV) |
946                      getModRefInfoForArgument(Call, GV, AAQI);
947  
948    return Known;
949  }
950  
951  GlobalsAAResult::GlobalsAAResult(
952      const DataLayout &DL,
953      std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
954      : DL(DL), GetTLI(std::move(GetTLI)) {}
955  
956  GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
957      : AAResultBase(std::move(Arg)), DL(Arg.DL), GetTLI(std::move(Arg.GetTLI)),
958        NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
959        IndirectGlobals(std::move(Arg.IndirectGlobals)),
960        AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
961        FunctionInfos(std::move(Arg.FunctionInfos)),
962        Handles(std::move(Arg.Handles)) {
963    // Update the parent for each DeletionCallbackHandle.
964    for (auto &H : Handles) {
965      assert(H.GAR == &Arg);
966      H.GAR = this;
967    }
968  }
969  
970  GlobalsAAResult::~GlobalsAAResult() = default;
971  
972  /*static*/ GlobalsAAResult GlobalsAAResult::analyzeModule(
973      Module &M, std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
974      CallGraph &CG) {
975    GlobalsAAResult Result(M.getDataLayout(), GetTLI);
976  
977    // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
978    Result.CollectSCCMembership(CG);
979  
980    // Find non-addr taken globals.
981    Result.AnalyzeGlobals(M);
982  
983    // Propagate on CG.
984    Result.AnalyzeCallGraph(CG, M);
985  
986    return Result;
987  }
988  
989  AnalysisKey GlobalsAA::Key;
990  
991  GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
992    FunctionAnalysisManager &FAM =
993        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
994    auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
995      return FAM.getResult<TargetLibraryAnalysis>(F);
996    };
997    return GlobalsAAResult::analyzeModule(M, GetTLI,
998                                          AM.getResult<CallGraphAnalysis>(M));
999  }
1000  
1001  PreservedAnalyses RecomputeGlobalsAAPass::run(Module &M,
1002                                                ModuleAnalysisManager &AM) {
1003    if (auto *G = AM.getCachedResult<GlobalsAA>(M)) {
1004      auto &CG = AM.getResult<CallGraphAnalysis>(M);
1005      G->NonAddressTakenGlobals.clear();
1006      G->UnknownFunctionsWithLocalLinkage = false;
1007      G->IndirectGlobals.clear();
1008      G->AllocsForIndirectGlobals.clear();
1009      G->FunctionInfos.clear();
1010      G->FunctionToSCCMap.clear();
1011      G->Handles.clear();
1012      G->CollectSCCMembership(CG);
1013      G->AnalyzeGlobals(M);
1014      G->AnalyzeCallGraph(CG, M);
1015    }
1016    return PreservedAnalyses::all();
1017  }
1018  
1019  char GlobalsAAWrapperPass::ID = 0;
1020  INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
1021                        "Globals Alias Analysis", false, true)
1022  INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
1023  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1024  INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
1025                      "Globals Alias Analysis", false, true)
1026  
1027  ModulePass *llvm::createGlobalsAAWrapperPass() {
1028    return new GlobalsAAWrapperPass();
1029  }
1030  
1031  GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
1032    initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
1033  }
1034  
1035  bool GlobalsAAWrapperPass::runOnModule(Module &M) {
1036    auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
1037      return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1038    };
1039    Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
1040        M, GetTLI, getAnalysis<CallGraphWrapperPass>().getCallGraph())));
1041    return false;
1042  }
1043  
1044  bool GlobalsAAWrapperPass::doFinalization(Module &M) {
1045    Result.reset();
1046    return false;
1047  }
1048  
1049  void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1050    AU.setPreservesAll();
1051    AU.addRequired<CallGraphWrapperPass>();
1052    AU.addRequired<TargetLibraryInfoWrapperPass>();
1053  }
1054