//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObject(Loc.Ptr);
  assert(!isa<AllocaInst>(UO) &&
         "Should have been handled by getModRefInfoMask()");
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

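/// Apply \p ArgMR to each pointer argument of \p Call, treating each argument
/// as a memory location of unknown size.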
static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: Memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
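///
/// For example (illustrative): if a function in the SCC makes a recursive call
/// and passes a global @G as a pointer argument, the access to @G is recorded
/// in the second part of the result; it only becomes relevant if some function
/// in the SCC turns out to access argument memory at all.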
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It carries a memory access
      // tag to prevent itself from being removed by optimizations, while not
      // blocking other instructions from being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // We have reached the bottom of the lattice; we will not be able to
    // improve the result.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}
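
// Example (illustrative): for an SCC in which every function only loads
// through its pointer arguments, the combined effects meet to "argmem: read",
// and addMemoryAttrs() tightens each function to memory(argmem: read).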

// Compute definitive function attributes for a function, taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  if (CachedPrevailingSummary.count(VI))
    return CachedPrevailingSummary[VI];

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge, meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage, check that it is
  ///    prevailing, and if so take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used,
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported; it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller, the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining, which are ultimately dropped from the TU. Since this
  ///       is localized to the TU, the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers, so we
  ///    ignore them for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  CachedPrevailingSummary[VI] = nullptr;
  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath() << ". Previous module "
            << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  if (Local) {
    assert(!Prevailing);
    CachedPrevailingSummary[VI] = Local;
  } else if (Prevailing) {
    assert(!Local);
    CachedPrevailingSummary[VI] = Prevailing;
  }

  return CachedPrevailingSummary[VI];
}

bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: implement addNoAliasAttrs once there's more information about the
  // return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // Function summaries can fail to contain information, e.g. for
      // declarations.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call the propagation function on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly, Attribute::ReadNone or
/// Attribute::WriteOnly.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture; see the comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (!CB.doesNotCapture(UseIndex)) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}
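
// Example (illustrative IR): in
//   define void @f(ptr %p) { %v = load i32, ptr %p ... }
// the only use of %p is a non-volatile load, so determinePointerAccessAttrs()
// returns Attribute::ReadOnly for %p.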

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}
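
// Example (illustrative IR): in
//   define ptr @identity(ptr %p) { ret ptr %p }
// every return returns %p, so %p is marked 'returned'.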

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}
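
// Example (illustrative): if the entry block of @caller unconditionally calls
//   declare void @callee(ptr nonnull)
// passing @caller's own argument %p, then %p can be marked nonnull in @caller
// as well, since the call is guaranteed to execute.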

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A.hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A.addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self);
        if (R != Attribute::None)
          if (addAccessAttr(&A, R))
            Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::NoCapture);
        ++NumNoCapture;
        Changed.insert(A->getParent());

        // Infer the access attributes given the new nocapture one.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
        if (R != Attribute::None)
          addAccessAttr(A, R);
      }
      continue;
    }

    bool SCCCaptured = false;
    for (ArgumentGraphNode *Node : ArgumentSCC) {
      if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
      if (SCCCaptured)
        break;
    }
    if (SCCCaptured)
      continue;

    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      A->addAttr(Attribute::NoCapture);
      ++NumNoCapture;
      Changed.insert(A->getParent());
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
      return false;
  }

  return true;
}
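
// Example (illustrative IR):
//   define ptr @wrapper() {
//     %p = call noalias ptr @malloc(i64 8)
//     ret ptr %p
//   }
// only returns the result of a noalias call, so it is malloc-like and its
// return value can be marked noalias.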

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}
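
// Example (illustrative IR): in
//   define ptr @g() { ret ptr @some_global }
// the returned value is known non-null locally (assuming @some_global is an
// ordinary, non-extern_weak global), so isReturnNonNull() returns true with
// Speculative == false.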

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Speculate that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}
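
// Example (illustrative IR): in
//   define i32 @c() { ret i32 42 }
// the returned constant is trivially neither undef nor poison, so the return
// value is marked noundef.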

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes for which every
    // function was either:
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break attribute assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CB is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw; we
      // just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}
1503
1504 /// Helper for NoFree inference predicate InstrBreaksAttribute.
InstrBreaksNoFree(Instruction & I,const SCCNodeSet & SCCNodes)1505 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
1506 CallBase *CB = dyn_cast<CallBase>(&I);
1507 if (!CB)
1508 return false;
1509
1510 if (CB->hasFnAttr(Attribute::NoFree))
1511 return false;
1512
1513 // Speculatively assume in SCC.
1514 if (Function *Callee = CB->getCalledFunction())
1515 if (SCCNodes.contains(Callee))
1516 return false;
1517
1518 return true;
1519 }

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we choose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}
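
// For example (an editor's illustration of the predicate above):
//
//   store atomic i32 0, ptr %p monotonic, align 4   ; ordered (conservative)
//   load atomic i32, ptr %p unordered, align 4      ; not ordered
//   fence seq_cst                                   ; ordered
//   fence syncscope("singlethread") seq_cst         ; not ordered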

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmove are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume that callees inside the SCC are nosync; the scan
  // of those functions will catch any violation.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}
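
// For example (editor's illustration): a volatile load or a seq_cst store
// breaks the nosync assumption, while
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)
//
// does not, because the trailing i1 false marks the copy as non-volatile.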

/// Attempt to remove the convergent function attribute when possible.
///
/// Functions whose attributes were changed are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}
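
// For example (an illustrative sketch, not from the source): in the
// single-function SCC
//
//   define void @kernel() convergent {
//     call void @helper()   ; @helper is not convergent
//     ret void
//   }
//
// no instruction breaks the non-convergent assumption, so the convergent
// attribute is dropped from @kernel.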

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Functions whose attributes were changed are added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that the nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}
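
// For example (illustrative IR, not from the source): in the mutually
// recursive SCC
//
//   define void @even(i32 %n) { ... call void @odd(i32 %m) ... }
//   define void @odd(i32 %n)  { ... call void @even(i32 %m) ... }
//
// the recursive calls stay inside the SCC, so if no other instruction may
// throw, free memory, or synchronize, both functions are marked nounwind,
// nofree, and nosync in a single pass over the SCC.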

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also catches self-recursion: F is not yet
  // marked norecurse, so a call from F to F fails the check below.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}
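
// For example (editor's sketch): in the singleton SCC
//
//   define i32 @f(i32 %x) {
//     %r = call i32 @g(i32 %x)   ; @g is already marked norecurse
//     ret i32 %r
//   }
//
// every call is a direct call to a norecurse function other than @f itself,
// so @f is marked norecurse. An indirect call, or a call to @f, would have
// triggered the early return above.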

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// FIXME: this doesn't handle recursion.
static bool canReturn(Function &F) {
  SmallVector<BasicBlock *, 16> Worklist;
  SmallPtrSet<BasicBlock *, 16> Visited;

  Visited.insert(&F.front());
  Worklist.push_back(&F.front());

  do {
    BasicBlock *BB = Worklist.pop_back_val();
    if (basicBlockCanReturn(*BB))
      return true;
    for (BasicBlock *Succ : successors(BB))
      if (Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  } while (!Worklist.empty());

  return false;
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() ||
        F->hasFnAttribute(Attribute::Naked) || F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}
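
// For example (an illustrative sketch): a function such as
//
//   define void @fatal() {
//     call void @abort()   ; @abort is noreturn
//     unreachable
//   }
//
// has no block that reaches a ret without passing through a noreturn call,
// so the function itself is marked noreturn.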

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F), [](const Instruction &I) {
    return I.willReturn();
  });
}
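
// For example (editor's illustration): a loop-free function such as
//
//   define i32 @sum3(i32 %a, i32 %b, i32 %c) {
//     %t = add i32 %a, %b
//     %r = add i32 %t, %c
//     ret i32 %r
//   }
//
// has no backedges and every instruction trivially returns, so given an
// exact definition it is inferred willreturn.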

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Treat any function we're trying not to optimize as if it contained an
      // unknown (indirect) call, and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly = false) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    addArgumentAttrs(Nodes.SCCNodes, Changed);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested. For such functions we still
  // infer argument attributes, because they can affect optimization behavior
  // in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether or not a call's callee modifies memory and queries
    // that through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}
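
// This pass is exposed in the new pass manager as "function-attrs"; a usage
// sketch (assuming the usual registration in PassRegistry.def):
//
//   opt -passes='cgscc(function-attrs<skip-non-recursive-function-attrs>)' \
//       input.ll -S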

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}
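
// For example (illustrative IR, not from the source): with
//
//   define internal void @helper() { ... }
//   define void @caller() norecurse {
//     call void @helper()
//     ret void
//   }
//
// every use of @helper is a direct call from a norecurse function, so
// @helper is marked norecurse as well.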

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse, so we need only save the singleton SCCs: SCCs
  // containing multiple functions are clearly recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}

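// This module pass is exposed in the new pass manager as "rpo-function-attrs"
// (a usage sketch, assuming the usual registration):
//
//   opt -passes=rpo-function-attrs input.ll -S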