1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file implements interprocedural passes which walk the 11 /// call-graph deducing and/or propagating function attributes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/FunctionAttrs.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/SCCIterator.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallPtrSet.h" 22 #include "llvm/ADT/SmallSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/Analysis/AssumptionCache.h" 26 #include "llvm/Analysis/BasicAliasAnalysis.h" 27 #include "llvm/Analysis/CFG.h" 28 #include "llvm/Analysis/CGSCCPassManager.h" 29 #include "llvm/Analysis/CallGraph.h" 30 #include "llvm/Analysis/CallGraphSCCPass.h" 31 #include "llvm/Analysis/CaptureTracking.h" 32 #include "llvm/Analysis/LazyCallGraph.h" 33 #include "llvm/Analysis/MemoryLocation.h" 34 #include "llvm/Analysis/ValueTracking.h" 35 #include "llvm/IR/Argument.h" 36 #include "llvm/IR/Attributes.h" 37 #include "llvm/IR/BasicBlock.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/InstIterator.h" 42 #include "llvm/IR/InstrTypes.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Metadata.h" 47 #include "llvm/IR/ModuleSummaryIndex.h" 48 #include "llvm/IR/PassManager.h" 49 #include "llvm/IR/Type.h" 50 #include "llvm/IR/Use.h" 51 #include "llvm/IR/User.h" 52 #include "llvm/IR/Value.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Compiler.h" 56 #include "llvm/Support/Debug.h" 57 #include "llvm/Support/ErrorHandling.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Transforms/IPO.h" 60 #include "llvm/Transforms/Utils/Local.h" 61 #include <cassert> 62 #include <iterator> 63 #include <map> 64 #include <optional> 65 #include <vector> 66 67 using namespace llvm; 68 69 #define DEBUG_TYPE "function-attrs" 70 71 STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); 72 STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); 73 STATISTIC(NumReturned, "Number of arguments marked returned"); 74 STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); 75 STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); 76 STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); 77 STATISTIC(NumNoAlias, "Number of function returns marked noalias"); 78 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); 79 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); 80 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); 81 STATISTIC(NumNoFree, "Number of functions marked as nofree"); 82 STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); 83 STATISTIC(NumNoSync, "Number of functions marked as nosync"); 84 85 STATISTIC(NumThinLinkNoRecurse, 86 "Number of functions marked as norecurse during thinlink"); 87 STATISTIC(NumThinLinkNoUnwind, 88 "Number of functions marked as nounwind during thinlink"); 89 90 static cl::opt<bool> EnableNonnullArgPropagation( 91 "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 92 cl::desc("Try to propagate nonnull argument attributes from callsites to " 93 "caller functions.")); 94 95 static cl::opt<bool> DisableNoUnwindInference( 96 "disable-nounwind-inference", cl::Hidden, 97 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 98 99 static cl::opt<bool> DisableNoFreeInference( 100 "disable-nofree-inference", cl::Hidden, 101 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 102 103 static cl::opt<bool> DisableThinLTOPropagation( 104 "disable-thinlto-funcattrs", cl::init(true), cl::Hidden, 105 cl::desc("Don't propagate function-attrs in thinLTO")); 106 107 namespace { 108 109 using SCCNodeSet = SmallSetVector<Function *, 8>; 110 111 } // end anonymous namespace 112 113 static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc, 114 ModRefInfo MR, AAResults &AAR) { 115 // Ignore accesses to known-invariant or local memory. 116 MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true); 117 if (isNoModRef(MR)) 118 return; 119 120 const Value *UO = getUnderlyingObject(Loc.Ptr); 121 assert(!isa<AllocaInst>(UO) && 122 "Should have been handled by getModRefInfoMask()"); 123 if (isa<Argument>(UO)) { 124 ME |= MemoryEffects::argMemOnly(MR); 125 return; 126 } 127 128 // If it's not an identified object, it might be an argument. 129 if (!isIdentifiedObject(UO)) 130 ME |= MemoryEffects::argMemOnly(MR); 131 ME |= MemoryEffects(IRMemLocation::Other, MR); 132 } 133 134 static void addArgLocs(MemoryEffects &ME, const CallBase *Call, 135 ModRefInfo ArgMR, AAResults &AAR) { 136 for (const Value *Arg : Call->args()) { 137 if (!Arg->getType()->isPtrOrPtrVectorTy()) 138 continue; 139 140 addLocAccess(ME, 141 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()), 142 ArgMR, AAR); 143 } 144 } 145 146 /// Returns the memory access attribute for function F using AAR for AA results, 147 /// where SCCNodes is the current SCC. 148 /// 149 /// If ThisBody is true, this function may examine the function body and will 150 /// return a result pertaining to this copy of the function. If it is false, the 151 /// result will be based only on AA results for the function declaration; it 152 /// will be assumed that some other (perhaps less optimized) version of the 153 /// function may be selected at link time. 154 /// 155 /// The return value is split into two parts: Memory effects that always apply, 156 /// and additional memory effects that apply if any of the functions in the SCC 157 /// can access argmem. 158 static std::pair<MemoryEffects, MemoryEffects> 159 checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, 160 const SCCNodeSet &SCCNodes) { 161 MemoryEffects OrigME = AAR.getMemoryEffects(&F); 162 if (OrigME.doesNotAccessMemory()) 163 // Already perfect! 164 return {OrigME, MemoryEffects::none()}; 165 166 if (!ThisBody) 167 return {OrigME, MemoryEffects::none()}; 168 169 MemoryEffects ME = MemoryEffects::none(); 170 // Additional locations accessed if the SCC accesses argmem. 171 MemoryEffects RecursiveArgME = MemoryEffects::none(); 172 173 // Inalloca and preallocated arguments are always clobbered by the call. 174 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || 175 F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) 176 ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef); 177 178 // Scan the function body for instructions that may read or write memory. 179 for (Instruction &I : instructions(F)) { 180 // Some instructions can be ignored even if they read or write memory. 181 // Detect these now, skipping to the next instruction if one is found. 182 if (auto *Call = dyn_cast<CallBase>(&I)) { 183 // We can optimistically ignore calls to functions in the same SCC, with 184 // two caveats: 185 // * Calls with operand bundles may have additional effects. 186 // * Argument memory accesses may imply additional effects depending on 187 // what the argument location is. 188 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 189 SCCNodes.count(Call->getCalledFunction())) { 190 // Keep track of which additional locations are accessed if the SCC 191 // turns out to access argmem. 192 addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR); 193 continue; 194 } 195 196 MemoryEffects CallME = AAR.getMemoryEffects(Call); 197 198 // If the call doesn't access memory, we're done. 199 if (CallME.doesNotAccessMemory()) 200 continue; 201 202 // A pseudo probe call shouldn't change any function attribute since it 203 // doesn't translate to a real instruction. It comes with a memory access 204 // tag to prevent itself being removed by optimizations and not block 205 // other instructions being optimized. 206 if (isa<PseudoProbeInst>(I)) 207 continue; 208 209 ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem); 210 211 // If the call accesses captured memory (currently part of "other") and 212 // an argument is captured (currently not tracked), then it may also 213 // access argument memory. 214 ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other); 215 ME |= MemoryEffects::argMemOnly(OtherMR); 216 217 // Check whether all pointer arguments point to local memory, and 218 // ignore calls that only access local memory. 219 ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem); 220 if (ArgMR != ModRefInfo::NoModRef) 221 addArgLocs(ME, Call, ArgMR, AAR); 222 continue; 223 } 224 225 ModRefInfo MR = ModRefInfo::NoModRef; 226 if (I.mayWriteToMemory()) 227 MR |= ModRefInfo::Mod; 228 if (I.mayReadFromMemory()) 229 MR |= ModRefInfo::Ref; 230 if (MR == ModRefInfo::NoModRef) 231 continue; 232 233 std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I); 234 if (!Loc) { 235 // If no location is known, conservatively assume anything can be 236 // accessed. 237 ME |= MemoryEffects(MR); 238 continue; 239 } 240 241 // Volatile operations may access inaccessible memory. 242 if (I.isVolatile()) 243 ME |= MemoryEffects::inaccessibleMemOnly(MR); 244 245 addLocAccess(ME, *Loc, MR, AAR); 246 } 247 248 return {OrigME & ME, RecursiveArgME}; 249 } 250 251 MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, 252 AAResults &AAR) { 253 return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first; 254 } 255 256 /// Deduce readonly/readnone/writeonly attributes for the SCC. 257 template <typename AARGetterT> 258 static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, 259 SmallSet<Function *, 8> &Changed) { 260 MemoryEffects ME = MemoryEffects::none(); 261 MemoryEffects RecursiveArgME = MemoryEffects::none(); 262 for (Function *F : SCCNodes) { 263 // Call the callable parameter to look up AA results for this function. 264 AAResults &AAR = AARGetter(*F); 265 // Non-exact function definitions may not be selected at link time, and an 266 // alternative version that writes to memory may be selected. See the 267 // comment on GlobalValue::isDefinitionExact for more details. 268 auto [FnME, FnRecursiveArgME] = 269 checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); 270 ME |= FnME; 271 RecursiveArgME |= FnRecursiveArgME; 272 // Reached bottom of the lattice, we will not be able to improve the result. 273 if (ME == MemoryEffects::unknown()) 274 return; 275 } 276 277 // If the SCC accesses argmem, add recursive accesses resulting from that. 278 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); 279 if (ArgMR != ModRefInfo::NoModRef) 280 ME |= RecursiveArgME & MemoryEffects(ArgMR); 281 282 for (Function *F : SCCNodes) { 283 MemoryEffects OldME = F->getMemoryEffects(); 284 MemoryEffects NewME = ME & OldME; 285 if (NewME != OldME) { 286 ++NumMemoryAttr; 287 F->setMemoryEffects(NewME); 288 // Remove conflicting writable attributes. 289 if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem))) 290 for (Argument &A : F->args()) 291 A.removeAttr(Attribute::Writable); 292 Changed.insert(F); 293 } 294 } 295 } 296 297 // Compute definitive function attributes for a function taking into account 298 // prevailing definitions and linkage types 299 static FunctionSummary *calculatePrevailingSummary( 300 ValueInfo VI, 301 DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary, 302 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 303 IsPrevailing) { 304 305 if (CachedPrevailingSummary.count(VI)) 306 return CachedPrevailingSummary[VI]; 307 308 /// At this point, prevailing symbols have been resolved. The following leads 309 /// to returning a conservative result: 310 /// - Multiple instances with local linkage. Normally local linkage would be 311 /// unique per module 312 /// as the GUID includes the module path. We could have a guid alias if 313 /// there wasn't any distinguishing path when each file was compiled, but 314 /// that should be rare so we'll punt on those. 315 316 /// These next 2 cases should not happen and will assert: 317 /// - Multiple instances with external linkage. This should be caught in 318 /// symbol resolution 319 /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our 320 /// knowledge meaning we have to go conservative. 321 322 /// Otherwise, we calculate attributes for a function as: 323 /// 1. If we have a local linkage, take its attributes. If there's somehow 324 /// multiple, bail and go conservative. 325 /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is 326 /// prevailing, take its attributes. 327 /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic 328 /// differences. However, if the prevailing copy is known it will be used 329 /// so take its attributes. If the prevailing copy is in a native file 330 /// all IR copies will be dead and propagation will go conservative. 331 /// 4. AvailableExternally summaries without a prevailing copy are known to 332 /// occur in a couple of circumstances: 333 /// a. An internal function gets imported due to its caller getting 334 /// imported, it becomes AvailableExternally but no prevailing 335 /// definition exists. Because it has to get imported along with its 336 /// caller the attributes will be captured by propagating on its 337 /// caller. 338 /// b. C++11 [temp.explicit]p10 can generate AvailableExternally 339 /// definitions of explicitly instanced template declarations 340 /// for inlining which are ultimately dropped from the TU. Since this 341 /// is localized to the TU the attributes will have already made it to 342 /// the callers. 343 /// These are edge cases and already captured by their callers so we 344 /// ignore these for now. If they become relevant to optimize in the 345 /// future this can be revisited. 346 /// 5. Otherwise, go conservative. 347 348 CachedPrevailingSummary[VI] = nullptr; 349 FunctionSummary *Local = nullptr; 350 FunctionSummary *Prevailing = nullptr; 351 352 for (const auto &GVS : VI.getSummaryList()) { 353 if (!GVS->isLive()) 354 continue; 355 356 FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject()); 357 // Virtual and Unknown (e.g. indirect) calls require going conservative 358 if (!FS || FS->fflags().HasUnknownCall) 359 return nullptr; 360 361 const auto &Linkage = GVS->linkage(); 362 if (GlobalValue::isLocalLinkage(Linkage)) { 363 if (Local) { 364 LLVM_DEBUG( 365 dbgs() 366 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on " 367 "function " 368 << VI.name() << " from " << FS->modulePath() << ". Previous module " 369 << Local->modulePath() << "\n"); 370 return nullptr; 371 } 372 Local = FS; 373 } else if (GlobalValue::isExternalLinkage(Linkage)) { 374 assert(IsPrevailing(VI.getGUID(), GVS.get())); 375 Prevailing = FS; 376 break; 377 } else if (GlobalValue::isWeakODRLinkage(Linkage) || 378 GlobalValue::isLinkOnceODRLinkage(Linkage) || 379 GlobalValue::isWeakAnyLinkage(Linkage) || 380 GlobalValue::isLinkOnceAnyLinkage(Linkage)) { 381 if (IsPrevailing(VI.getGUID(), GVS.get())) { 382 Prevailing = FS; 383 break; 384 } 385 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) { 386 // TODO: Handle these cases if they become meaningful 387 continue; 388 } 389 } 390 391 if (Local) { 392 assert(!Prevailing); 393 CachedPrevailingSummary[VI] = Local; 394 } else if (Prevailing) { 395 assert(!Local); 396 CachedPrevailingSummary[VI] = Prevailing; 397 } 398 399 return CachedPrevailingSummary[VI]; 400 } 401 402 bool llvm::thinLTOPropagateFunctionAttrs( 403 ModuleSummaryIndex &Index, 404 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 405 IsPrevailing) { 406 // TODO: implement addNoAliasAttrs once 407 // there's more information about the return type in the summary 408 if (DisableThinLTOPropagation) 409 return false; 410 411 DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary; 412 bool Changed = false; 413 414 auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) { 415 // Assume we can propagate unless we discover otherwise 416 FunctionSummary::FFlags InferredFlags; 417 InferredFlags.NoRecurse = (SCCNodes.size() == 1); 418 InferredFlags.NoUnwind = true; 419 420 for (auto &V : SCCNodes) { 421 FunctionSummary *CallerSummary = 422 calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing); 423 424 // Function summaries can fail to contain information such as declarations 425 if (!CallerSummary) 426 return; 427 428 if (CallerSummary->fflags().MayThrow) 429 InferredFlags.NoUnwind = false; 430 431 for (const auto &Callee : CallerSummary->calls()) { 432 FunctionSummary *CalleeSummary = calculatePrevailingSummary( 433 Callee.first, CachedPrevailingSummary, IsPrevailing); 434 435 if (!CalleeSummary) 436 return; 437 438 if (!CalleeSummary->fflags().NoRecurse) 439 InferredFlags.NoRecurse = false; 440 441 if (!CalleeSummary->fflags().NoUnwind) 442 InferredFlags.NoUnwind = false; 443 444 if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse) 445 break; 446 } 447 } 448 449 if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) { 450 Changed = true; 451 for (auto &V : SCCNodes) { 452 if (InferredFlags.NoRecurse) { 453 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to " 454 << V.name() << "\n"); 455 ++NumThinLinkNoRecurse; 456 } 457 458 if (InferredFlags.NoUnwind) { 459 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to " 460 << V.name() << "\n"); 461 ++NumThinLinkNoUnwind; 462 } 463 464 for (const auto &S : V.getSummaryList()) { 465 if (auto *FS = dyn_cast<FunctionSummary>(S.get())) { 466 if (InferredFlags.NoRecurse) 467 FS->setNoRecurse(); 468 469 if (InferredFlags.NoUnwind) 470 FS->setNoUnwind(); 471 } 472 } 473 } 474 } 475 }; 476 477 // Call propagation functions on each SCC in the Index 478 for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd(); 479 ++I) { 480 std::vector<ValueInfo> Nodes(*I); 481 PropagateAttributes(Nodes); 482 } 483 return Changed; 484 } 485 486 namespace { 487 488 /// For a given pointer Argument, this retains a list of Arguments of functions 489 /// in the same SCC that the pointer data flows into. We use this to build an 490 /// SCC of the arguments. 491 struct ArgumentGraphNode { 492 Argument *Definition; 493 SmallVector<ArgumentGraphNode *, 4> Uses; 494 }; 495 496 class ArgumentGraph { 497 // We store pointers to ArgumentGraphNode objects, so it's important that 498 // that they not move around upon insert. 499 using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>; 500 501 ArgumentMapTy ArgumentMap; 502 503 // There is no root node for the argument graph, in fact: 504 // void f(int *x, int *y) { if (...) f(x, y); } 505 // is an example where the graph is disconnected. The SCCIterator requires a 506 // single entry point, so we maintain a fake ("synthetic") root node that 507 // uses every node. Because the graph is directed and nothing points into 508 // the root, it will not participate in any SCCs (except for its own). 509 ArgumentGraphNode SyntheticRoot; 510 511 public: 512 ArgumentGraph() { SyntheticRoot.Definition = nullptr; } 513 514 using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator; 515 516 iterator begin() { return SyntheticRoot.Uses.begin(); } 517 iterator end() { return SyntheticRoot.Uses.end(); } 518 ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } 519 520 ArgumentGraphNode *operator[](Argument *A) { 521 ArgumentGraphNode &Node = ArgumentMap[A]; 522 Node.Definition = A; 523 SyntheticRoot.Uses.push_back(&Node); 524 return &Node; 525 } 526 }; 527 528 /// This tracker checks whether callees are in the SCC, and if so it does not 529 /// consider that a capture, instead adding it to the "Uses" list and 530 /// continuing with the analysis. 531 struct ArgumentUsesTracker : public CaptureTracker { 532 ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {} 533 534 void tooManyUses() override { Captured = true; } 535 536 bool captured(const Use *U) override { 537 CallBase *CB = dyn_cast<CallBase>(U->getUser()); 538 if (!CB) { 539 Captured = true; 540 return true; 541 } 542 543 Function *F = CB->getCalledFunction(); 544 if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { 545 Captured = true; 546 return true; 547 } 548 549 assert(!CB->isCallee(U) && "callee operand reported captured?"); 550 const unsigned UseIndex = CB->getDataOperandNo(U); 551 if (UseIndex >= CB->arg_size()) { 552 // Data operand, but not a argument operand -- must be a bundle operand 553 assert(CB->hasOperandBundles() && "Must be!"); 554 555 // CaptureTracking told us that we're being captured by an operand bundle 556 // use. In this case it does not matter if the callee is within our SCC 557 // or not -- we've been captured in some unknown way, and we have to be 558 // conservative. 559 Captured = true; 560 return true; 561 } 562 563 if (UseIndex >= F->arg_size()) { 564 assert(F->isVarArg() && "More params than args in non-varargs call"); 565 Captured = true; 566 return true; 567 } 568 569 Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); 570 return false; 571 } 572 573 // True only if certainly captured (used outside our SCC). 574 bool Captured = false; 575 576 // Uses within our SCC. 577 SmallVector<Argument *, 4> Uses; 578 579 const SCCNodeSet &SCCNodes; 580 }; 581 582 } // end anonymous namespace 583 584 namespace llvm { 585 586 template <> struct GraphTraits<ArgumentGraphNode *> { 587 using NodeRef = ArgumentGraphNode *; 588 using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; 589 590 static NodeRef getEntryNode(NodeRef A) { return A; } 591 static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } 592 static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } 593 }; 594 595 template <> 596 struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { 597 static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } 598 599 static ChildIteratorType nodes_begin(ArgumentGraph *AG) { 600 return AG->begin(); 601 } 602 603 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } 604 }; 605 606 } // end namespace llvm 607 608 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 609 static Attribute::AttrKind 610 determinePointerAccessAttrs(Argument *A, 611 const SmallPtrSet<Argument *, 8> &SCCNodes) { 612 SmallVector<Use *, 32> Worklist; 613 SmallPtrSet<Use *, 32> Visited; 614 615 // inalloca arguments are always clobbered by the call. 616 if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) 617 return Attribute::None; 618 619 bool IsRead = false; 620 bool IsWrite = false; 621 622 for (Use &U : A->uses()) { 623 Visited.insert(&U); 624 Worklist.push_back(&U); 625 } 626 627 while (!Worklist.empty()) { 628 if (IsWrite && IsRead) 629 // No point in searching further.. 630 return Attribute::None; 631 632 Use *U = Worklist.pop_back_val(); 633 Instruction *I = cast<Instruction>(U->getUser()); 634 635 switch (I->getOpcode()) { 636 case Instruction::BitCast: 637 case Instruction::GetElementPtr: 638 case Instruction::PHI: 639 case Instruction::Select: 640 case Instruction::AddrSpaceCast: 641 // The original value is not read/written via this if the new value isn't. 642 for (Use &UU : I->uses()) 643 if (Visited.insert(&UU).second) 644 Worklist.push_back(&UU); 645 break; 646 647 case Instruction::Call: 648 case Instruction::Invoke: { 649 CallBase &CB = cast<CallBase>(*I); 650 if (CB.isCallee(U)) { 651 IsRead = true; 652 // Note that indirect calls do not capture, see comment in 653 // CaptureTracking for context 654 continue; 655 } 656 657 // Given we've explictily handled the callee operand above, what's left 658 // must be a data operand (e.g. argument or operand bundle) 659 const unsigned UseIndex = CB.getDataOperandNo(U); 660 661 // Some intrinsics (for instance ptrmask) do not capture their results, 662 // but return results thas alias their pointer argument, and thus should 663 // be handled like GEP or addrspacecast above. 664 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 665 &CB, /*MustPreserveNullness=*/false)) { 666 for (Use &UU : CB.uses()) 667 if (Visited.insert(&UU).second) 668 Worklist.push_back(&UU); 669 } else if (!CB.doesNotCapture(UseIndex)) { 670 if (!CB.onlyReadsMemory()) 671 // If the callee can save a copy into other memory, then simply 672 // scanning uses of the call is insufficient. We have no way 673 // of tracking copies of the pointer through memory to see 674 // if a reloaded copy is written to, thus we must give up. 675 return Attribute::None; 676 // Push users for processing once we finish this one 677 if (!I->getType()->isVoidTy()) 678 for (Use &UU : I->uses()) 679 if (Visited.insert(&UU).second) 680 Worklist.push_back(&UU); 681 } 682 683 ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem); 684 if (isNoModRef(ArgMR)) 685 continue; 686 687 if (Function *F = CB.getCalledFunction()) 688 if (CB.isArgOperand(U) && UseIndex < F->arg_size() && 689 SCCNodes.count(F->getArg(UseIndex))) 690 // This is an argument which is part of the speculative SCC. Note 691 // that only operands corresponding to formal arguments of the callee 692 // can participate in the speculation. 693 break; 694 695 // The accessors used on call site here do the right thing for calls and 696 // invokes with operand bundles. 697 if (CB.doesNotAccessMemory(UseIndex)) { 698 /* nop */ 699 } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) { 700 IsRead = true; 701 } else if (!isRefSet(ArgMR) || 702 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) { 703 IsWrite = true; 704 } else { 705 return Attribute::None; 706 } 707 break; 708 } 709 710 case Instruction::Load: 711 // A volatile load has side effects beyond what readonly can be relied 712 // upon. 713 if (cast<LoadInst>(I)->isVolatile()) 714 return Attribute::None; 715 716 IsRead = true; 717 break; 718 719 case Instruction::Store: 720 if (cast<StoreInst>(I)->getValueOperand() == *U) 721 // untrackable capture 722 return Attribute::None; 723 724 // A volatile store has side effects beyond what writeonly can be relied 725 // upon. 726 if (cast<StoreInst>(I)->isVolatile()) 727 return Attribute::None; 728 729 IsWrite = true; 730 break; 731 732 case Instruction::ICmp: 733 case Instruction::Ret: 734 break; 735 736 default: 737 return Attribute::None; 738 } 739 } 740 741 if (IsWrite && IsRead) 742 return Attribute::None; 743 else if (IsRead) 744 return Attribute::ReadOnly; 745 else if (IsWrite) 746 return Attribute::WriteOnly; 747 else 748 return Attribute::ReadNone; 749 } 750 751 /// Deduce returned attributes for the SCC. 752 static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes, 753 SmallSet<Function *, 8> &Changed) { 754 // Check each function in turn, determining if an argument is always returned. 755 for (Function *F : SCCNodes) { 756 // We can infer and propagate function attributes only when we know that the 757 // definition we'll get at link time is *exactly* the definition we see now. 758 // For more details, see GlobalValue::mayBeDerefined. 759 if (!F->hasExactDefinition()) 760 continue; 761 762 if (F->getReturnType()->isVoidTy()) 763 continue; 764 765 // There is nothing to do if an argument is already marked as 'returned'. 766 if (F->getAttributes().hasAttrSomewhere(Attribute::Returned)) 767 continue; 768 769 auto FindRetArg = [&]() -> Argument * { 770 Argument *RetArg = nullptr; 771 for (BasicBlock &BB : *F) 772 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 773 // Note that stripPointerCasts should look through functions with 774 // returned arguments. 775 auto *RetVal = 776 dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts()); 777 if (!RetVal || RetVal->getType() != F->getReturnType()) 778 return nullptr; 779 780 if (!RetArg) 781 RetArg = RetVal; 782 else if (RetArg != RetVal) 783 return nullptr; 784 } 785 786 return RetArg; 787 }; 788 789 if (Argument *RetArg = FindRetArg()) { 790 RetArg->addAttr(Attribute::Returned); 791 ++NumReturned; 792 Changed.insert(F); 793 } 794 } 795 } 796 797 /// If a callsite has arguments that are also arguments to the parent function, 798 /// try to propagate attributes from the callsite's arguments to the parent's 799 /// arguments. This may be important because inlining can cause information loss 800 /// when attribute knowledge disappears with the inlined call. 801 static bool addArgumentAttrsFromCallsites(Function &F) { 802 if (!EnableNonnullArgPropagation) 803 return false; 804 805 bool Changed = false; 806 807 // For an argument attribute to transfer from a callsite to the parent, the 808 // call must be guaranteed to execute every time the parent is called. 809 // Conservatively, just check for calls in the entry block that are guaranteed 810 // to execute. 811 // TODO: This could be enhanced by testing if the callsite post-dominates the 812 // entry block or by doing simple forward walks or backward walks to the 813 // callsite. 814 BasicBlock &Entry = F.getEntryBlock(); 815 for (Instruction &I : Entry) { 816 if (auto *CB = dyn_cast<CallBase>(&I)) { 817 if (auto *CalledFunc = CB->getCalledFunction()) { 818 for (auto &CSArg : CalledFunc->args()) { 819 if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) 820 continue; 821 822 // If the non-null callsite argument operand is an argument to 'F' 823 // (the caller) and the call is guaranteed to execute, then the value 824 // must be non-null throughout 'F'. 825 auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); 826 if (FArg && !FArg->hasNonNullAttr()) { 827 FArg->addAttr(Attribute::NonNull); 828 Changed = true; 829 } 830 } 831 } 832 } 833 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 834 break; 835 } 836 837 return Changed; 838 } 839 840 static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { 841 assert((R == Attribute::ReadOnly || R == Attribute::ReadNone || 842 R == Attribute::WriteOnly) 843 && "Must be an access attribute."); 844 assert(A && "Argument must not be null."); 845 846 // If the argument already has the attribute, nothing needs to be done. 847 if (A->hasAttribute(R)) 848 return false; 849 850 // Otherwise, remove potentially conflicting attribute, add the new one, 851 // and update statistics. 852 A->removeAttr(Attribute::WriteOnly); 853 A->removeAttr(Attribute::ReadOnly); 854 A->removeAttr(Attribute::ReadNone); 855 // Remove conflicting writable attribute. 856 if (R == Attribute::ReadNone || R == Attribute::ReadOnly) 857 A->removeAttr(Attribute::Writable); 858 A->addAttr(R); 859 if (R == Attribute::ReadOnly) 860 ++NumReadOnlyArg; 861 else if (R == Attribute::WriteOnly) 862 ++NumWriteOnlyArg; 863 else 864 ++NumReadNoneArg; 865 return true; 866 } 867 868 /// Deduce nocapture attributes for the SCC. 869 static void addArgumentAttrs(const SCCNodeSet &SCCNodes, 870 SmallSet<Function *, 8> &Changed) { 871 ArgumentGraph AG; 872 873 // Check each function in turn, determining which pointer arguments are not 874 // captured. 875 for (Function *F : SCCNodes) { 876 // We can infer and propagate function attributes only when we know that the 877 // definition we'll get at link time is *exactly* the definition we see now. 878 // For more details, see GlobalValue::mayBeDerefined. 879 if (!F->hasExactDefinition()) 880 continue; 881 882 if (addArgumentAttrsFromCallsites(*F)) 883 Changed.insert(F); 884 885 // Functions that are readonly (or readnone) and nounwind and don't return 886 // a value can't capture arguments. Don't analyze them. 887 if (F->onlyReadsMemory() && F->doesNotThrow() && 888 F->getReturnType()->isVoidTy()) { 889 for (Argument &A : F->args()) { 890 if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { 891 A.addAttr(Attribute::NoCapture); 892 ++NumNoCapture; 893 Changed.insert(F); 894 } 895 } 896 continue; 897 } 898 899 for (Argument &A : F->args()) { 900 if (!A.getType()->isPointerTy()) 901 continue; 902 bool HasNonLocalUses = false; 903 if (!A.hasNoCaptureAttr()) { 904 ArgumentUsesTracker Tracker(SCCNodes); 905 PointerMayBeCaptured(&A, &Tracker); 906 if (!Tracker.Captured) { 907 if (Tracker.Uses.empty()) { 908 // If it's trivially not captured, mark it nocapture now. 909 A.addAttr(Attribute::NoCapture); 910 ++NumNoCapture; 911 Changed.insert(F); 912 } else { 913 // If it's not trivially captured and not trivially not captured, 914 // then it must be calling into another function in our SCC. Save 915 // its particulars for Argument-SCC analysis later. 916 ArgumentGraphNode *Node = AG[&A]; 917 for (Argument *Use : Tracker.Uses) { 918 Node->Uses.push_back(AG[Use]); 919 if (Use != &A) 920 HasNonLocalUses = true; 921 } 922 } 923 } 924 // Otherwise, it's captured. Don't bother doing SCC analysis on it. 925 } 926 if (!HasNonLocalUses && !A.onlyReadsMemory()) { 927 // Can we determine that it's readonly/readnone/writeonly without doing 928 // an SCC? Note that we don't allow any calls at all here, or else our 929 // result will be dependent on the iteration order through the 930 // functions in the SCC. 931 SmallPtrSet<Argument *, 8> Self; 932 Self.insert(&A); 933 Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self); 934 if (R != Attribute::None) 935 if (addAccessAttr(&A, R)) 936 Changed.insert(F); 937 } 938 } 939 } 940 941 // The graph we've collected is partial because we stopped scanning for 942 // argument uses once we solved the argument trivially. These partial nodes 943 // show up as ArgumentGraphNode objects with an empty Uses list, and for 944 // these nodes the final decision about whether they capture has already been 945 // made. If the definition doesn't have a 'nocapture' attribute by now, it 946 // captures. 947 948 for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { 949 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; 950 if (ArgumentSCC.size() == 1) { 951 if (!ArgumentSCC[0]->Definition) 952 continue; // synthetic root node 953 954 // eg. "void f(int* x) { if (...) f(x); }" 955 if (ArgumentSCC[0]->Uses.size() == 1 && 956 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 957 Argument *A = ArgumentSCC[0]->Definition; 958 A->addAttr(Attribute::NoCapture); 959 ++NumNoCapture; 960 Changed.insert(A->getParent()); 961 962 // Infer the access attributes given the new nocapture one 963 SmallPtrSet<Argument *, 8> Self; 964 Self.insert(&*A); 965 Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self); 966 if (R != Attribute::None) 967 addAccessAttr(A, R); 968 } 969 continue; 970 } 971 972 bool SCCCaptured = false; 973 for (ArgumentGraphNode *Node : ArgumentSCC) { 974 if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) { 975 SCCCaptured = true; 976 break; 977 } 978 } 979 if (SCCCaptured) 980 continue; 981 982 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 983 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 984 // quickly looking up whether a given Argument is in this ArgumentSCC. 985 for (ArgumentGraphNode *I : ArgumentSCC) { 986 ArgumentSCCNodes.insert(I->Definition); 987 } 988 989 for (ArgumentGraphNode *N : ArgumentSCC) { 990 for (ArgumentGraphNode *Use : N->Uses) { 991 Argument *A = Use->Definition; 992 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 993 continue; 994 SCCCaptured = true; 995 break; 996 } 997 if (SCCCaptured) 998 break; 999 } 1000 if (SCCCaptured) 1001 continue; 1002 1003 for (ArgumentGraphNode *N : ArgumentSCC) { 1004 Argument *A = N->Definition; 1005 A->addAttr(Attribute::NoCapture); 1006 ++NumNoCapture; 1007 Changed.insert(A->getParent()); 1008 } 1009 1010 // We also want to compute readonly/readnone/writeonly. With a small number 1011 // of false negatives, we can assume that any pointer which is captured 1012 // isn't going to be provably readonly or readnone, since by definition 1013 // we can't analyze all uses of a captured pointer. 1014 // 1015 // The false negatives happen when the pointer is captured by a function 1016 // that promises readonly/readnone behaviour on the pointer, then the 1017 // pointer's lifetime ends before anything that writes to arbitrary memory. 1018 // Also, a readonly/readnone pointer may be returned, but returning a 1019 // pointer is capturing it. 1020 1021 auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) { 1022 if (A == B) 1023 return A; 1024 if (A == Attribute::ReadNone) 1025 return B; 1026 if (B == Attribute::ReadNone) 1027 return A; 1028 return Attribute::None; 1029 }; 1030 1031 Attribute::AttrKind AccessAttr = Attribute::ReadNone; 1032 for (ArgumentGraphNode *N : ArgumentSCC) { 1033 Argument *A = N->Definition; 1034 Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes); 1035 AccessAttr = meetAccessAttr(AccessAttr, K); 1036 if (AccessAttr == Attribute::None) 1037 break; 1038 } 1039 1040 if (AccessAttr != Attribute::None) { 1041 for (ArgumentGraphNode *N : ArgumentSCC) { 1042 Argument *A = N->Definition; 1043 if (addAccessAttr(A, AccessAttr)) 1044 Changed.insert(A->getParent()); 1045 } 1046 } 1047 } 1048 } 1049 1050 /// Tests whether a function is "malloc-like". 1051 /// 1052 /// A function is "malloc-like" if it returns either null or a pointer that 1053 /// doesn't alias any other pointer visible to the caller. 1054 static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { 1055 SmallSetVector<Value *, 8> FlowsToReturn; 1056 for (BasicBlock &BB : *F) 1057 if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1058 FlowsToReturn.insert(Ret->getReturnValue()); 1059 1060 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1061 Value *RetVal = FlowsToReturn[i]; 1062 1063 if (Constant *C = dyn_cast<Constant>(RetVal)) { 1064 if (!C->isNullValue() && !isa<UndefValue>(C)) 1065 return false; 1066 1067 continue; 1068 } 1069 1070 if (isa<Argument>(RetVal)) 1071 return false; 1072 1073 if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) 1074 switch (RVI->getOpcode()) { 1075 // Extend the analysis by looking upwards. 1076 case Instruction::BitCast: 1077 case Instruction::GetElementPtr: 1078 case Instruction::AddrSpaceCast: 1079 FlowsToReturn.insert(RVI->getOperand(0)); 1080 continue; 1081 case Instruction::Select: { 1082 SelectInst *SI = cast<SelectInst>(RVI); 1083 FlowsToReturn.insert(SI->getTrueValue()); 1084 FlowsToReturn.insert(SI->getFalseValue()); 1085 continue; 1086 } 1087 case Instruction::PHI: { 1088 PHINode *PN = cast<PHINode>(RVI); 1089 for (Value *IncValue : PN->incoming_values()) 1090 FlowsToReturn.insert(IncValue); 1091 continue; 1092 } 1093 1094 // Check whether the pointer came from an allocation. 1095 case Instruction::Alloca: 1096 break; 1097 case Instruction::Call: 1098 case Instruction::Invoke: { 1099 CallBase &CB = cast<CallBase>(*RVI); 1100 if (CB.hasRetAttr(Attribute::NoAlias)) 1101 break; 1102 if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) 1103 break; 1104 [[fallthrough]]; 1105 } 1106 default: 1107 return false; // Did not come from an allocation. 1108 } 1109 1110 if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) 1111 return false; 1112 } 1113 1114 return true; 1115 } 1116 1117 /// Deduce noalias attributes for the SCC. 1118 static void addNoAliasAttrs(const SCCNodeSet &SCCNodes, 1119 SmallSet<Function *, 8> &Changed) { 1120 // Check each function in turn, determining which functions return noalias 1121 // pointers. 1122 for (Function *F : SCCNodes) { 1123 // Already noalias. 1124 if (F->returnDoesNotAlias()) 1125 continue; 1126 1127 // We can infer and propagate function attributes only when we know that the 1128 // definition we'll get at link time is *exactly* the definition we see now. 1129 // For more details, see GlobalValue::mayBeDerefined. 1130 if (!F->hasExactDefinition()) 1131 return; 1132 1133 // We annotate noalias return values, which are only applicable to 1134 // pointer types. 1135 if (!F->getReturnType()->isPointerTy()) 1136 continue; 1137 1138 if (!isFunctionMallocLike(F, SCCNodes)) 1139 return; 1140 } 1141 1142 for (Function *F : SCCNodes) { 1143 if (F->returnDoesNotAlias() || 1144 !F->getReturnType()->isPointerTy()) 1145 continue; 1146 1147 F->setReturnDoesNotAlias(); 1148 ++NumNoAlias; 1149 Changed.insert(F); 1150 } 1151 } 1152 1153 /// Tests whether this function is known to not return null. 1154 /// 1155 /// Requires that the function returns a pointer. 1156 /// 1157 /// Returns true if it believes the function will not return a null, and sets 1158 /// \p Speculative based on whether the returned conclusion is a speculative 1159 /// conclusion due to SCC calls. 1160 static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, 1161 bool &Speculative) { 1162 assert(F->getReturnType()->isPointerTy() && 1163 "nonnull only meaningful on pointer types"); 1164 Speculative = false; 1165 1166 SmallSetVector<Value *, 8> FlowsToReturn; 1167 for (BasicBlock &BB : *F) 1168 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1169 FlowsToReturn.insert(Ret->getReturnValue()); 1170 1171 auto &DL = F->getParent()->getDataLayout(); 1172 1173 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1174 Value *RetVal = FlowsToReturn[i]; 1175 1176 // If this value is locally known to be non-null, we're good 1177 if (isKnownNonZero(RetVal, DL)) 1178 continue; 1179 1180 // Otherwise, we need to look upwards since we can't make any local 1181 // conclusions. 1182 Instruction *RVI = dyn_cast<Instruction>(RetVal); 1183 if (!RVI) 1184 return false; 1185 switch (RVI->getOpcode()) { 1186 // Extend the analysis by looking upwards. 1187 case Instruction::BitCast: 1188 case Instruction::GetElementPtr: 1189 case Instruction::AddrSpaceCast: 1190 FlowsToReturn.insert(RVI->getOperand(0)); 1191 continue; 1192 case Instruction::Select: { 1193 SelectInst *SI = cast<SelectInst>(RVI); 1194 FlowsToReturn.insert(SI->getTrueValue()); 1195 FlowsToReturn.insert(SI->getFalseValue()); 1196 continue; 1197 } 1198 case Instruction::PHI: { 1199 PHINode *PN = cast<PHINode>(RVI); 1200 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1201 FlowsToReturn.insert(PN->getIncomingValue(i)); 1202 continue; 1203 } 1204 case Instruction::Call: 1205 case Instruction::Invoke: { 1206 CallBase &CB = cast<CallBase>(*RVI); 1207 Function *Callee = CB.getCalledFunction(); 1208 // A call to a node within the SCC is assumed to return null until 1209 // proven otherwise 1210 if (Callee && SCCNodes.count(Callee)) { 1211 Speculative = true; 1212 continue; 1213 } 1214 return false; 1215 } 1216 default: 1217 return false; // Unknown source, may be null 1218 }; 1219 llvm_unreachable("should have either continued or returned"); 1220 } 1221 1222 return true; 1223 } 1224 1225 /// Deduce nonnull attributes for the SCC. 1226 static void addNonNullAttrs(const SCCNodeSet &SCCNodes, 1227 SmallSet<Function *, 8> &Changed) { 1228 // Speculative that all functions in the SCC return only nonnull 1229 // pointers. We may refute this as we analyze functions. 1230 bool SCCReturnsNonNull = true; 1231 1232 // Check each function in turn, determining which functions return nonnull 1233 // pointers. 1234 for (Function *F : SCCNodes) { 1235 // Already nonnull. 1236 if (F->getAttributes().hasRetAttr(Attribute::NonNull)) 1237 continue; 1238 1239 // We can infer and propagate function attributes only when we know that the 1240 // definition we'll get at link time is *exactly* the definition we see now. 1241 // For more details, see GlobalValue::mayBeDerefined. 1242 if (!F->hasExactDefinition()) 1243 return; 1244 1245 // We annotate nonnull return values, which are only applicable to 1246 // pointer types. 1247 if (!F->getReturnType()->isPointerTy()) 1248 continue; 1249 1250 bool Speculative = false; 1251 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1252 if (!Speculative) { 1253 // Mark the function eagerly since we may discover a function 1254 // which prevents us from speculating about the entire SCC 1255 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1256 << " as nonnull\n"); 1257 F->addRetAttr(Attribute::NonNull); 1258 ++NumNonNullReturn; 1259 Changed.insert(F); 1260 } 1261 continue; 1262 } 1263 // At least one function returns something which could be null, can't 1264 // speculate any more. 1265 SCCReturnsNonNull = false; 1266 } 1267 1268 if (SCCReturnsNonNull) { 1269 for (Function *F : SCCNodes) { 1270 if (F->getAttributes().hasRetAttr(Attribute::NonNull) || 1271 !F->getReturnType()->isPointerTy()) 1272 continue; 1273 1274 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1275 F->addRetAttr(Attribute::NonNull); 1276 ++NumNonNullReturn; 1277 Changed.insert(F); 1278 } 1279 } 1280 } 1281 1282 namespace { 1283 1284 /// Collects a set of attribute inference requests and performs them all in one 1285 /// go on a single SCC Node. Inference involves scanning function bodies 1286 /// looking for instructions that violate attribute assumptions. 1287 /// As soon as all the bodies are fine we are free to set the attribute. 1288 /// Customization of inference for individual attributes is performed by 1289 /// providing a handful of predicates for each attribute. 1290 class AttributeInferer { 1291 public: 1292 /// Describes a request for inference of a single attribute. 1293 struct InferenceDescriptor { 1294 1295 /// Returns true if this function does not have to be handled. 1296 /// General intent for this predicate is to provide an optimization 1297 /// for functions that do not need this attribute inference at all 1298 /// (say, for functions that already have the attribute). 1299 std::function<bool(const Function &)> SkipFunction; 1300 1301 /// Returns true if this instruction violates attribute assumptions. 1302 std::function<bool(Instruction &)> InstrBreaksAttribute; 1303 1304 /// Sets the inferred attribute for this function. 1305 std::function<void(Function &)> SetAttribute; 1306 1307 /// Attribute we derive. 1308 Attribute::AttrKind AKind; 1309 1310 /// If true, only "exact" definitions can be used to infer this attribute. 1311 /// See GlobalValue::isDefinitionExact. 1312 bool RequiresExactDefinition; 1313 1314 InferenceDescriptor(Attribute::AttrKind AK, 1315 std::function<bool(const Function &)> SkipFunc, 1316 std::function<bool(Instruction &)> InstrScan, 1317 std::function<void(Function &)> SetAttr, 1318 bool ReqExactDef) 1319 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), 1320 SetAttribute(SetAttr), AKind(AK), 1321 RequiresExactDefinition(ReqExactDef) {} 1322 }; 1323 1324 private: 1325 SmallVector<InferenceDescriptor, 4> InferenceDescriptors; 1326 1327 public: 1328 void registerAttrInference(InferenceDescriptor AttrInference) { 1329 InferenceDescriptors.push_back(AttrInference); 1330 } 1331 1332 void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed); 1333 }; 1334 1335 /// Perform all the requested attribute inference actions according to the 1336 /// attribute predicates stored before. 1337 void AttributeInferer::run(const SCCNodeSet &SCCNodes, 1338 SmallSet<Function *, 8> &Changed) { 1339 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; 1340 // Go through all the functions in SCC and check corresponding attribute 1341 // assumptions for each of them. Attributes that are invalid for this SCC 1342 // will be removed from InferInSCC. 1343 for (Function *F : SCCNodes) { 1344 1345 // No attributes whose assumptions are still valid - done. 1346 if (InferInSCC.empty()) 1347 return; 1348 1349 // Check if our attributes ever need scanning/can be scanned. 1350 llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { 1351 if (ID.SkipFunction(*F)) 1352 return false; 1353 1354 // Remove from further inference (invalidate) when visiting a function 1355 // that has no instructions to scan/has an unsuitable definition. 1356 return F->isDeclaration() || 1357 (ID.RequiresExactDefinition && !F->hasExactDefinition()); 1358 }); 1359 1360 // For each attribute still in InferInSCC that doesn't explicitly skip F, 1361 // set up the F instructions scan to verify assumptions of the attribute. 1362 SmallVector<InferenceDescriptor, 4> InferInThisFunc; 1363 llvm::copy_if( 1364 InferInSCC, std::back_inserter(InferInThisFunc), 1365 [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); 1366 1367 if (InferInThisFunc.empty()) 1368 continue; 1369 1370 // Start instruction scan. 1371 for (Instruction &I : instructions(*F)) { 1372 llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { 1373 if (!ID.InstrBreaksAttribute(I)) 1374 return false; 1375 // Remove attribute from further inference on any other functions 1376 // because attribute assumptions have just been violated. 1377 llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { 1378 return D.AKind == ID.AKind; 1379 }); 1380 // Remove attribute from the rest of current instruction scan. 1381 return true; 1382 }); 1383 1384 if (InferInThisFunc.empty()) 1385 break; 1386 } 1387 } 1388 1389 if (InferInSCC.empty()) 1390 return; 1391 1392 for (Function *F : SCCNodes) 1393 // At this point InferInSCC contains only functions that were either: 1394 // - explicitly skipped from scan/inference, or 1395 // - verified to have no instructions that break attribute assumptions. 1396 // Hence we just go and force the attribute for all non-skipped functions. 1397 for (auto &ID : InferInSCC) { 1398 if (ID.SkipFunction(*F)) 1399 continue; 1400 Changed.insert(F); 1401 ID.SetAttribute(*F); 1402 } 1403 } 1404 1405 struct SCCNodesResult { 1406 SCCNodeSet SCCNodes; 1407 bool HasUnknownCall; 1408 }; 1409 1410 } // end anonymous namespace 1411 1412 /// Helper for non-Convergent inference predicate InstrBreaksAttribute. 1413 static bool InstrBreaksNonConvergent(Instruction &I, 1414 const SCCNodeSet &SCCNodes) { 1415 const CallBase *CB = dyn_cast<CallBase>(&I); 1416 // Breaks non-convergent assumption if CS is a convergent call to a function 1417 // not in the SCC. 1418 return CB && CB->isConvergent() && 1419 !SCCNodes.contains(CB->getCalledFunction()); 1420 } 1421 1422 /// Helper for NoUnwind inference predicate InstrBreaksAttribute. 1423 static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { 1424 if (!I.mayThrow(/* IncludePhaseOneUnwind */ true)) 1425 return false; 1426 if (const auto *CI = dyn_cast<CallInst>(&I)) { 1427 if (Function *Callee = CI->getCalledFunction()) { 1428 // I is a may-throw call to a function inside our SCC. This doesn't 1429 // invalidate our current working assumption that the SCC is no-throw; we 1430 // just have to scan that other function. 1431 if (SCCNodes.contains(Callee)) 1432 return false; 1433 } 1434 } 1435 return true; 1436 } 1437 1438 /// Helper for NoFree inference predicate InstrBreaksAttribute. 1439 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { 1440 CallBase *CB = dyn_cast<CallBase>(&I); 1441 if (!CB) 1442 return false; 1443 1444 if (CB->hasFnAttr(Attribute::NoFree)) 1445 return false; 1446 1447 // Speculatively assume in SCC. 1448 if (Function *Callee = CB->getCalledFunction()) 1449 if (SCCNodes.contains(Callee)) 1450 return false; 1451 1452 return true; 1453 } 1454 1455 // Return true if this is an atomic which has an ordering stronger than 1456 // unordered. Note that this is different than the predicate we use in 1457 // Attributor. Here we chose to be conservative and consider monotonic 1458 // operations potentially synchronizing. We generally don't do much with 1459 // monotonic operations, so this is simply risk reduction. 1460 static bool isOrderedAtomic(Instruction *I) { 1461 if (!I->isAtomic()) 1462 return false; 1463 1464 if (auto *FI = dyn_cast<FenceInst>(I)) 1465 // All legal orderings for fence are stronger than monotonic. 1466 return FI->getSyncScopeID() != SyncScope::SingleThread; 1467 else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I)) 1468 return true; 1469 else if (auto *SI = dyn_cast<StoreInst>(I)) 1470 return !SI->isUnordered(); 1471 else if (auto *LI = dyn_cast<LoadInst>(I)) 1472 return !LI->isUnordered(); 1473 else { 1474 llvm_unreachable("unknown atomic instruction?"); 1475 } 1476 } 1477 1478 static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { 1479 // Volatile may synchronize 1480 if (I.isVolatile()) 1481 return true; 1482 1483 // An ordered atomic may synchronize. (See comment about on monotonic.) 1484 if (isOrderedAtomic(&I)) 1485 return true; 1486 1487 auto *CB = dyn_cast<CallBase>(&I); 1488 if (!CB) 1489 // Non call site cases covered by the two checks above 1490 return false; 1491 1492 if (CB->hasFnAttr(Attribute::NoSync)) 1493 return false; 1494 1495 // Non volatile memset/memcpy/memmoves are nosync 1496 // NOTE: Only intrinsics with volatile flags should be handled here. All 1497 // others should be marked in Intrinsics.td. 1498 if (auto *MI = dyn_cast<MemIntrinsic>(&I)) 1499 if (!MI->isVolatile()) 1500 return false; 1501 1502 // Speculatively assume in SCC. 1503 if (Function *Callee = CB->getCalledFunction()) 1504 if (SCCNodes.contains(Callee)) 1505 return false; 1506 1507 return true; 1508 } 1509 1510 /// Attempt to remove convergent function attribute when possible. 1511 /// 1512 /// Returns true if any changes to function attributes were made. 1513 static void inferConvergent(const SCCNodeSet &SCCNodes, 1514 SmallSet<Function *, 8> &Changed) { 1515 AttributeInferer AI; 1516 1517 // Request to remove the convergent attribute from all functions in the SCC 1518 // if every callsite within the SCC is not convergent (except for calls 1519 // to functions within the SCC). 1520 // Note: Removal of the attr from the callsites will happen in 1521 // InstCombineCalls separately. 1522 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1523 Attribute::Convergent, 1524 // Skip non-convergent functions. 1525 [](const Function &F) { return !F.isConvergent(); }, 1526 // Instructions that break non-convergent assumption. 1527 [SCCNodes](Instruction &I) { 1528 return InstrBreaksNonConvergent(I, SCCNodes); 1529 }, 1530 [](Function &F) { 1531 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() 1532 << "\n"); 1533 F.setNotConvergent(); 1534 }, 1535 /* RequiresExactDefinition= */ false}); 1536 // Perform all the requested attribute inference actions. 1537 AI.run(SCCNodes, Changed); 1538 } 1539 1540 /// Infer attributes from all functions in the SCC by scanning every 1541 /// instruction for compliance to the attribute assumptions. 1542 /// 1543 /// Returns true if any changes to function attributes were made. 1544 static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, 1545 SmallSet<Function *, 8> &Changed) { 1546 AttributeInferer AI; 1547 1548 if (!DisableNoUnwindInference) 1549 // Request to infer nounwind attribute for all the functions in the SCC if 1550 // every callsite within the SCC is not throwing (except for calls to 1551 // functions within the SCC). Note that nounwind attribute suffers from 1552 // derefinement - results may change depending on how functions are 1553 // optimized. Thus it can be inferred only from exact definitions. 1554 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1555 Attribute::NoUnwind, 1556 // Skip non-throwing functions. 1557 [](const Function &F) { return F.doesNotThrow(); }, 1558 // Instructions that break non-throwing assumption. 1559 [&SCCNodes](Instruction &I) { 1560 return InstrBreaksNonThrowing(I, SCCNodes); 1561 }, 1562 [](Function &F) { 1563 LLVM_DEBUG(dbgs() 1564 << "Adding nounwind attr to fn " << F.getName() << "\n"); 1565 F.setDoesNotThrow(); 1566 ++NumNoUnwind; 1567 }, 1568 /* RequiresExactDefinition= */ true}); 1569 1570 if (!DisableNoFreeInference) 1571 // Request to infer nofree attribute for all the functions in the SCC if 1572 // every callsite within the SCC does not directly or indirectly free 1573 // memory (except for calls to functions within the SCC). Note that nofree 1574 // attribute suffers from derefinement - results may change depending on 1575 // how functions are optimized. Thus it can be inferred only from exact 1576 // definitions. 1577 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1578 Attribute::NoFree, 1579 // Skip functions known not to free memory. 1580 [](const Function &F) { return F.doesNotFreeMemory(); }, 1581 // Instructions that break non-deallocating assumption. 1582 [&SCCNodes](Instruction &I) { 1583 return InstrBreaksNoFree(I, SCCNodes); 1584 }, 1585 [](Function &F) { 1586 LLVM_DEBUG(dbgs() 1587 << "Adding nofree attr to fn " << F.getName() << "\n"); 1588 F.setDoesNotFreeMemory(); 1589 ++NumNoFree; 1590 }, 1591 /* RequiresExactDefinition= */ true}); 1592 1593 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1594 Attribute::NoSync, 1595 // Skip already marked functions. 1596 [](const Function &F) { return F.hasNoSync(); }, 1597 // Instructions that break nosync assumption. 1598 [&SCCNodes](Instruction &I) { 1599 return InstrBreaksNoSync(I, SCCNodes); 1600 }, 1601 [](Function &F) { 1602 LLVM_DEBUG(dbgs() 1603 << "Adding nosync attr to fn " << F.getName() << "\n"); 1604 F.setNoSync(); 1605 ++NumNoSync; 1606 }, 1607 /* RequiresExactDefinition= */ true}); 1608 1609 // Perform all the requested attribute inference actions. 1610 AI.run(SCCNodes, Changed); 1611 } 1612 1613 static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, 1614 SmallSet<Function *, 8> &Changed) { 1615 // Try and identify functions that do not recurse. 1616 1617 // If the SCC contains multiple nodes we know for sure there is recursion. 1618 if (SCCNodes.size() != 1) 1619 return; 1620 1621 Function *F = *SCCNodes.begin(); 1622 if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) 1623 return; 1624 1625 // If all of the calls in F are identifiable and are to norecurse functions, F 1626 // is norecurse. This check also detects self-recursion as F is not currently 1627 // marked norecurse, so any called from F to F will not be marked norecurse. 1628 for (auto &BB : *F) 1629 for (auto &I : BB.instructionsWithoutDebug()) 1630 if (auto *CB = dyn_cast<CallBase>(&I)) { 1631 Function *Callee = CB->getCalledFunction(); 1632 if (!Callee || Callee == F || !Callee->doesNotRecurse()) 1633 // Function calls a potentially recursive function. 1634 return; 1635 } 1636 1637 // Every call was to a non-recursive function other than this function, and 1638 // we have no indirect recursion as the SCC size is one. This function cannot 1639 // recurse. 1640 F->setDoesNotRecurse(); 1641 ++NumNoRecurse; 1642 Changed.insert(F); 1643 } 1644 1645 static bool instructionDoesNotReturn(Instruction &I) { 1646 if (auto *CB = dyn_cast<CallBase>(&I)) 1647 return CB->hasFnAttr(Attribute::NoReturn); 1648 return false; 1649 } 1650 1651 // A basic block can only return if it terminates with a ReturnInst and does not 1652 // contain calls to noreturn functions. 1653 static bool basicBlockCanReturn(BasicBlock &BB) { 1654 if (!isa<ReturnInst>(BB.getTerminator())) 1655 return false; 1656 return none_of(BB, instructionDoesNotReturn); 1657 } 1658 1659 // FIXME: this doesn't handle recursion. 1660 static bool canReturn(Function &F) { 1661 SmallVector<BasicBlock *, 16> Worklist; 1662 SmallPtrSet<BasicBlock *, 16> Visited; 1663 1664 Visited.insert(&F.front()); 1665 Worklist.push_back(&F.front()); 1666 1667 do { 1668 BasicBlock *BB = Worklist.pop_back_val(); 1669 if (basicBlockCanReturn(*BB)) 1670 return true; 1671 for (BasicBlock *Succ : successors(BB)) 1672 if (Visited.insert(Succ).second) 1673 Worklist.push_back(Succ); 1674 } while (!Worklist.empty()); 1675 1676 return false; 1677 } 1678 1679 // Set the noreturn function attribute if possible. 1680 static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, 1681 SmallSet<Function *, 8> &Changed) { 1682 for (Function *F : SCCNodes) { 1683 if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) || 1684 F->doesNotReturn()) 1685 continue; 1686 1687 if (!canReturn(*F)) { 1688 F->setDoesNotReturn(); 1689 Changed.insert(F); 1690 } 1691 } 1692 } 1693 1694 static bool functionWillReturn(const Function &F) { 1695 // We can infer and propagate function attributes only when we know that the 1696 // definition we'll get at link time is *exactly* the definition we see now. 1697 // For more details, see GlobalValue::mayBeDerefined. 1698 if (!F.hasExactDefinition()) 1699 return false; 1700 1701 // Must-progress function without side-effects must return. 1702 if (F.mustProgress() && F.onlyReadsMemory()) 1703 return true; 1704 1705 // Can only analyze functions with a definition. 1706 if (F.isDeclaration()) 1707 return false; 1708 1709 // Functions with loops require more sophisticated analysis, as the loop 1710 // may be infinite. For now, don't try to handle them. 1711 SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; 1712 FindFunctionBackedges(F, Backedges); 1713 if (!Backedges.empty()) 1714 return false; 1715 1716 // If there are no loops, then the function is willreturn if all calls in 1717 // it are willreturn. 1718 return all_of(instructions(F), [](const Instruction &I) { 1719 return I.willReturn(); 1720 }); 1721 } 1722 1723 // Set the willreturn function attribute if possible. 1724 static void addWillReturn(const SCCNodeSet &SCCNodes, 1725 SmallSet<Function *, 8> &Changed) { 1726 for (Function *F : SCCNodes) { 1727 if (!F || F->willReturn() || !functionWillReturn(*F)) 1728 continue; 1729 1730 F->setWillReturn(); 1731 NumWillReturn++; 1732 Changed.insert(F); 1733 } 1734 } 1735 1736 static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { 1737 SCCNodesResult Res; 1738 Res.HasUnknownCall = false; 1739 for (Function *F : Functions) { 1740 if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) || 1741 F->isPresplitCoroutine()) { 1742 // Treat any function we're trying not to optimize as if it were an 1743 // indirect call and omit it from the node set used below. 1744 Res.HasUnknownCall = true; 1745 continue; 1746 } 1747 // Track whether any functions in this SCC have an unknown call edge. 1748 // Note: if this is ever a performance hit, we can common it with 1749 // subsequent routines which also do scans over the instructions of the 1750 // function. 1751 if (!Res.HasUnknownCall) { 1752 for (Instruction &I : instructions(*F)) { 1753 if (auto *CB = dyn_cast<CallBase>(&I)) { 1754 if (!CB->getCalledFunction()) { 1755 Res.HasUnknownCall = true; 1756 break; 1757 } 1758 } 1759 } 1760 } 1761 Res.SCCNodes.insert(F); 1762 } 1763 return Res; 1764 } 1765 1766 template <typename AARGetterT> 1767 static SmallSet<Function *, 8> 1768 deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, 1769 bool ArgAttrsOnly) { 1770 SCCNodesResult Nodes = createSCCNodeSet(Functions); 1771 1772 // Bail if the SCC only contains optnone functions. 1773 if (Nodes.SCCNodes.empty()) 1774 return {}; 1775 1776 SmallSet<Function *, 8> Changed; 1777 if (ArgAttrsOnly) { 1778 addArgumentAttrs(Nodes.SCCNodes, Changed); 1779 return Changed; 1780 } 1781 1782 addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); 1783 addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); 1784 addArgumentAttrs(Nodes.SCCNodes, Changed); 1785 inferConvergent(Nodes.SCCNodes, Changed); 1786 addNoReturnAttrs(Nodes.SCCNodes, Changed); 1787 addWillReturn(Nodes.SCCNodes, Changed); 1788 1789 // If we have no external nodes participating in the SCC, we can deduce some 1790 // more precise attributes as well. 1791 if (!Nodes.HasUnknownCall) { 1792 addNoAliasAttrs(Nodes.SCCNodes, Changed); 1793 addNonNullAttrs(Nodes.SCCNodes, Changed); 1794 inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed); 1795 addNoRecurseAttrs(Nodes.SCCNodes, Changed); 1796 } 1797 1798 // Finally, infer the maximal set of attributes from the ones we've inferred 1799 // above. This is handling the cases where one attribute on a signature 1800 // implies another, but for implementation reasons the inference rule for 1801 // the later is missing (or simply less sophisticated). 1802 for (Function *F : Nodes.SCCNodes) 1803 if (F) 1804 if (inferAttributesFromOthers(*F)) 1805 Changed.insert(F); 1806 1807 return Changed; 1808 } 1809 1810 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, 1811 CGSCCAnalysisManager &AM, 1812 LazyCallGraph &CG, 1813 CGSCCUpdateResult &) { 1814 // Skip non-recursive functions if requested. 1815 // Only infer argument attributes for non-recursive functions, because 1816 // it can affect optimization behavior in conjunction with noalias. 1817 bool ArgAttrsOnly = false; 1818 if (C.size() == 1 && SkipNonRecursive) { 1819 LazyCallGraph::Node &N = *C.begin(); 1820 if (!N->lookup(N)) 1821 ArgAttrsOnly = true; 1822 } 1823 1824 FunctionAnalysisManager &FAM = 1825 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 1826 1827 // We pass a lambda into functions to wire them up to the analysis manager 1828 // for getting function analyses. 1829 auto AARGetter = [&](Function &F) -> AAResults & { 1830 return FAM.getResult<AAManager>(F); 1831 }; 1832 1833 SmallVector<Function *, 8> Functions; 1834 for (LazyCallGraph::Node &N : C) { 1835 Functions.push_back(&N.getFunction()); 1836 } 1837 1838 auto ChangedFunctions = 1839 deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly); 1840 if (ChangedFunctions.empty()) 1841 return PreservedAnalyses::all(); 1842 1843 // Invalidate analyses for modified functions so that we don't have to 1844 // invalidate all analyses for all functions in this SCC. 1845 PreservedAnalyses FuncPA; 1846 // We haven't changed the CFG for modified functions. 1847 FuncPA.preserveSet<CFGAnalyses>(); 1848 for (Function *Changed : ChangedFunctions) { 1849 FAM.invalidate(*Changed, FuncPA); 1850 // Also invalidate any direct callers of changed functions since analyses 1851 // may care about attributes of direct callees. For example, MemorySSA cares 1852 // about whether or not a call's callee modifies memory and queries that 1853 // through function attributes. 1854 for (auto *U : Changed->users()) { 1855 if (auto *Call = dyn_cast<CallBase>(U)) { 1856 if (Call->getCalledFunction() == Changed) 1857 FAM.invalidate(*Call->getFunction(), FuncPA); 1858 } 1859 } 1860 } 1861 1862 PreservedAnalyses PA; 1863 // We have not added or removed functions. 1864 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 1865 // We already invalidated all relevant function analyses above. 1866 PA.preserveSet<AllAnalysesOn<Function>>(); 1867 return PA; 1868 } 1869 1870 void PostOrderFunctionAttrsPass::printPipeline( 1871 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1872 static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline( 1873 OS, MapClassName2PassName); 1874 if (SkipNonRecursive) 1875 OS << "<skip-non-recursive-function-attrs>"; 1876 } 1877 1878 template <typename AARGetterT> 1879 static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { 1880 SmallVector<Function *, 8> Functions; 1881 for (CallGraphNode *I : SCC) { 1882 Functions.push_back(I->getFunction()); 1883 } 1884 1885 return !deriveAttrsInPostOrder(Functions, AARGetter).empty(); 1886 } 1887 1888 static bool addNoRecurseAttrsTopDown(Function &F) { 1889 // We check the preconditions for the function prior to calling this to avoid 1890 // the cost of building up a reversible post-order list. We assert them here 1891 // to make sure none of the invariants this relies on were violated. 1892 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); 1893 assert(!F.doesNotRecurse() && 1894 "This function has already been deduced as norecurs!"); 1895 assert(F.hasInternalLinkage() && 1896 "Can only do top-down deduction for internal linkage functions!"); 1897 1898 // If F is internal and all of its uses are calls from a non-recursive 1899 // functions, then none of its calls could in fact recurse without going 1900 // through a function marked norecurse, and so we can mark this function too 1901 // as norecurse. Note that the uses must actually be calls -- otherwise 1902 // a pointer to this function could be returned from a norecurse function but 1903 // this function could be recursively (indirectly) called. Note that this 1904 // also detects if F is directly recursive as F is not yet marked as 1905 // a norecurse function. 1906 for (auto &U : F.uses()) { 1907 auto *I = dyn_cast<Instruction>(U.getUser()); 1908 if (!I) 1909 return false; 1910 CallBase *CB = dyn_cast<CallBase>(I); 1911 if (!CB || !CB->isCallee(&U) || 1912 !CB->getParent()->getParent()->doesNotRecurse()) 1913 return false; 1914 } 1915 F.setDoesNotRecurse(); 1916 ++NumNoRecurse; 1917 return true; 1918 } 1919 1920 static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) { 1921 // We only have a post-order SCC traversal (because SCCs are inherently 1922 // discovered in post-order), so we accumulate them in a vector and then walk 1923 // it in reverse. This is simpler than using the RPO iterator infrastructure 1924 // because we need to combine SCC detection and the PO walk of the call 1925 // graph. We can also cheat egregiously because we're primarily interested in 1926 // synthesizing norecurse and so we can only save the singular SCCs as SCCs 1927 // with multiple functions in them will clearly be recursive. 1928 1929 SmallVector<Function *, 16> Worklist; 1930 CG.buildRefSCCs(); 1931 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { 1932 for (LazyCallGraph::SCC &SCC : RC) { 1933 if (SCC.size() != 1) 1934 continue; 1935 Function &F = SCC.begin()->getFunction(); 1936 if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) 1937 Worklist.push_back(&F); 1938 } 1939 } 1940 bool Changed = false; 1941 for (auto *F : llvm::reverse(Worklist)) 1942 Changed |= addNoRecurseAttrsTopDown(*F); 1943 1944 return Changed; 1945 } 1946 1947 PreservedAnalyses 1948 ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { 1949 auto &CG = AM.getResult<LazyCallGraphAnalysis>(M); 1950 1951 if (!deduceFunctionAttributeInRPO(M, CG)) 1952 return PreservedAnalyses::all(); 1953 1954 PreservedAnalyses PA; 1955 PA.preserve<LazyCallGraphAnalysis>(); 1956 return PA; 1957 } 1958