1 //===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file implements interprocedural passes which walk the 11 /// call-graph deducing and/or propagating function attributes. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/IPO/FunctionAttrs.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/SCCIterator.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallPtrSet.h" 22 #include "llvm/ADT/SmallSet.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/Analysis/AssumptionCache.h" 26 #include "llvm/Analysis/BasicAliasAnalysis.h" 27 #include "llvm/Analysis/CFG.h" 28 #include "llvm/Analysis/CGSCCPassManager.h" 29 #include "llvm/Analysis/CallGraph.h" 30 #include "llvm/Analysis/CallGraphSCCPass.h" 31 #include "llvm/Analysis/CaptureTracking.h" 32 #include "llvm/Analysis/LazyCallGraph.h" 33 #include "llvm/Analysis/MemoryLocation.h" 34 #include "llvm/Analysis/ValueTracking.h" 35 #include "llvm/IR/Argument.h" 36 #include "llvm/IR/Attributes.h" 37 #include "llvm/IR/BasicBlock.h" 38 #include "llvm/IR/Constant.h" 39 #include "llvm/IR/Constants.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/InstIterator.h" 42 #include "llvm/IR/InstrTypes.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Metadata.h" 47 #include "llvm/IR/ModuleSummaryIndex.h" 48 #include "llvm/IR/PassManager.h" 49 #include "llvm/IR/Type.h" 50 #include "llvm/IR/Use.h" 51 #include "llvm/IR/User.h" 52 #include "llvm/IR/Value.h" 53 #include "llvm/Support/Casting.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Compiler.h" 56 #include "llvm/Support/Debug.h" 57 #include "llvm/Support/ErrorHandling.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Transforms/IPO.h" 60 #include "llvm/Transforms/Utils/Local.h" 61 #include <cassert> 62 #include <iterator> 63 #include <map> 64 #include <optional> 65 #include <vector> 66 67 using namespace llvm; 68 69 #define DEBUG_TYPE "function-attrs" 70 71 STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute"); 72 STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); 73 STATISTIC(NumReturned, "Number of arguments marked returned"); 74 STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); 75 STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); 76 STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly"); 77 STATISTIC(NumNoAlias, "Number of function returns marked noalias"); 78 STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); 79 STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef"); 80 STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); 81 STATISTIC(NumNoUnwind, "Number of functions marked as nounwind"); 82 STATISTIC(NumNoFree, "Number of functions marked as nofree"); 83 STATISTIC(NumWillReturn, "Number of functions marked as willreturn"); 84 STATISTIC(NumNoSync, "Number of functions marked as nosync"); 85 86 STATISTIC(NumThinLinkNoRecurse, 87 "Number of functions marked as norecurse during thinlink"); 88 STATISTIC(NumThinLinkNoUnwind, 89 "Number of functions marked as nounwind during thinlink"); 90 91 static cl::opt<bool> EnableNonnullArgPropagation( 92 "enable-nonnull-arg-prop", cl::init(true), cl::Hidden, 93 cl::desc("Try to propagate nonnull argument attributes from callsites to " 94 "caller functions.")); 95 96 static cl::opt<bool> DisableNoUnwindInference( 97 "disable-nounwind-inference", cl::Hidden, 98 cl::desc("Stop inferring nounwind attribute during function-attrs pass")); 99 100 static cl::opt<bool> DisableNoFreeInference( 101 "disable-nofree-inference", cl::Hidden, 102 cl::desc("Stop inferring nofree attribute during function-attrs pass")); 103 104 static cl::opt<bool> DisableThinLTOPropagation( 105 "disable-thinlto-funcattrs", cl::init(true), cl::Hidden, 106 cl::desc("Don't propagate function-attrs in thinLTO")); 107 108 namespace { 109 110 using SCCNodeSet = SmallSetVector<Function *, 8>; 111 112 } // end anonymous namespace 113 114 static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc, 115 ModRefInfo MR, AAResults &AAR) { 116 // Ignore accesses to known-invariant or local memory. 117 MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocal=*/true); 118 if (isNoModRef(MR)) 119 return; 120 121 const Value *UO = getUnderlyingObject(Loc.Ptr); 122 assert(!isa<AllocaInst>(UO) && 123 "Should have been handled by getModRefInfoMask()"); 124 if (isa<Argument>(UO)) { 125 ME |= MemoryEffects::argMemOnly(MR); 126 return; 127 } 128 129 // If it's not an identified object, it might be an argument. 130 if (!isIdentifiedObject(UO)) 131 ME |= MemoryEffects::argMemOnly(MR); 132 ME |= MemoryEffects(IRMemLocation::Other, MR); 133 } 134 135 static void addArgLocs(MemoryEffects &ME, const CallBase *Call, 136 ModRefInfo ArgMR, AAResults &AAR) { 137 for (const Value *Arg : Call->args()) { 138 if (!Arg->getType()->isPtrOrPtrVectorTy()) 139 continue; 140 141 addLocAccess(ME, 142 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()), 143 ArgMR, AAR); 144 } 145 } 146 147 /// Returns the memory access attribute for function F using AAR for AA results, 148 /// where SCCNodes is the current SCC. 149 /// 150 /// If ThisBody is true, this function may examine the function body and will 151 /// return a result pertaining to this copy of the function. If it is false, the 152 /// result will be based only on AA results for the function declaration; it 153 /// will be assumed that some other (perhaps less optimized) version of the 154 /// function may be selected at link time. 155 /// 156 /// The return value is split into two parts: Memory effects that always apply, 157 /// and additional memory effects that apply if any of the functions in the SCC 158 /// can access argmem. 159 static std::pair<MemoryEffects, MemoryEffects> 160 checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR, 161 const SCCNodeSet &SCCNodes) { 162 MemoryEffects OrigME = AAR.getMemoryEffects(&F); 163 if (OrigME.doesNotAccessMemory()) 164 // Already perfect! 165 return {OrigME, MemoryEffects::none()}; 166 167 if (!ThisBody) 168 return {OrigME, MemoryEffects::none()}; 169 170 MemoryEffects ME = MemoryEffects::none(); 171 // Additional locations accessed if the SCC accesses argmem. 172 MemoryEffects RecursiveArgME = MemoryEffects::none(); 173 174 // Inalloca and preallocated arguments are always clobbered by the call. 175 if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) || 176 F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) 177 ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef); 178 179 // Scan the function body for instructions that may read or write memory. 180 for (Instruction &I : instructions(F)) { 181 // Some instructions can be ignored even if they read or write memory. 182 // Detect these now, skipping to the next instruction if one is found. 183 if (auto *Call = dyn_cast<CallBase>(&I)) { 184 // We can optimistically ignore calls to functions in the same SCC, with 185 // two caveats: 186 // * Calls with operand bundles may have additional effects. 187 // * Argument memory accesses may imply additional effects depending on 188 // what the argument location is. 189 if (!Call->hasOperandBundles() && Call->getCalledFunction() && 190 SCCNodes.count(Call->getCalledFunction())) { 191 // Keep track of which additional locations are accessed if the SCC 192 // turns out to access argmem. 193 addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR); 194 continue; 195 } 196 197 MemoryEffects CallME = AAR.getMemoryEffects(Call); 198 199 // If the call doesn't access memory, we're done. 200 if (CallME.doesNotAccessMemory()) 201 continue; 202 203 // A pseudo probe call shouldn't change any function attribute since it 204 // doesn't translate to a real instruction. It comes with a memory access 205 // tag to prevent itself being removed by optimizations and not block 206 // other instructions being optimized. 207 if (isa<PseudoProbeInst>(I)) 208 continue; 209 210 ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem); 211 212 // If the call accesses captured memory (currently part of "other") and 213 // an argument is captured (currently not tracked), then it may also 214 // access argument memory. 215 ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other); 216 ME |= MemoryEffects::argMemOnly(OtherMR); 217 218 // Check whether all pointer arguments point to local memory, and 219 // ignore calls that only access local memory. 220 ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem); 221 if (ArgMR != ModRefInfo::NoModRef) 222 addArgLocs(ME, Call, ArgMR, AAR); 223 continue; 224 } 225 226 ModRefInfo MR = ModRefInfo::NoModRef; 227 if (I.mayWriteToMemory()) 228 MR |= ModRefInfo::Mod; 229 if (I.mayReadFromMemory()) 230 MR |= ModRefInfo::Ref; 231 if (MR == ModRefInfo::NoModRef) 232 continue; 233 234 std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I); 235 if (!Loc) { 236 // If no location is known, conservatively assume anything can be 237 // accessed. 238 ME |= MemoryEffects(MR); 239 continue; 240 } 241 242 // Volatile operations may access inaccessible memory. 243 if (I.isVolatile()) 244 ME |= MemoryEffects::inaccessibleMemOnly(MR); 245 246 addLocAccess(ME, *Loc, MR, AAR); 247 } 248 249 return {OrigME & ME, RecursiveArgME}; 250 } 251 252 MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F, 253 AAResults &AAR) { 254 return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first; 255 } 256 257 /// Deduce readonly/readnone/writeonly attributes for the SCC. 258 template <typename AARGetterT> 259 static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter, 260 SmallSet<Function *, 8> &Changed) { 261 MemoryEffects ME = MemoryEffects::none(); 262 MemoryEffects RecursiveArgME = MemoryEffects::none(); 263 for (Function *F : SCCNodes) { 264 // Call the callable parameter to look up AA results for this function. 265 AAResults &AAR = AARGetter(*F); 266 // Non-exact function definitions may not be selected at link time, and an 267 // alternative version that writes to memory may be selected. See the 268 // comment on GlobalValue::isDefinitionExact for more details. 269 auto [FnME, FnRecursiveArgME] = 270 checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes); 271 ME |= FnME; 272 RecursiveArgME |= FnRecursiveArgME; 273 // Reached bottom of the lattice, we will not be able to improve the result. 274 if (ME == MemoryEffects::unknown()) 275 return; 276 } 277 278 // If the SCC accesses argmem, add recursive accesses resulting from that. 279 ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem); 280 if (ArgMR != ModRefInfo::NoModRef) 281 ME |= RecursiveArgME & MemoryEffects(ArgMR); 282 283 for (Function *F : SCCNodes) { 284 MemoryEffects OldME = F->getMemoryEffects(); 285 MemoryEffects NewME = ME & OldME; 286 if (NewME != OldME) { 287 ++NumMemoryAttr; 288 F->setMemoryEffects(NewME); 289 // Remove conflicting writable attributes. 290 if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem))) 291 for (Argument &A : F->args()) 292 A.removeAttr(Attribute::Writable); 293 Changed.insert(F); 294 } 295 } 296 } 297 298 // Compute definitive function attributes for a function taking into account 299 // prevailing definitions and linkage types 300 static FunctionSummary *calculatePrevailingSummary( 301 ValueInfo VI, 302 DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary, 303 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 304 IsPrevailing) { 305 306 if (CachedPrevailingSummary.count(VI)) 307 return CachedPrevailingSummary[VI]; 308 309 /// At this point, prevailing symbols have been resolved. The following leads 310 /// to returning a conservative result: 311 /// - Multiple instances with local linkage. Normally local linkage would be 312 /// unique per module 313 /// as the GUID includes the module path. We could have a guid alias if 314 /// there wasn't any distinguishing path when each file was compiled, but 315 /// that should be rare so we'll punt on those. 316 317 /// These next 2 cases should not happen and will assert: 318 /// - Multiple instances with external linkage. This should be caught in 319 /// symbol resolution 320 /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our 321 /// knowledge meaning we have to go conservative. 322 323 /// Otherwise, we calculate attributes for a function as: 324 /// 1. If we have a local linkage, take its attributes. If there's somehow 325 /// multiple, bail and go conservative. 326 /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is 327 /// prevailing, take its attributes. 328 /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic 329 /// differences. However, if the prevailing copy is known it will be used 330 /// so take its attributes. If the prevailing copy is in a native file 331 /// all IR copies will be dead and propagation will go conservative. 332 /// 4. AvailableExternally summaries without a prevailing copy are known to 333 /// occur in a couple of circumstances: 334 /// a. An internal function gets imported due to its caller getting 335 /// imported, it becomes AvailableExternally but no prevailing 336 /// definition exists. Because it has to get imported along with its 337 /// caller the attributes will be captured by propagating on its 338 /// caller. 339 /// b. C++11 [temp.explicit]p10 can generate AvailableExternally 340 /// definitions of explicitly instanced template declarations 341 /// for inlining which are ultimately dropped from the TU. Since this 342 /// is localized to the TU the attributes will have already made it to 343 /// the callers. 344 /// These are edge cases and already captured by their callers so we 345 /// ignore these for now. If they become relevant to optimize in the 346 /// future this can be revisited. 347 /// 5. Otherwise, go conservative. 348 349 CachedPrevailingSummary[VI] = nullptr; 350 FunctionSummary *Local = nullptr; 351 FunctionSummary *Prevailing = nullptr; 352 353 for (const auto &GVS : VI.getSummaryList()) { 354 if (!GVS->isLive()) 355 continue; 356 357 FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject()); 358 // Virtual and Unknown (e.g. indirect) calls require going conservative 359 if (!FS || FS->fflags().HasUnknownCall) 360 return nullptr; 361 362 const auto &Linkage = GVS->linkage(); 363 if (GlobalValue::isLocalLinkage(Linkage)) { 364 if (Local) { 365 LLVM_DEBUG( 366 dbgs() 367 << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on " 368 "function " 369 << VI.name() << " from " << FS->modulePath() << ". Previous module " 370 << Local->modulePath() << "\n"); 371 return nullptr; 372 } 373 Local = FS; 374 } else if (GlobalValue::isExternalLinkage(Linkage)) { 375 assert(IsPrevailing(VI.getGUID(), GVS.get())); 376 Prevailing = FS; 377 break; 378 } else if (GlobalValue::isWeakODRLinkage(Linkage) || 379 GlobalValue::isLinkOnceODRLinkage(Linkage) || 380 GlobalValue::isWeakAnyLinkage(Linkage) || 381 GlobalValue::isLinkOnceAnyLinkage(Linkage)) { 382 if (IsPrevailing(VI.getGUID(), GVS.get())) { 383 Prevailing = FS; 384 break; 385 } 386 } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) { 387 // TODO: Handle these cases if they become meaningful 388 continue; 389 } 390 } 391 392 if (Local) { 393 assert(!Prevailing); 394 CachedPrevailingSummary[VI] = Local; 395 } else if (Prevailing) { 396 assert(!Local); 397 CachedPrevailingSummary[VI] = Prevailing; 398 } 399 400 return CachedPrevailingSummary[VI]; 401 } 402 403 bool llvm::thinLTOPropagateFunctionAttrs( 404 ModuleSummaryIndex &Index, 405 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> 406 IsPrevailing) { 407 // TODO: implement addNoAliasAttrs once 408 // there's more information about the return type in the summary 409 if (DisableThinLTOPropagation) 410 return false; 411 412 DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary; 413 bool Changed = false; 414 415 auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) { 416 // Assume we can propagate unless we discover otherwise 417 FunctionSummary::FFlags InferredFlags; 418 InferredFlags.NoRecurse = (SCCNodes.size() == 1); 419 InferredFlags.NoUnwind = true; 420 421 for (auto &V : SCCNodes) { 422 FunctionSummary *CallerSummary = 423 calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing); 424 425 // Function summaries can fail to contain information such as declarations 426 if (!CallerSummary) 427 return; 428 429 if (CallerSummary->fflags().MayThrow) 430 InferredFlags.NoUnwind = false; 431 432 for (const auto &Callee : CallerSummary->calls()) { 433 FunctionSummary *CalleeSummary = calculatePrevailingSummary( 434 Callee.first, CachedPrevailingSummary, IsPrevailing); 435 436 if (!CalleeSummary) 437 return; 438 439 if (!CalleeSummary->fflags().NoRecurse) 440 InferredFlags.NoRecurse = false; 441 442 if (!CalleeSummary->fflags().NoUnwind) 443 InferredFlags.NoUnwind = false; 444 445 if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse) 446 break; 447 } 448 } 449 450 if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) { 451 Changed = true; 452 for (auto &V : SCCNodes) { 453 if (InferredFlags.NoRecurse) { 454 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to " 455 << V.name() << "\n"); 456 ++NumThinLinkNoRecurse; 457 } 458 459 if (InferredFlags.NoUnwind) { 460 LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to " 461 << V.name() << "\n"); 462 ++NumThinLinkNoUnwind; 463 } 464 465 for (const auto &S : V.getSummaryList()) { 466 if (auto *FS = dyn_cast<FunctionSummary>(S.get())) { 467 if (InferredFlags.NoRecurse) 468 FS->setNoRecurse(); 469 470 if (InferredFlags.NoUnwind) 471 FS->setNoUnwind(); 472 } 473 } 474 } 475 } 476 }; 477 478 // Call propagation functions on each SCC in the Index 479 for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd(); 480 ++I) { 481 std::vector<ValueInfo> Nodes(*I); 482 PropagateAttributes(Nodes); 483 } 484 return Changed; 485 } 486 487 namespace { 488 489 /// For a given pointer Argument, this retains a list of Arguments of functions 490 /// in the same SCC that the pointer data flows into. We use this to build an 491 /// SCC of the arguments. 492 struct ArgumentGraphNode { 493 Argument *Definition; 494 SmallVector<ArgumentGraphNode *, 4> Uses; 495 }; 496 497 class ArgumentGraph { 498 // We store pointers to ArgumentGraphNode objects, so it's important that 499 // that they not move around upon insert. 500 using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>; 501 502 ArgumentMapTy ArgumentMap; 503 504 // There is no root node for the argument graph, in fact: 505 // void f(int *x, int *y) { if (...) f(x, y); } 506 // is an example where the graph is disconnected. The SCCIterator requires a 507 // single entry point, so we maintain a fake ("synthetic") root node that 508 // uses every node. Because the graph is directed and nothing points into 509 // the root, it will not participate in any SCCs (except for its own). 510 ArgumentGraphNode SyntheticRoot; 511 512 public: 513 ArgumentGraph() { SyntheticRoot.Definition = nullptr; } 514 515 using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator; 516 517 iterator begin() { return SyntheticRoot.Uses.begin(); } 518 iterator end() { return SyntheticRoot.Uses.end(); } 519 ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } 520 521 ArgumentGraphNode *operator[](Argument *A) { 522 ArgumentGraphNode &Node = ArgumentMap[A]; 523 Node.Definition = A; 524 SyntheticRoot.Uses.push_back(&Node); 525 return &Node; 526 } 527 }; 528 529 /// This tracker checks whether callees are in the SCC, and if so it does not 530 /// consider that a capture, instead adding it to the "Uses" list and 531 /// continuing with the analysis. 532 struct ArgumentUsesTracker : public CaptureTracker { 533 ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {} 534 535 void tooManyUses() override { Captured = true; } 536 537 bool captured(const Use *U) override { 538 CallBase *CB = dyn_cast<CallBase>(U->getUser()); 539 if (!CB) { 540 Captured = true; 541 return true; 542 } 543 544 Function *F = CB->getCalledFunction(); 545 if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) { 546 Captured = true; 547 return true; 548 } 549 550 assert(!CB->isCallee(U) && "callee operand reported captured?"); 551 const unsigned UseIndex = CB->getDataOperandNo(U); 552 if (UseIndex >= CB->arg_size()) { 553 // Data operand, but not a argument operand -- must be a bundle operand 554 assert(CB->hasOperandBundles() && "Must be!"); 555 556 // CaptureTracking told us that we're being captured by an operand bundle 557 // use. In this case it does not matter if the callee is within our SCC 558 // or not -- we've been captured in some unknown way, and we have to be 559 // conservative. 560 Captured = true; 561 return true; 562 } 563 564 if (UseIndex >= F->arg_size()) { 565 assert(F->isVarArg() && "More params than args in non-varargs call"); 566 Captured = true; 567 return true; 568 } 569 570 Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); 571 return false; 572 } 573 574 // True only if certainly captured (used outside our SCC). 575 bool Captured = false; 576 577 // Uses within our SCC. 578 SmallVector<Argument *, 4> Uses; 579 580 const SCCNodeSet &SCCNodes; 581 }; 582 583 } // end anonymous namespace 584 585 namespace llvm { 586 587 template <> struct GraphTraits<ArgumentGraphNode *> { 588 using NodeRef = ArgumentGraphNode *; 589 using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; 590 591 static NodeRef getEntryNode(NodeRef A) { return A; } 592 static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } 593 static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } 594 }; 595 596 template <> 597 struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { 598 static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } 599 600 static ChildIteratorType nodes_begin(ArgumentGraph *AG) { 601 return AG->begin(); 602 } 603 604 static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } 605 }; 606 607 } // end namespace llvm 608 609 /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. 610 static Attribute::AttrKind 611 determinePointerAccessAttrs(Argument *A, 612 const SmallPtrSet<Argument *, 8> &SCCNodes) { 613 SmallVector<Use *, 32> Worklist; 614 SmallPtrSet<Use *, 32> Visited; 615 616 // inalloca arguments are always clobbered by the call. 617 if (A->hasInAllocaAttr() || A->hasPreallocatedAttr()) 618 return Attribute::None; 619 620 bool IsRead = false; 621 bool IsWrite = false; 622 623 for (Use &U : A->uses()) { 624 Visited.insert(&U); 625 Worklist.push_back(&U); 626 } 627 628 while (!Worklist.empty()) { 629 if (IsWrite && IsRead) 630 // No point in searching further.. 631 return Attribute::None; 632 633 Use *U = Worklist.pop_back_val(); 634 Instruction *I = cast<Instruction>(U->getUser()); 635 636 switch (I->getOpcode()) { 637 case Instruction::BitCast: 638 case Instruction::GetElementPtr: 639 case Instruction::PHI: 640 case Instruction::Select: 641 case Instruction::AddrSpaceCast: 642 // The original value is not read/written via this if the new value isn't. 643 for (Use &UU : I->uses()) 644 if (Visited.insert(&UU).second) 645 Worklist.push_back(&UU); 646 break; 647 648 case Instruction::Call: 649 case Instruction::Invoke: { 650 CallBase &CB = cast<CallBase>(*I); 651 if (CB.isCallee(U)) { 652 IsRead = true; 653 // Note that indirect calls do not capture, see comment in 654 // CaptureTracking for context 655 continue; 656 } 657 658 // Given we've explictily handled the callee operand above, what's left 659 // must be a data operand (e.g. argument or operand bundle) 660 const unsigned UseIndex = CB.getDataOperandNo(U); 661 662 // Some intrinsics (for instance ptrmask) do not capture their results, 663 // but return results thas alias their pointer argument, and thus should 664 // be handled like GEP or addrspacecast above. 665 if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing( 666 &CB, /*MustPreserveNullness=*/false)) { 667 for (Use &UU : CB.uses()) 668 if (Visited.insert(&UU).second) 669 Worklist.push_back(&UU); 670 } else if (!CB.doesNotCapture(UseIndex)) { 671 if (!CB.onlyReadsMemory()) 672 // If the callee can save a copy into other memory, then simply 673 // scanning uses of the call is insufficient. We have no way 674 // of tracking copies of the pointer through memory to see 675 // if a reloaded copy is written to, thus we must give up. 676 return Attribute::None; 677 // Push users for processing once we finish this one 678 if (!I->getType()->isVoidTy()) 679 for (Use &UU : I->uses()) 680 if (Visited.insert(&UU).second) 681 Worklist.push_back(&UU); 682 } 683 684 ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem); 685 if (isNoModRef(ArgMR)) 686 continue; 687 688 if (Function *F = CB.getCalledFunction()) 689 if (CB.isArgOperand(U) && UseIndex < F->arg_size() && 690 SCCNodes.count(F->getArg(UseIndex))) 691 // This is an argument which is part of the speculative SCC. Note 692 // that only operands corresponding to formal arguments of the callee 693 // can participate in the speculation. 694 break; 695 696 // The accessors used on call site here do the right thing for calls and 697 // invokes with operand bundles. 698 if (CB.doesNotAccessMemory(UseIndex)) { 699 /* nop */ 700 } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) { 701 IsRead = true; 702 } else if (!isRefSet(ArgMR) || 703 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) { 704 IsWrite = true; 705 } else { 706 return Attribute::None; 707 } 708 break; 709 } 710 711 case Instruction::Load: 712 // A volatile load has side effects beyond what readonly can be relied 713 // upon. 714 if (cast<LoadInst>(I)->isVolatile()) 715 return Attribute::None; 716 717 IsRead = true; 718 break; 719 720 case Instruction::Store: 721 if (cast<StoreInst>(I)->getValueOperand() == *U) 722 // untrackable capture 723 return Attribute::None; 724 725 // A volatile store has side effects beyond what writeonly can be relied 726 // upon. 727 if (cast<StoreInst>(I)->isVolatile()) 728 return Attribute::None; 729 730 IsWrite = true; 731 break; 732 733 case Instruction::ICmp: 734 case Instruction::Ret: 735 break; 736 737 default: 738 return Attribute::None; 739 } 740 } 741 742 if (IsWrite && IsRead) 743 return Attribute::None; 744 else if (IsRead) 745 return Attribute::ReadOnly; 746 else if (IsWrite) 747 return Attribute::WriteOnly; 748 else 749 return Attribute::ReadNone; 750 } 751 752 /// Deduce returned attributes for the SCC. 753 static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes, 754 SmallSet<Function *, 8> &Changed) { 755 // Check each function in turn, determining if an argument is always returned. 756 for (Function *F : SCCNodes) { 757 // We can infer and propagate function attributes only when we know that the 758 // definition we'll get at link time is *exactly* the definition we see now. 759 // For more details, see GlobalValue::mayBeDerefined. 760 if (!F->hasExactDefinition()) 761 continue; 762 763 if (F->getReturnType()->isVoidTy()) 764 continue; 765 766 // There is nothing to do if an argument is already marked as 'returned'. 767 if (F->getAttributes().hasAttrSomewhere(Attribute::Returned)) 768 continue; 769 770 auto FindRetArg = [&]() -> Argument * { 771 Argument *RetArg = nullptr; 772 for (BasicBlock &BB : *F) 773 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 774 // Note that stripPointerCasts should look through functions with 775 // returned arguments. 776 auto *RetVal = 777 dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts()); 778 if (!RetVal || RetVal->getType() != F->getReturnType()) 779 return nullptr; 780 781 if (!RetArg) 782 RetArg = RetVal; 783 else if (RetArg != RetVal) 784 return nullptr; 785 } 786 787 return RetArg; 788 }; 789 790 if (Argument *RetArg = FindRetArg()) { 791 RetArg->addAttr(Attribute::Returned); 792 ++NumReturned; 793 Changed.insert(F); 794 } 795 } 796 } 797 798 /// If a callsite has arguments that are also arguments to the parent function, 799 /// try to propagate attributes from the callsite's arguments to the parent's 800 /// arguments. This may be important because inlining can cause information loss 801 /// when attribute knowledge disappears with the inlined call. 802 static bool addArgumentAttrsFromCallsites(Function &F) { 803 if (!EnableNonnullArgPropagation) 804 return false; 805 806 bool Changed = false; 807 808 // For an argument attribute to transfer from a callsite to the parent, the 809 // call must be guaranteed to execute every time the parent is called. 810 // Conservatively, just check for calls in the entry block that are guaranteed 811 // to execute. 812 // TODO: This could be enhanced by testing if the callsite post-dominates the 813 // entry block or by doing simple forward walks or backward walks to the 814 // callsite. 815 BasicBlock &Entry = F.getEntryBlock(); 816 for (Instruction &I : Entry) { 817 if (auto *CB = dyn_cast<CallBase>(&I)) { 818 if (auto *CalledFunc = CB->getCalledFunction()) { 819 for (auto &CSArg : CalledFunc->args()) { 820 if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false)) 821 continue; 822 823 // If the non-null callsite argument operand is an argument to 'F' 824 // (the caller) and the call is guaranteed to execute, then the value 825 // must be non-null throughout 'F'. 826 auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo())); 827 if (FArg && !FArg->hasNonNullAttr()) { 828 FArg->addAttr(Attribute::NonNull); 829 Changed = true; 830 } 831 } 832 } 833 } 834 if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 835 break; 836 } 837 838 return Changed; 839 } 840 841 static bool addAccessAttr(Argument *A, Attribute::AttrKind R) { 842 assert((R == Attribute::ReadOnly || R == Attribute::ReadNone || 843 R == Attribute::WriteOnly) 844 && "Must be an access attribute."); 845 assert(A && "Argument must not be null."); 846 847 // If the argument already has the attribute, nothing needs to be done. 848 if (A->hasAttribute(R)) 849 return false; 850 851 // Otherwise, remove potentially conflicting attribute, add the new one, 852 // and update statistics. 853 A->removeAttr(Attribute::WriteOnly); 854 A->removeAttr(Attribute::ReadOnly); 855 A->removeAttr(Attribute::ReadNone); 856 // Remove conflicting writable attribute. 857 if (R == Attribute::ReadNone || R == Attribute::ReadOnly) 858 A->removeAttr(Attribute::Writable); 859 A->addAttr(R); 860 if (R == Attribute::ReadOnly) 861 ++NumReadOnlyArg; 862 else if (R == Attribute::WriteOnly) 863 ++NumWriteOnlyArg; 864 else 865 ++NumReadNoneArg; 866 return true; 867 } 868 869 /// Deduce nocapture attributes for the SCC. 870 static void addArgumentAttrs(const SCCNodeSet &SCCNodes, 871 SmallSet<Function *, 8> &Changed) { 872 ArgumentGraph AG; 873 874 // Check each function in turn, determining which pointer arguments are not 875 // captured. 876 for (Function *F : SCCNodes) { 877 // We can infer and propagate function attributes only when we know that the 878 // definition we'll get at link time is *exactly* the definition we see now. 879 // For more details, see GlobalValue::mayBeDerefined. 880 if (!F->hasExactDefinition()) 881 continue; 882 883 if (addArgumentAttrsFromCallsites(*F)) 884 Changed.insert(F); 885 886 // Functions that are readonly (or readnone) and nounwind and don't return 887 // a value can't capture arguments. Don't analyze them. 888 if (F->onlyReadsMemory() && F->doesNotThrow() && 889 F->getReturnType()->isVoidTy()) { 890 for (Argument &A : F->args()) { 891 if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) { 892 A.addAttr(Attribute::NoCapture); 893 ++NumNoCapture; 894 Changed.insert(F); 895 } 896 } 897 continue; 898 } 899 900 for (Argument &A : F->args()) { 901 if (!A.getType()->isPointerTy()) 902 continue; 903 bool HasNonLocalUses = false; 904 if (!A.hasNoCaptureAttr()) { 905 ArgumentUsesTracker Tracker(SCCNodes); 906 PointerMayBeCaptured(&A, &Tracker); 907 if (!Tracker.Captured) { 908 if (Tracker.Uses.empty()) { 909 // If it's trivially not captured, mark it nocapture now. 910 A.addAttr(Attribute::NoCapture); 911 ++NumNoCapture; 912 Changed.insert(F); 913 } else { 914 // If it's not trivially captured and not trivially not captured, 915 // then it must be calling into another function in our SCC. Save 916 // its particulars for Argument-SCC analysis later. 917 ArgumentGraphNode *Node = AG[&A]; 918 for (Argument *Use : Tracker.Uses) { 919 Node->Uses.push_back(AG[Use]); 920 if (Use != &A) 921 HasNonLocalUses = true; 922 } 923 } 924 } 925 // Otherwise, it's captured. Don't bother doing SCC analysis on it. 926 } 927 if (!HasNonLocalUses && !A.onlyReadsMemory()) { 928 // Can we determine that it's readonly/readnone/writeonly without doing 929 // an SCC? Note that we don't allow any calls at all here, or else our 930 // result will be dependent on the iteration order through the 931 // functions in the SCC. 932 SmallPtrSet<Argument *, 8> Self; 933 Self.insert(&A); 934 Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self); 935 if (R != Attribute::None) 936 if (addAccessAttr(&A, R)) 937 Changed.insert(F); 938 } 939 } 940 } 941 942 // The graph we've collected is partial because we stopped scanning for 943 // argument uses once we solved the argument trivially. These partial nodes 944 // show up as ArgumentGraphNode objects with an empty Uses list, and for 945 // these nodes the final decision about whether they capture has already been 946 // made. If the definition doesn't have a 'nocapture' attribute by now, it 947 // captures. 948 949 for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) { 950 const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I; 951 if (ArgumentSCC.size() == 1) { 952 if (!ArgumentSCC[0]->Definition) 953 continue; // synthetic root node 954 955 // eg. "void f(int* x) { if (...) f(x); }" 956 if (ArgumentSCC[0]->Uses.size() == 1 && 957 ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) { 958 Argument *A = ArgumentSCC[0]->Definition; 959 A->addAttr(Attribute::NoCapture); 960 ++NumNoCapture; 961 Changed.insert(A->getParent()); 962 963 // Infer the access attributes given the new nocapture one 964 SmallPtrSet<Argument *, 8> Self; 965 Self.insert(&*A); 966 Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self); 967 if (R != Attribute::None) 968 addAccessAttr(A, R); 969 } 970 continue; 971 } 972 973 bool SCCCaptured = false; 974 for (ArgumentGraphNode *Node : ArgumentSCC) { 975 if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) { 976 SCCCaptured = true; 977 break; 978 } 979 } 980 if (SCCCaptured) 981 continue; 982 983 SmallPtrSet<Argument *, 8> ArgumentSCCNodes; 984 // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for 985 // quickly looking up whether a given Argument is in this ArgumentSCC. 986 for (ArgumentGraphNode *I : ArgumentSCC) { 987 ArgumentSCCNodes.insert(I->Definition); 988 } 989 990 for (ArgumentGraphNode *N : ArgumentSCC) { 991 for (ArgumentGraphNode *Use : N->Uses) { 992 Argument *A = Use->Definition; 993 if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) 994 continue; 995 SCCCaptured = true; 996 break; 997 } 998 if (SCCCaptured) 999 break; 1000 } 1001 if (SCCCaptured) 1002 continue; 1003 1004 for (ArgumentGraphNode *N : ArgumentSCC) { 1005 Argument *A = N->Definition; 1006 A->addAttr(Attribute::NoCapture); 1007 ++NumNoCapture; 1008 Changed.insert(A->getParent()); 1009 } 1010 1011 // We also want to compute readonly/readnone/writeonly. With a small number 1012 // of false negatives, we can assume that any pointer which is captured 1013 // isn't going to be provably readonly or readnone, since by definition 1014 // we can't analyze all uses of a captured pointer. 1015 // 1016 // The false negatives happen when the pointer is captured by a function 1017 // that promises readonly/readnone behaviour on the pointer, then the 1018 // pointer's lifetime ends before anything that writes to arbitrary memory. 1019 // Also, a readonly/readnone pointer may be returned, but returning a 1020 // pointer is capturing it. 1021 1022 auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) { 1023 if (A == B) 1024 return A; 1025 if (A == Attribute::ReadNone) 1026 return B; 1027 if (B == Attribute::ReadNone) 1028 return A; 1029 return Attribute::None; 1030 }; 1031 1032 Attribute::AttrKind AccessAttr = Attribute::ReadNone; 1033 for (ArgumentGraphNode *N : ArgumentSCC) { 1034 Argument *A = N->Definition; 1035 Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes); 1036 AccessAttr = meetAccessAttr(AccessAttr, K); 1037 if (AccessAttr == Attribute::None) 1038 break; 1039 } 1040 1041 if (AccessAttr != Attribute::None) { 1042 for (ArgumentGraphNode *N : ArgumentSCC) { 1043 Argument *A = N->Definition; 1044 if (addAccessAttr(A, AccessAttr)) 1045 Changed.insert(A->getParent()); 1046 } 1047 } 1048 } 1049 } 1050 1051 /// Tests whether a function is "malloc-like". 1052 /// 1053 /// A function is "malloc-like" if it returns either null or a pointer that 1054 /// doesn't alias any other pointer visible to the caller. 1055 static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { 1056 SmallSetVector<Value *, 8> FlowsToReturn; 1057 for (BasicBlock &BB : *F) 1058 if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1059 FlowsToReturn.insert(Ret->getReturnValue()); 1060 1061 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1062 Value *RetVal = FlowsToReturn[i]; 1063 1064 if (Constant *C = dyn_cast<Constant>(RetVal)) { 1065 if (!C->isNullValue() && !isa<UndefValue>(C)) 1066 return false; 1067 1068 continue; 1069 } 1070 1071 if (isa<Argument>(RetVal)) 1072 return false; 1073 1074 if (Instruction *RVI = dyn_cast<Instruction>(RetVal)) 1075 switch (RVI->getOpcode()) { 1076 // Extend the analysis by looking upwards. 1077 case Instruction::BitCast: 1078 case Instruction::GetElementPtr: 1079 case Instruction::AddrSpaceCast: 1080 FlowsToReturn.insert(RVI->getOperand(0)); 1081 continue; 1082 case Instruction::Select: { 1083 SelectInst *SI = cast<SelectInst>(RVI); 1084 FlowsToReturn.insert(SI->getTrueValue()); 1085 FlowsToReturn.insert(SI->getFalseValue()); 1086 continue; 1087 } 1088 case Instruction::PHI: { 1089 PHINode *PN = cast<PHINode>(RVI); 1090 for (Value *IncValue : PN->incoming_values()) 1091 FlowsToReturn.insert(IncValue); 1092 continue; 1093 } 1094 1095 // Check whether the pointer came from an allocation. 1096 case Instruction::Alloca: 1097 break; 1098 case Instruction::Call: 1099 case Instruction::Invoke: { 1100 CallBase &CB = cast<CallBase>(*RVI); 1101 if (CB.hasRetAttr(Attribute::NoAlias)) 1102 break; 1103 if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction())) 1104 break; 1105 [[fallthrough]]; 1106 } 1107 default: 1108 return false; // Did not come from an allocation. 1109 } 1110 1111 if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) 1112 return false; 1113 } 1114 1115 return true; 1116 } 1117 1118 /// Deduce noalias attributes for the SCC. 1119 static void addNoAliasAttrs(const SCCNodeSet &SCCNodes, 1120 SmallSet<Function *, 8> &Changed) { 1121 // Check each function in turn, determining which functions return noalias 1122 // pointers. 1123 for (Function *F : SCCNodes) { 1124 // Already noalias. 1125 if (F->returnDoesNotAlias()) 1126 continue; 1127 1128 // We can infer and propagate function attributes only when we know that the 1129 // definition we'll get at link time is *exactly* the definition we see now. 1130 // For more details, see GlobalValue::mayBeDerefined. 1131 if (!F->hasExactDefinition()) 1132 return; 1133 1134 // We annotate noalias return values, which are only applicable to 1135 // pointer types. 1136 if (!F->getReturnType()->isPointerTy()) 1137 continue; 1138 1139 if (!isFunctionMallocLike(F, SCCNodes)) 1140 return; 1141 } 1142 1143 for (Function *F : SCCNodes) { 1144 if (F->returnDoesNotAlias() || 1145 !F->getReturnType()->isPointerTy()) 1146 continue; 1147 1148 F->setReturnDoesNotAlias(); 1149 ++NumNoAlias; 1150 Changed.insert(F); 1151 } 1152 } 1153 1154 /// Tests whether this function is known to not return null. 1155 /// 1156 /// Requires that the function returns a pointer. 1157 /// 1158 /// Returns true if it believes the function will not return a null, and sets 1159 /// \p Speculative based on whether the returned conclusion is a speculative 1160 /// conclusion due to SCC calls. 1161 static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, 1162 bool &Speculative) { 1163 assert(F->getReturnType()->isPointerTy() && 1164 "nonnull only meaningful on pointer types"); 1165 Speculative = false; 1166 1167 SmallSetVector<Value *, 8> FlowsToReturn; 1168 for (BasicBlock &BB : *F) 1169 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) 1170 FlowsToReturn.insert(Ret->getReturnValue()); 1171 1172 auto &DL = F->getParent()->getDataLayout(); 1173 1174 for (unsigned i = 0; i != FlowsToReturn.size(); ++i) { 1175 Value *RetVal = FlowsToReturn[i]; 1176 1177 // If this value is locally known to be non-null, we're good 1178 if (isKnownNonZero(RetVal, DL)) 1179 continue; 1180 1181 // Otherwise, we need to look upwards since we can't make any local 1182 // conclusions. 1183 Instruction *RVI = dyn_cast<Instruction>(RetVal); 1184 if (!RVI) 1185 return false; 1186 switch (RVI->getOpcode()) { 1187 // Extend the analysis by looking upwards. 1188 case Instruction::BitCast: 1189 case Instruction::GetElementPtr: 1190 case Instruction::AddrSpaceCast: 1191 FlowsToReturn.insert(RVI->getOperand(0)); 1192 continue; 1193 case Instruction::Select: { 1194 SelectInst *SI = cast<SelectInst>(RVI); 1195 FlowsToReturn.insert(SI->getTrueValue()); 1196 FlowsToReturn.insert(SI->getFalseValue()); 1197 continue; 1198 } 1199 case Instruction::PHI: { 1200 PHINode *PN = cast<PHINode>(RVI); 1201 for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1202 FlowsToReturn.insert(PN->getIncomingValue(i)); 1203 continue; 1204 } 1205 case Instruction::Call: 1206 case Instruction::Invoke: { 1207 CallBase &CB = cast<CallBase>(*RVI); 1208 Function *Callee = CB.getCalledFunction(); 1209 // A call to a node within the SCC is assumed to return null until 1210 // proven otherwise 1211 if (Callee && SCCNodes.count(Callee)) { 1212 Speculative = true; 1213 continue; 1214 } 1215 return false; 1216 } 1217 default: 1218 return false; // Unknown source, may be null 1219 }; 1220 llvm_unreachable("should have either continued or returned"); 1221 } 1222 1223 return true; 1224 } 1225 1226 /// Deduce nonnull attributes for the SCC. 1227 static void addNonNullAttrs(const SCCNodeSet &SCCNodes, 1228 SmallSet<Function *, 8> &Changed) { 1229 // Speculative that all functions in the SCC return only nonnull 1230 // pointers. We may refute this as we analyze functions. 1231 bool SCCReturnsNonNull = true; 1232 1233 // Check each function in turn, determining which functions return nonnull 1234 // pointers. 1235 for (Function *F : SCCNodes) { 1236 // Already nonnull. 1237 if (F->getAttributes().hasRetAttr(Attribute::NonNull)) 1238 continue; 1239 1240 // We can infer and propagate function attributes only when we know that the 1241 // definition we'll get at link time is *exactly* the definition we see now. 1242 // For more details, see GlobalValue::mayBeDerefined. 1243 if (!F->hasExactDefinition()) 1244 return; 1245 1246 // We annotate nonnull return values, which are only applicable to 1247 // pointer types. 1248 if (!F->getReturnType()->isPointerTy()) 1249 continue; 1250 1251 bool Speculative = false; 1252 if (isReturnNonNull(F, SCCNodes, Speculative)) { 1253 if (!Speculative) { 1254 // Mark the function eagerly since we may discover a function 1255 // which prevents us from speculating about the entire SCC 1256 LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName() 1257 << " as nonnull\n"); 1258 F->addRetAttr(Attribute::NonNull); 1259 ++NumNonNullReturn; 1260 Changed.insert(F); 1261 } 1262 continue; 1263 } 1264 // At least one function returns something which could be null, can't 1265 // speculate any more. 1266 SCCReturnsNonNull = false; 1267 } 1268 1269 if (SCCReturnsNonNull) { 1270 for (Function *F : SCCNodes) { 1271 if (F->getAttributes().hasRetAttr(Attribute::NonNull) || 1272 !F->getReturnType()->isPointerTy()) 1273 continue; 1274 1275 LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n"); 1276 F->addRetAttr(Attribute::NonNull); 1277 ++NumNonNullReturn; 1278 Changed.insert(F); 1279 } 1280 } 1281 } 1282 1283 /// Deduce noundef attributes for the SCC. 1284 static void addNoUndefAttrs(const SCCNodeSet &SCCNodes, 1285 SmallSet<Function *, 8> &Changed) { 1286 // Check each function in turn, determining which functions return noundef 1287 // values. 1288 for (Function *F : SCCNodes) { 1289 // Already noundef. 1290 if (F->getAttributes().hasRetAttr(Attribute::NoUndef)) 1291 continue; 1292 1293 // We can infer and propagate function attributes only when we know that the 1294 // definition we'll get at link time is *exactly* the definition we see now. 1295 // For more details, see GlobalValue::mayBeDerefined. 1296 if (!F->hasExactDefinition()) 1297 return; 1298 1299 // MemorySanitizer assumes that the definition and declaration of a 1300 // function will be consistent. A function with sanitize_memory attribute 1301 // should be skipped from inference. 1302 if (F->hasFnAttribute(Attribute::SanitizeMemory)) 1303 continue; 1304 1305 if (F->getReturnType()->isVoidTy()) 1306 continue; 1307 1308 if (all_of(*F, [](BasicBlock &BB) { 1309 if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) { 1310 // TODO: perform context-sensitive analysis? 1311 return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue()); 1312 } 1313 return true; 1314 })) { 1315 F->addRetAttr(Attribute::NoUndef); 1316 ++NumNoUndefReturn; 1317 Changed.insert(F); 1318 } 1319 } 1320 } 1321 1322 namespace { 1323 1324 /// Collects a set of attribute inference requests and performs them all in one 1325 /// go on a single SCC Node. Inference involves scanning function bodies 1326 /// looking for instructions that violate attribute assumptions. 1327 /// As soon as all the bodies are fine we are free to set the attribute. 1328 /// Customization of inference for individual attributes is performed by 1329 /// providing a handful of predicates for each attribute. 1330 class AttributeInferer { 1331 public: 1332 /// Describes a request for inference of a single attribute. 1333 struct InferenceDescriptor { 1334 1335 /// Returns true if this function does not have to be handled. 1336 /// General intent for this predicate is to provide an optimization 1337 /// for functions that do not need this attribute inference at all 1338 /// (say, for functions that already have the attribute). 1339 std::function<bool(const Function &)> SkipFunction; 1340 1341 /// Returns true if this instruction violates attribute assumptions. 1342 std::function<bool(Instruction &)> InstrBreaksAttribute; 1343 1344 /// Sets the inferred attribute for this function. 1345 std::function<void(Function &)> SetAttribute; 1346 1347 /// Attribute we derive. 1348 Attribute::AttrKind AKind; 1349 1350 /// If true, only "exact" definitions can be used to infer this attribute. 1351 /// See GlobalValue::isDefinitionExact. 1352 bool RequiresExactDefinition; 1353 1354 InferenceDescriptor(Attribute::AttrKind AK, 1355 std::function<bool(const Function &)> SkipFunc, 1356 std::function<bool(Instruction &)> InstrScan, 1357 std::function<void(Function &)> SetAttr, 1358 bool ReqExactDef) 1359 : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan), 1360 SetAttribute(SetAttr), AKind(AK), 1361 RequiresExactDefinition(ReqExactDef) {} 1362 }; 1363 1364 private: 1365 SmallVector<InferenceDescriptor, 4> InferenceDescriptors; 1366 1367 public: 1368 void registerAttrInference(InferenceDescriptor AttrInference) { 1369 InferenceDescriptors.push_back(AttrInference); 1370 } 1371 1372 void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed); 1373 }; 1374 1375 /// Perform all the requested attribute inference actions according to the 1376 /// attribute predicates stored before. 1377 void AttributeInferer::run(const SCCNodeSet &SCCNodes, 1378 SmallSet<Function *, 8> &Changed) { 1379 SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors; 1380 // Go through all the functions in SCC and check corresponding attribute 1381 // assumptions for each of them. Attributes that are invalid for this SCC 1382 // will be removed from InferInSCC. 1383 for (Function *F : SCCNodes) { 1384 1385 // No attributes whose assumptions are still valid - done. 1386 if (InferInSCC.empty()) 1387 return; 1388 1389 // Check if our attributes ever need scanning/can be scanned. 1390 llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) { 1391 if (ID.SkipFunction(*F)) 1392 return false; 1393 1394 // Remove from further inference (invalidate) when visiting a function 1395 // that has no instructions to scan/has an unsuitable definition. 1396 return F->isDeclaration() || 1397 (ID.RequiresExactDefinition && !F->hasExactDefinition()); 1398 }); 1399 1400 // For each attribute still in InferInSCC that doesn't explicitly skip F, 1401 // set up the F instructions scan to verify assumptions of the attribute. 1402 SmallVector<InferenceDescriptor, 4> InferInThisFunc; 1403 llvm::copy_if( 1404 InferInSCC, std::back_inserter(InferInThisFunc), 1405 [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); }); 1406 1407 if (InferInThisFunc.empty()) 1408 continue; 1409 1410 // Start instruction scan. 1411 for (Instruction &I : instructions(*F)) { 1412 llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) { 1413 if (!ID.InstrBreaksAttribute(I)) 1414 return false; 1415 // Remove attribute from further inference on any other functions 1416 // because attribute assumptions have just been violated. 1417 llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) { 1418 return D.AKind == ID.AKind; 1419 }); 1420 // Remove attribute from the rest of current instruction scan. 1421 return true; 1422 }); 1423 1424 if (InferInThisFunc.empty()) 1425 break; 1426 } 1427 } 1428 1429 if (InferInSCC.empty()) 1430 return; 1431 1432 for (Function *F : SCCNodes) 1433 // At this point InferInSCC contains only functions that were either: 1434 // - explicitly skipped from scan/inference, or 1435 // - verified to have no instructions that break attribute assumptions. 1436 // Hence we just go and force the attribute for all non-skipped functions. 1437 for (auto &ID : InferInSCC) { 1438 if (ID.SkipFunction(*F)) 1439 continue; 1440 Changed.insert(F); 1441 ID.SetAttribute(*F); 1442 } 1443 } 1444 1445 struct SCCNodesResult { 1446 SCCNodeSet SCCNodes; 1447 bool HasUnknownCall; 1448 }; 1449 1450 } // end anonymous namespace 1451 1452 /// Helper for non-Convergent inference predicate InstrBreaksAttribute. 1453 static bool InstrBreaksNonConvergent(Instruction &I, 1454 const SCCNodeSet &SCCNodes) { 1455 const CallBase *CB = dyn_cast<CallBase>(&I); 1456 // Breaks non-convergent assumption if CS is a convergent call to a function 1457 // not in the SCC. 1458 return CB && CB->isConvergent() && 1459 !SCCNodes.contains(CB->getCalledFunction()); 1460 } 1461 1462 /// Helper for NoUnwind inference predicate InstrBreaksAttribute. 1463 static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { 1464 if (!I.mayThrow(/* IncludePhaseOneUnwind */ true)) 1465 return false; 1466 if (const auto *CI = dyn_cast<CallInst>(&I)) { 1467 if (Function *Callee = CI->getCalledFunction()) { 1468 // I is a may-throw call to a function inside our SCC. This doesn't 1469 // invalidate our current working assumption that the SCC is no-throw; we 1470 // just have to scan that other function. 1471 if (SCCNodes.contains(Callee)) 1472 return false; 1473 } 1474 } 1475 return true; 1476 } 1477 1478 /// Helper for NoFree inference predicate InstrBreaksAttribute. 1479 static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) { 1480 CallBase *CB = dyn_cast<CallBase>(&I); 1481 if (!CB) 1482 return false; 1483 1484 if (CB->hasFnAttr(Attribute::NoFree)) 1485 return false; 1486 1487 // Speculatively assume in SCC. 1488 if (Function *Callee = CB->getCalledFunction()) 1489 if (SCCNodes.contains(Callee)) 1490 return false; 1491 1492 return true; 1493 } 1494 1495 // Return true if this is an atomic which has an ordering stronger than 1496 // unordered. Note that this is different than the predicate we use in 1497 // Attributor. Here we chose to be conservative and consider monotonic 1498 // operations potentially synchronizing. We generally don't do much with 1499 // monotonic operations, so this is simply risk reduction. 1500 static bool isOrderedAtomic(Instruction *I) { 1501 if (!I->isAtomic()) 1502 return false; 1503 1504 if (auto *FI = dyn_cast<FenceInst>(I)) 1505 // All legal orderings for fence are stronger than monotonic. 1506 return FI->getSyncScopeID() != SyncScope::SingleThread; 1507 else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I)) 1508 return true; 1509 else if (auto *SI = dyn_cast<StoreInst>(I)) 1510 return !SI->isUnordered(); 1511 else if (auto *LI = dyn_cast<LoadInst>(I)) 1512 return !LI->isUnordered(); 1513 else { 1514 llvm_unreachable("unknown atomic instruction?"); 1515 } 1516 } 1517 1518 static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { 1519 // Volatile may synchronize 1520 if (I.isVolatile()) 1521 return true; 1522 1523 // An ordered atomic may synchronize. (See comment about on monotonic.) 1524 if (isOrderedAtomic(&I)) 1525 return true; 1526 1527 auto *CB = dyn_cast<CallBase>(&I); 1528 if (!CB) 1529 // Non call site cases covered by the two checks above 1530 return false; 1531 1532 if (CB->hasFnAttr(Attribute::NoSync)) 1533 return false; 1534 1535 // Non volatile memset/memcpy/memmoves are nosync 1536 // NOTE: Only intrinsics with volatile flags should be handled here. All 1537 // others should be marked in Intrinsics.td. 1538 if (auto *MI = dyn_cast<MemIntrinsic>(&I)) 1539 if (!MI->isVolatile()) 1540 return false; 1541 1542 // Speculatively assume in SCC. 1543 if (Function *Callee = CB->getCalledFunction()) 1544 if (SCCNodes.contains(Callee)) 1545 return false; 1546 1547 return true; 1548 } 1549 1550 /// Attempt to remove convergent function attribute when possible. 1551 /// 1552 /// Returns true if any changes to function attributes were made. 1553 static void inferConvergent(const SCCNodeSet &SCCNodes, 1554 SmallSet<Function *, 8> &Changed) { 1555 AttributeInferer AI; 1556 1557 // Request to remove the convergent attribute from all functions in the SCC 1558 // if every callsite within the SCC is not convergent (except for calls 1559 // to functions within the SCC). 1560 // Note: Removal of the attr from the callsites will happen in 1561 // InstCombineCalls separately. 1562 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1563 Attribute::Convergent, 1564 // Skip non-convergent functions. 1565 [](const Function &F) { return !F.isConvergent(); }, 1566 // Instructions that break non-convergent assumption. 1567 [SCCNodes](Instruction &I) { 1568 return InstrBreaksNonConvergent(I, SCCNodes); 1569 }, 1570 [](Function &F) { 1571 LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName() 1572 << "\n"); 1573 F.setNotConvergent(); 1574 }, 1575 /* RequiresExactDefinition= */ false}); 1576 // Perform all the requested attribute inference actions. 1577 AI.run(SCCNodes, Changed); 1578 } 1579 1580 /// Infer attributes from all functions in the SCC by scanning every 1581 /// instruction for compliance to the attribute assumptions. 1582 /// 1583 /// Returns true if any changes to function attributes were made. 1584 static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes, 1585 SmallSet<Function *, 8> &Changed) { 1586 AttributeInferer AI; 1587 1588 if (!DisableNoUnwindInference) 1589 // Request to infer nounwind attribute for all the functions in the SCC if 1590 // every callsite within the SCC is not throwing (except for calls to 1591 // functions within the SCC). Note that nounwind attribute suffers from 1592 // derefinement - results may change depending on how functions are 1593 // optimized. Thus it can be inferred only from exact definitions. 1594 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1595 Attribute::NoUnwind, 1596 // Skip non-throwing functions. 1597 [](const Function &F) { return F.doesNotThrow(); }, 1598 // Instructions that break non-throwing assumption. 1599 [&SCCNodes](Instruction &I) { 1600 return InstrBreaksNonThrowing(I, SCCNodes); 1601 }, 1602 [](Function &F) { 1603 LLVM_DEBUG(dbgs() 1604 << "Adding nounwind attr to fn " << F.getName() << "\n"); 1605 F.setDoesNotThrow(); 1606 ++NumNoUnwind; 1607 }, 1608 /* RequiresExactDefinition= */ true}); 1609 1610 if (!DisableNoFreeInference) 1611 // Request to infer nofree attribute for all the functions in the SCC if 1612 // every callsite within the SCC does not directly or indirectly free 1613 // memory (except for calls to functions within the SCC). Note that nofree 1614 // attribute suffers from derefinement - results may change depending on 1615 // how functions are optimized. Thus it can be inferred only from exact 1616 // definitions. 1617 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1618 Attribute::NoFree, 1619 // Skip functions known not to free memory. 1620 [](const Function &F) { return F.doesNotFreeMemory(); }, 1621 // Instructions that break non-deallocating assumption. 1622 [&SCCNodes](Instruction &I) { 1623 return InstrBreaksNoFree(I, SCCNodes); 1624 }, 1625 [](Function &F) { 1626 LLVM_DEBUG(dbgs() 1627 << "Adding nofree attr to fn " << F.getName() << "\n"); 1628 F.setDoesNotFreeMemory(); 1629 ++NumNoFree; 1630 }, 1631 /* RequiresExactDefinition= */ true}); 1632 1633 AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ 1634 Attribute::NoSync, 1635 // Skip already marked functions. 1636 [](const Function &F) { return F.hasNoSync(); }, 1637 // Instructions that break nosync assumption. 1638 [&SCCNodes](Instruction &I) { 1639 return InstrBreaksNoSync(I, SCCNodes); 1640 }, 1641 [](Function &F) { 1642 LLVM_DEBUG(dbgs() 1643 << "Adding nosync attr to fn " << F.getName() << "\n"); 1644 F.setNoSync(); 1645 ++NumNoSync; 1646 }, 1647 /* RequiresExactDefinition= */ true}); 1648 1649 // Perform all the requested attribute inference actions. 1650 AI.run(SCCNodes, Changed); 1651 } 1652 1653 static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, 1654 SmallSet<Function *, 8> &Changed) { 1655 // Try and identify functions that do not recurse. 1656 1657 // If the SCC contains multiple nodes we know for sure there is recursion. 1658 if (SCCNodes.size() != 1) 1659 return; 1660 1661 Function *F = *SCCNodes.begin(); 1662 if (!F || !F->hasExactDefinition() || F->doesNotRecurse()) 1663 return; 1664 1665 // If all of the calls in F are identifiable and are to norecurse functions, F 1666 // is norecurse. This check also detects self-recursion as F is not currently 1667 // marked norecurse, so any called from F to F will not be marked norecurse. 1668 for (auto &BB : *F) 1669 for (auto &I : BB.instructionsWithoutDebug()) 1670 if (auto *CB = dyn_cast<CallBase>(&I)) { 1671 Function *Callee = CB->getCalledFunction(); 1672 if (!Callee || Callee == F || 1673 (!Callee->doesNotRecurse() && 1674 !(Callee->isDeclaration() && 1675 Callee->hasFnAttribute(Attribute::NoCallback)))) 1676 // Function calls a potentially recursive function. 1677 return; 1678 } 1679 1680 // Every call was to a non-recursive function other than this function, and 1681 // we have no indirect recursion as the SCC size is one. This function cannot 1682 // recurse. 1683 F->setDoesNotRecurse(); 1684 ++NumNoRecurse; 1685 Changed.insert(F); 1686 } 1687 1688 static bool instructionDoesNotReturn(Instruction &I) { 1689 if (auto *CB = dyn_cast<CallBase>(&I)) 1690 return CB->hasFnAttr(Attribute::NoReturn); 1691 return false; 1692 } 1693 1694 // A basic block can only return if it terminates with a ReturnInst and does not 1695 // contain calls to noreturn functions. 1696 static bool basicBlockCanReturn(BasicBlock &BB) { 1697 if (!isa<ReturnInst>(BB.getTerminator())) 1698 return false; 1699 return none_of(BB, instructionDoesNotReturn); 1700 } 1701 1702 // FIXME: this doesn't handle recursion. 1703 static bool canReturn(Function &F) { 1704 SmallVector<BasicBlock *, 16> Worklist; 1705 SmallPtrSet<BasicBlock *, 16> Visited; 1706 1707 Visited.insert(&F.front()); 1708 Worklist.push_back(&F.front()); 1709 1710 do { 1711 BasicBlock *BB = Worklist.pop_back_val(); 1712 if (basicBlockCanReturn(*BB)) 1713 return true; 1714 for (BasicBlock *Succ : successors(BB)) 1715 if (Visited.insert(Succ).second) 1716 Worklist.push_back(Succ); 1717 } while (!Worklist.empty()); 1718 1719 return false; 1720 } 1721 1722 // Set the noreturn function attribute if possible. 1723 static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, 1724 SmallSet<Function *, 8> &Changed) { 1725 for (Function *F : SCCNodes) { 1726 if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) || 1727 F->doesNotReturn()) 1728 continue; 1729 1730 if (!canReturn(*F)) { 1731 F->setDoesNotReturn(); 1732 Changed.insert(F); 1733 } 1734 } 1735 } 1736 1737 static bool functionWillReturn(const Function &F) { 1738 // We can infer and propagate function attributes only when we know that the 1739 // definition we'll get at link time is *exactly* the definition we see now. 1740 // For more details, see GlobalValue::mayBeDerefined. 1741 if (!F.hasExactDefinition()) 1742 return false; 1743 1744 // Must-progress function without side-effects must return. 1745 if (F.mustProgress() && F.onlyReadsMemory()) 1746 return true; 1747 1748 // Can only analyze functions with a definition. 1749 if (F.isDeclaration()) 1750 return false; 1751 1752 // Functions with loops require more sophisticated analysis, as the loop 1753 // may be infinite. For now, don't try to handle them. 1754 SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges; 1755 FindFunctionBackedges(F, Backedges); 1756 if (!Backedges.empty()) 1757 return false; 1758 1759 // If there are no loops, then the function is willreturn if all calls in 1760 // it are willreturn. 1761 return all_of(instructions(F), [](const Instruction &I) { 1762 return I.willReturn(); 1763 }); 1764 } 1765 1766 // Set the willreturn function attribute if possible. 1767 static void addWillReturn(const SCCNodeSet &SCCNodes, 1768 SmallSet<Function *, 8> &Changed) { 1769 for (Function *F : SCCNodes) { 1770 if (!F || F->willReturn() || !functionWillReturn(*F)) 1771 continue; 1772 1773 F->setWillReturn(); 1774 NumWillReturn++; 1775 Changed.insert(F); 1776 } 1777 } 1778 1779 static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) { 1780 SCCNodesResult Res; 1781 Res.HasUnknownCall = false; 1782 for (Function *F : Functions) { 1783 if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) || 1784 F->isPresplitCoroutine()) { 1785 // Treat any function we're trying not to optimize as if it were an 1786 // indirect call and omit it from the node set used below. 1787 Res.HasUnknownCall = true; 1788 continue; 1789 } 1790 // Track whether any functions in this SCC have an unknown call edge. 1791 // Note: if this is ever a performance hit, we can common it with 1792 // subsequent routines which also do scans over the instructions of the 1793 // function. 1794 if (!Res.HasUnknownCall) { 1795 for (Instruction &I : instructions(*F)) { 1796 if (auto *CB = dyn_cast<CallBase>(&I)) { 1797 if (!CB->getCalledFunction()) { 1798 Res.HasUnknownCall = true; 1799 break; 1800 } 1801 } 1802 } 1803 } 1804 Res.SCCNodes.insert(F); 1805 } 1806 return Res; 1807 } 1808 1809 template <typename AARGetterT> 1810 static SmallSet<Function *, 8> 1811 deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter, 1812 bool ArgAttrsOnly) { 1813 SCCNodesResult Nodes = createSCCNodeSet(Functions); 1814 1815 // Bail if the SCC only contains optnone functions. 1816 if (Nodes.SCCNodes.empty()) 1817 return {}; 1818 1819 SmallSet<Function *, 8> Changed; 1820 if (ArgAttrsOnly) { 1821 addArgumentAttrs(Nodes.SCCNodes, Changed); 1822 return Changed; 1823 } 1824 1825 addArgumentReturnedAttrs(Nodes.SCCNodes, Changed); 1826 addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed); 1827 addArgumentAttrs(Nodes.SCCNodes, Changed); 1828 inferConvergent(Nodes.SCCNodes, Changed); 1829 addNoReturnAttrs(Nodes.SCCNodes, Changed); 1830 addWillReturn(Nodes.SCCNodes, Changed); 1831 addNoUndefAttrs(Nodes.SCCNodes, Changed); 1832 1833 // If we have no external nodes participating in the SCC, we can deduce some 1834 // more precise attributes as well. 1835 if (!Nodes.HasUnknownCall) { 1836 addNoAliasAttrs(Nodes.SCCNodes, Changed); 1837 addNonNullAttrs(Nodes.SCCNodes, Changed); 1838 inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed); 1839 addNoRecurseAttrs(Nodes.SCCNodes, Changed); 1840 } 1841 1842 // Finally, infer the maximal set of attributes from the ones we've inferred 1843 // above. This is handling the cases where one attribute on a signature 1844 // implies another, but for implementation reasons the inference rule for 1845 // the later is missing (or simply less sophisticated). 1846 for (Function *F : Nodes.SCCNodes) 1847 if (F) 1848 if (inferAttributesFromOthers(*F)) 1849 Changed.insert(F); 1850 1851 return Changed; 1852 } 1853 1854 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, 1855 CGSCCAnalysisManager &AM, 1856 LazyCallGraph &CG, 1857 CGSCCUpdateResult &) { 1858 // Skip non-recursive functions if requested. 1859 // Only infer argument attributes for non-recursive functions, because 1860 // it can affect optimization behavior in conjunction with noalias. 1861 bool ArgAttrsOnly = false; 1862 if (C.size() == 1 && SkipNonRecursive) { 1863 LazyCallGraph::Node &N = *C.begin(); 1864 if (!N->lookup(N)) 1865 ArgAttrsOnly = true; 1866 } 1867 1868 FunctionAnalysisManager &FAM = 1869 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); 1870 1871 // We pass a lambda into functions to wire them up to the analysis manager 1872 // for getting function analyses. 1873 auto AARGetter = [&](Function &F) -> AAResults & { 1874 return FAM.getResult<AAManager>(F); 1875 }; 1876 1877 SmallVector<Function *, 8> Functions; 1878 for (LazyCallGraph::Node &N : C) { 1879 Functions.push_back(&N.getFunction()); 1880 } 1881 1882 auto ChangedFunctions = 1883 deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly); 1884 if (ChangedFunctions.empty()) 1885 return PreservedAnalyses::all(); 1886 1887 // Invalidate analyses for modified functions so that we don't have to 1888 // invalidate all analyses for all functions in this SCC. 1889 PreservedAnalyses FuncPA; 1890 // We haven't changed the CFG for modified functions. 1891 FuncPA.preserveSet<CFGAnalyses>(); 1892 for (Function *Changed : ChangedFunctions) { 1893 FAM.invalidate(*Changed, FuncPA); 1894 // Also invalidate any direct callers of changed functions since analyses 1895 // may care about attributes of direct callees. For example, MemorySSA cares 1896 // about whether or not a call's callee modifies memory and queries that 1897 // through function attributes. 1898 for (auto *U : Changed->users()) { 1899 if (auto *Call = dyn_cast<CallBase>(U)) { 1900 if (Call->getCalledFunction() == Changed) 1901 FAM.invalidate(*Call->getFunction(), FuncPA); 1902 } 1903 } 1904 } 1905 1906 PreservedAnalyses PA; 1907 // We have not added or removed functions. 1908 PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); 1909 // We already invalidated all relevant function analyses above. 1910 PA.preserveSet<AllAnalysesOn<Function>>(); 1911 return PA; 1912 } 1913 1914 void PostOrderFunctionAttrsPass::printPipeline( 1915 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1916 static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline( 1917 OS, MapClassName2PassName); 1918 if (SkipNonRecursive) 1919 OS << "<skip-non-recursive-function-attrs>"; 1920 } 1921 1922 template <typename AARGetterT> 1923 static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) { 1924 SmallVector<Function *, 8> Functions; 1925 for (CallGraphNode *I : SCC) { 1926 Functions.push_back(I->getFunction()); 1927 } 1928 1929 return !deriveAttrsInPostOrder(Functions, AARGetter).empty(); 1930 } 1931 1932 static bool addNoRecurseAttrsTopDown(Function &F) { 1933 // We check the preconditions for the function prior to calling this to avoid 1934 // the cost of building up a reversible post-order list. We assert them here 1935 // to make sure none of the invariants this relies on were violated. 1936 assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!"); 1937 assert(!F.doesNotRecurse() && 1938 "This function has already been deduced as norecurs!"); 1939 assert(F.hasInternalLinkage() && 1940 "Can only do top-down deduction for internal linkage functions!"); 1941 1942 // If F is internal and all of its uses are calls from a non-recursive 1943 // functions, then none of its calls could in fact recurse without going 1944 // through a function marked norecurse, and so we can mark this function too 1945 // as norecurse. Note that the uses must actually be calls -- otherwise 1946 // a pointer to this function could be returned from a norecurse function but 1947 // this function could be recursively (indirectly) called. Note that this 1948 // also detects if F is directly recursive as F is not yet marked as 1949 // a norecurse function. 1950 for (auto &U : F.uses()) { 1951 auto *I = dyn_cast<Instruction>(U.getUser()); 1952 if (!I) 1953 return false; 1954 CallBase *CB = dyn_cast<CallBase>(I); 1955 if (!CB || !CB->isCallee(&U) || 1956 !CB->getParent()->getParent()->doesNotRecurse()) 1957 return false; 1958 } 1959 F.setDoesNotRecurse(); 1960 ++NumNoRecurse; 1961 return true; 1962 } 1963 1964 static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) { 1965 // We only have a post-order SCC traversal (because SCCs are inherently 1966 // discovered in post-order), so we accumulate them in a vector and then walk 1967 // it in reverse. This is simpler than using the RPO iterator infrastructure 1968 // because we need to combine SCC detection and the PO walk of the call 1969 // graph. We can also cheat egregiously because we're primarily interested in 1970 // synthesizing norecurse and so we can only save the singular SCCs as SCCs 1971 // with multiple functions in them will clearly be recursive. 1972 1973 SmallVector<Function *, 16> Worklist; 1974 CG.buildRefSCCs(); 1975 for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { 1976 for (LazyCallGraph::SCC &SCC : RC) { 1977 if (SCC.size() != 1) 1978 continue; 1979 Function &F = SCC.begin()->getFunction(); 1980 if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) 1981 Worklist.push_back(&F); 1982 } 1983 } 1984 bool Changed = false; 1985 for (auto *F : llvm::reverse(Worklist)) 1986 Changed |= addNoRecurseAttrsTopDown(*F); 1987 1988 return Changed; 1989 } 1990 1991 PreservedAnalyses 1992 ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) { 1993 auto &CG = AM.getResult<LazyCallGraphAnalysis>(M); 1994 1995 if (!deduceFunctionAttributeInRPO(M, CG)) 1996 return PreservedAnalyses::all(); 1997 1998 PreservedAnalyses PA; 1999 PA.preserve<LazyCallGraphAnalysis>(); 2000 return PA; 2001 } 2002