//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObject(Loc.Ptr);
  assert(!isa<AllocaInst>(UO) &&
         "Should have been handled by getModRefInfoMask()");
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}
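
// Illustration of the classification above (sketch only, not used by the
// pass): in
//   void f(int *p) { *p = 0; }
// the store's underlying object is the argument p, so the access is recorded
// as argmem and f can later be given memory(argmem: write).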

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo-probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It carries a memory access
      // tag so that it is not removed by optimizations, without blocking
      // other instructions from being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}
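
// Illustrative note on the two-part result above (sketch only): for a
// self-recursive function such as
//   int g; void f(int *p) { *p = 0; f(&g); }
// the first component records the write through p (argmem), while
// RecursiveArgME records that the recursive call passes &g in an argument
// position. Only once the SCC is known to access argmem does the caller fold
// the access to g back in.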

/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // We have reached the bottom of the lattice; the result cannot be
    // improved further.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}
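
// Illustrative example for the SCC-wide join above (sketch only): in the
// mutually recursive pair
//   int f(int *p) { return g(p); }
//   int g(int *p) { return *p + f(p); }
// each body only reads through its argument, the intra-SCC calls are handled
// via RecursiveArgME, and both functions end up memory(argmem: read), i.e.
// readonly.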

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  if (CachedPrevailingSummary.count(VI))
    return CachedPrevailingSummary[VI];

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.

  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.

  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used,
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported; it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining which are ultimately dropped from the TU. Since this
  ///       is localized to the TU the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers so we
  ///    ignore them for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  CachedPrevailingSummary[VI] = nullptr;
  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath() << ". Previous module "
            << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  if (Local) {
    assert(!Prevailing);
    CachedPrevailingSummary[VI] = Local;
  } else if (Prevailing) {
    assert(!Local);
    CachedPrevailingSummary[VI] = Prevailing;
  }

  return CachedPrevailingSummary[VI];
}
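
// Illustrative sketch of the propagation below (not part of the pass): the
// thin link works purely on summaries. For a singleton SCC where
//   f() calls g()   // f's summary: !MayThrow; g's summary: NoUnwind+NoRecurse
// every callee is NoUnwind and NoRecurse and f itself may not throw, so both
// flags are set on f's summary; any missing summary makes us bail instead.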

bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: implement addNoAliasAttrs once
  // there's more information about the return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // A prevailing summary may be unavailable, e.g. if all we have is a
      // declaration; in that case we cannot propagate.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call the propagation function on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};
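
// Illustrative sketch for the tracker below (not part of the pass): in the SCC
//   void f(int *x) { g(x); }
//   void g(int *y) { f(y); }
// passing x to g is not treated as a capture; instead the tracker records the
// edge x -> y (and y -> x in g), yielding an argument SCC {x, y} that can be
// analyzed as a unit.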

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm
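
// Illustrative examples for the deduction below (sketch only):
//   int f(int *p) { return *p; }   // only reads  -> ReadOnly
//   void g(int *p) { *p = 0; }     // only writes -> WriteOnly
//   int h(int *p) { return 42; }   // no accesses -> ReadNone
// Any use that both reads and writes, or that escapes the analysis, yields
// Attribute::None.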

/// Returns Attribute::None, Attribute::ReadOnly, Attribute::WriteOnly or
/// Attribute::ReadNone.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (!CB.doesNotCapture(UseIndex)) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // Untrackable capture.
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}
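
// Illustrative example for the 'returned' deduction below (sketch only):
//   char *append(char *buf, char c) { *buf = c; return buf; }
// returns its first argument on every return path, so that argument can be
// marked 'returned'.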

/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/*AllowUndefOrPoison=*/false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}
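
// Illustrative example for the callsite propagation above (hypothetical
// functions, sketch only): given
//   void callee(int *p);            // p is marked nonnull
//   void caller(int *q) { callee(q); /* ... */ }
// the call is guaranteed to execute on entry to caller, so q must be nonnull
// throughout caller and the attribute can be copied to caller's parameter.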

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attributes, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove the conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A.hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A.addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self);
        if (R != Attribute::None)
          if (addAccessAttr(&A, R))
            Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::NoCapture);
        ++NumNoCapture;
        Changed.insert(A->getParent());

        // Infer the access attributes given the new nocapture one.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(A);
        Attribute::AttrKind R = determinePointerAccessAttrs(A, Self);
        if (R != Attribute::None)
          addAccessAttr(A, R);
      }
      continue;
    }

    bool SCCCaptured = false;
    for (ArgumentGraphNode *Node : ArgumentSCC) {
      if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
      if (SCCCaptured)
        break;
    }
    if (SCCCaptured)
      continue;

    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      A->addAttr(Attribute::NoCapture);
      ++NumNoCapture;
      Changed.insert(A->getParent());
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}
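
// Illustrative follow-on for the argument-SCC processing above (sketch only):
//   void f(int *x) { if (cond()) g(x); }
//   void g(int *y) { f(y); }
// x and y form one argument SCC whose only uses are each other, so both are
// marked nocapture; and since neither is otherwise read or written, the SCC
// meet yields readnone for both as well.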

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false,
                             /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}
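
// Illustrative example for the noalias deduction above (sketch only, assuming
// malloc's declaration carries its usual noalias return attribute): a wrapper
//   int *make_buf(void) { return (int *)malloc(16); }
// only ever returns a fresh allocation or null, so its own return value can
// be marked noalias as well.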

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (Value *IncValue : PN->incoming_values())
        FlowsToReturn.insert(IncValue);
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}
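
// Illustrative example for the nonnull deduction below (sketch only):
//   static char buf[16];
//   char *get_buf(void) { return buf; }
// the address of a global is trivially non-null, so the return gets nonnull.
// A call back into the SCC is recorded as Speculative and the attribute is
// only kept if every function in the SCC turns out to be nonnull-returning.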

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Speculate that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null; we can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    AttributeList Attrs = F->getAttributes();
    if (Attrs.hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    const DataLayout &DL = F->getDataLayout();
    if (all_of(*F, [&](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            Value *RetVal = Ret->getReturnValue();
            if (!isGuaranteedNotToBeUndefOrPoison(RetVal))
              return false;

            // We know the original return value is not poison now, but it
            // could still be converted to poison by another return attribute.
            // Try to explicitly re-prove the relevant attributes.
            if (Attrs.hasRetAttr(Attribute::NonNull) &&
                !isKnownNonZero(RetVal, DL))
              return false;

            if (MaybeAlign Align = Attrs.getRetAlignment())
              if (RetVal->getPointerAlignment(DL) < *Align)
                return false;

            Attribute Attr = Attrs.getRetAttr(Attribute::Range);
            if (Attr.isValid() &&
                !Attr.getRange().contains(
                    computeConstantRange(RetVal, /*ForSigned=*/false)))
              return false;
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}
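
// Illustrative example for the noundef deduction above (sketch only): a
// function returning a freshly computed constant,
//   int zero(void) { return 0; }
// provably never returns undef or poison, so its return can be marked
// noundef; returning an uninitialized local would block the attribute.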

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go over a single SCC. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// The general intent of this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
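
// A minimal usage sketch for the class above (simplified predicates, for
// illustration only; the real registrations appear in
// inferAttrsFromFunctionBodies below):
//
//   AttributeInferer AI;
//   AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
//       Attribute::NoUnwind,
//       [](const Function &F) { return F.doesNotThrow(); }, // skip
//       [](Instruction &I) { return I.mayThrow(); },        // breaks attr
//       [](Function &F) { F.setDoesNotThrow(); },           // set attr
//       /*RequiresExactDefinition=*/true});
//   AI.run(SCCNodes, Changed);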

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up the F instructions scan to verify assumptions of the attribute.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove the attribute from further inference on any other functions
        // because its assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove the attribute from the rest of the current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point InferInSCC contains only attributes that were either:
    //   - explicitly skipped from scan/inference, or
    //   - verified to have no instructions that break their assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks the non-convergent assumption if CS is a convergent call to a
  // function not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/*IncludePhaseOneUnwind=*/true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw; we
      // just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}
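
// Illustrative note on the breakers above (sketch only): in the SCC
//   void f() { g(); }          // call within the SCC: scan g instead
//   void g() { ext(); f(); }   // ext() may throw: breaks nounwind
// the scan of g invalidates the NoUnwind descriptor for the whole SCC, so
// neither f nor g is marked nounwind.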

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different than the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmoves are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent attribute from functions when possible.
///
/// Functions whose attributes changed are inserted into \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break the non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /*RequiresExactDefinition=*/false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}
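
// Illustrative note for the nosync scan used below (sketch only):
//   void set_flag(int *p) { *p = 1; }         // plain store: nosync-friendly
//   void publish(_Atomic int *p) { *p = 1; }  // seq_cst store: may synchronize
// the second store is an ordered atomic, so InstrBreaksNoSync reports it and
// nosync inference is abandoned for the SCC.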
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break the non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request inference of the nofree attribute for all functions in the SCC
    // if no call site within the SCC directly or indirectly frees memory
    // (except for calls to functions within the SCC). Note that the nofree
    // attribute suffers from derefinement: results may change depending on
    // how functions are optimized. Thus it can be inferred only from exact
    // definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break the non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already-marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break the nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion: F is not yet
  // marked norecurse, so a call from F to itself fails the test below.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
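  // (For instance, a leaf function that makes no calls at all trivially
  // reaches this point.)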
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// FIXME: this doesn't handle recursion.
static bool canReturn(Function &F) {
  SmallVector<BasicBlock *, 16> Worklist;
  SmallPtrSet<BasicBlock *, 16> Visited;

  Visited.insert(&F.front());
  Worklist.push_back(&F.front());

  do {
    BasicBlock *BB = Worklist.pop_back_val();
    if (basicBlockCanReturn(*BB))
      return true;
    for (BasicBlock *Succ : successors(BB))
      if (Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  } while (!Worklist.empty());

  return false;
}

// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
        F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function that only reads memory must return: with no
  // observable side effects, failing to return would violate the mustprogress
  // guarantee.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F),
                [](const Instruction &I) { return I.willReturn(); });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}

template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC contains only functions we skip (optnone, naked, or
  // pre-split coroutines).
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    addArgumentAttrs(Nodes.SCCNodes, Changed);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}

PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested.
  // Only infer argument attributes for non-recursive functions, because
  // they can affect optimization behavior in conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions, since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether a call's callee modifies memory and queries that
    // through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter, /*ArgAttrsOnly=*/false)
              .empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to avoid
  // the cost of building up a reversible post-order list. We assert them here
  // to make sure none of the invariants this relies on were violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function
  // too as norecurse. Note that the uses must actually be calls; otherwise
  // a pointer to this function could be returned from a norecurse function
  // and this function could still be recursively (indirectly) called. Note
  // that this also detects if F is directly recursive, as F is not yet
  // marked as a norecurse function.
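  // For illustration, if every user of F looks like
  //   call void @F()   ; inside a caller already marked norecurse
  // then F can only ever be entered from non-recursive code, so it can never
  // appear twice on the call stack.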
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}

static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse, so we need only save the single-function SCCs;
  // SCCs containing multiple functions are clearly recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}