//===- FunctionAttrs.cpp - Pass which marks function attributes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <optional>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "function-attrs"

STATISTIC(NumMemoryAttr, "Number of functions with improved memory attribute");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoUndefReturn, "Number of function returns marked noundef");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");

STATISTIC(NumThinLinkNoRecurse,
          "Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
          "Number of functions marked as nounwind during thinlink");

static cl::opt<bool> EnableNonnullArgPropagation(
    "enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
    cl::desc("Try to propagate nonnull argument attributes from callsites to "
             "caller functions."));

static cl::opt<bool> DisableNoUnwindInference(
    "disable-nounwind-inference", cl::Hidden,
    cl::desc("Stop inferring nounwind attribute during function-attrs pass"));

static cl::opt<bool> DisableNoFreeInference(
    "disable-nofree-inference", cl::Hidden,
    cl::desc("Stop inferring nofree attribute during function-attrs pass"));

static cl::opt<bool> DisableThinLTOPropagation(
    "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
    cl::desc("Don't propagate function-attrs in thinLTO"));

namespace {

using SCCNodeSet = SmallSetVector<Function *, 8>;

} // end anonymous namespace

static void addLocAccess(MemoryEffects &ME, const MemoryLocation &Loc,
                         ModRefInfo MR, AAResults &AAR) {
  // Ignore accesses to known-invariant or local memory.
  MR &= AAR.getModRefInfoMask(Loc, /*IgnoreLocals=*/true);
  if (isNoModRef(MR))
    return;

  const Value *UO = getUnderlyingObject(Loc.Ptr);
  assert(!isa<AllocaInst>(UO) &&
         "Should have been handled by getModRefInfoMask()");
  if (isa<Argument>(UO)) {
    ME |= MemoryEffects::argMemOnly(MR);
    return;
  }

  // If it's not an identified object, it might be an argument.
  if (!isIdentifiedObject(UO))
    ME |= MemoryEffects::argMemOnly(MR);
  ME |= MemoryEffects(IRMemLocation::Other, MR);
}

static void addArgLocs(MemoryEffects &ME, const CallBase *Call,
                       ModRefInfo ArgMR, AAResults &AAR) {
  for (const Value *Arg : Call->args()) {
    if (!Arg->getType()->isPtrOrPtrVectorTy())
      continue;

    addLocAccess(ME,
                 MemoryLocation::getBeforeOrAfter(Arg, Call->getAAMetadata()),
                 ArgMR, AAR);
  }
}

/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
///
/// The return value is split into two parts: memory effects that always apply,
/// and additional memory effects that apply if any of the functions in the SCC
/// can access argmem.
static std::pair<MemoryEffects, MemoryEffects>
checkFunctionMemoryAccess(Function &F, bool ThisBody, AAResults &AAR,
                          const SCCNodeSet &SCCNodes) {
  MemoryEffects OrigME = AAR.getMemoryEffects(&F);
  if (OrigME.doesNotAccessMemory())
    // Already perfect!
    return {OrigME, MemoryEffects::none()};

  if (!ThisBody)
    return {OrigME, MemoryEffects::none()};

  MemoryEffects ME = MemoryEffects::none();
  // Additional locations accessed if the SCC accesses argmem.
  MemoryEffects RecursiveArgME = MemoryEffects::none();

  // Inalloca and preallocated arguments are always clobbered by the call.
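  // Conservatively model this as both a read and a write of argument memory.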
  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated))
    ME |= MemoryEffects::argMemOnly(ModRefInfo::ModRef);

  // Scan the function body for instructions that may read or write memory.
  for (Instruction &I : instructions(F)) {
    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      // We can optimistically ignore calls to functions in the same SCC, with
      // two caveats:
      //   * Calls with operand bundles may have additional effects.
      //   * Argument memory accesses may imply additional effects depending on
      //     what the argument location is.
      if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
          SCCNodes.count(Call->getCalledFunction())) {
        // Keep track of which additional locations are accessed if the SCC
        // turns out to access argmem.
        addArgLocs(RecursiveArgME, Call, ModRefInfo::ModRef, AAR);
        continue;
      }

      MemoryEffects CallME = AAR.getMemoryEffects(Call);

      // If the call doesn't access memory, we're done.
      if (CallME.doesNotAccessMemory())
        continue;

      // A pseudo probe call shouldn't change any function attribute since it
      // doesn't translate to a real instruction. It comes with a memory access
      // tag to prevent itself from being removed by optimizations while not
      // blocking other instructions from being optimized.
      if (isa<PseudoProbeInst>(I))
        continue;

      ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);

      // If the call accesses captured memory (currently part of "other") and
      // an argument is captured (currently not tracked), then it may also
      // access argument memory.
      ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
      ME |= MemoryEffects::argMemOnly(OtherMR);

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
      if (ArgMR != ModRefInfo::NoModRef)
        addArgLocs(ME, Call, ArgMR, AAR);
      continue;
    }

    ModRefInfo MR = ModRefInfo::NoModRef;
    if (I.mayWriteToMemory())
      MR |= ModRefInfo::Mod;
    if (I.mayReadFromMemory())
      MR |= ModRefInfo::Ref;
    if (MR == ModRefInfo::NoModRef)
      continue;

    std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(&I);
    if (!Loc) {
      // If no location is known, conservatively assume anything can be
      // accessed.
      ME |= MemoryEffects(MR);
      continue;
    }

    // Volatile operations may access inaccessible memory.
    if (I.isVolatile())
      ME |= MemoryEffects::inaccessibleMemOnly(MR);

    addLocAccess(ME, *Loc, MR, AAR);
  }

  return {OrigME & ME, RecursiveArgME};
}

MemoryEffects llvm::computeFunctionBodyMemoryAccess(Function &F,
                                                    AAResults &AAR) {
  return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {}).first;
}

/// Deduce readonly/readnone/writeonly attributes for the SCC.
template <typename AARGetterT>
static void addMemoryAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
                           SmallSet<Function *, 8> &Changed) {
  MemoryEffects ME = MemoryEffects::none();
  MemoryEffects RecursiveArgME = MemoryEffects::none();
  for (Function *F : SCCNodes) {
    // Call the callable parameter to look up AA results for this function.
    AAResults &AAR = AARGetter(*F);
    // Non-exact function definitions may not be selected at link time, and an
    // alternative version that writes to memory may be selected. See the
    // comment on GlobalValue::isDefinitionExact for more details.
    auto [FnME, FnRecursiveArgME] =
        checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes);
    ME |= FnME;
    RecursiveArgME |= FnRecursiveArgME;
    // We have reached the bottom of the lattice; the result cannot be
    // improved further.
    if (ME == MemoryEffects::unknown())
      return;
  }

  // If the SCC accesses argmem, add recursive accesses resulting from that.
  ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
  if (ArgMR != ModRefInfo::NoModRef)
    ME |= RecursiveArgME & MemoryEffects(ArgMR);

  for (Function *F : SCCNodes) {
    MemoryEffects OldME = F->getMemoryEffects();
    MemoryEffects NewME = ME & OldME;
    if (NewME != OldME) {
      ++NumMemoryAttr;
      F->setMemoryEffects(NewME);
      // Remove conflicting writable attributes.
      if (!isModSet(NewME.getModRef(IRMemLocation::ArgMem)))
        for (Argument &A : F->args())
          A.removeAttr(Attribute::Writable);
      Changed.insert(F);
    }
  }
}

// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types.
static FunctionSummary *calculatePrevailingSummary(
    ValueInfo VI,
    DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {

  if (CachedPrevailingSummary.count(VI))
    return CachedPrevailingSummary[VI];

  /// At this point, prevailing symbols have been resolved. The following leads
  /// to returning a conservative result:
  /// - Multiple instances with local linkage. Normally local linkage would be
  ///   unique per module as the GUID includes the module path. We could have a
  ///   GUID alias if there wasn't any distinguishing path when each file was
  ///   compiled, but that should be rare so we'll punt on those.
  ///
  /// These next 2 cases should not happen and will assert:
  /// - Multiple instances with external linkage. This should be caught in
  ///   symbol resolution.
  /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
  ///   knowledge meaning we have to go conservative.
  ///
  /// Otherwise, we calculate attributes for a function as:
  /// 1. If we have a local linkage, take its attributes. If there's somehow
  ///    multiple, bail and go conservative.
  /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
  ///    prevailing, take its attributes.
  /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
  ///    differences. However, if the prevailing copy is known it will be used
  ///    so take its attributes. If the prevailing copy is in a native file
  ///    all IR copies will be dead and propagation will go conservative.
  /// 4. AvailableExternally summaries without a prevailing copy are known to
  ///    occur in a couple of circumstances:
  ///    a. An internal function gets imported due to its caller getting
  ///       imported, it becomes AvailableExternally but no prevailing
  ///       definition exists. Because it has to get imported along with its
  ///       caller the attributes will be captured by propagating on its
  ///       caller.
  ///    b. C++11 [temp.explicit]p10 can generate AvailableExternally
  ///       definitions of explicitly instantiated template declarations
  ///       for inlining which are ultimately dropped from the TU. Since this
  ///       is localized to the TU the attributes will have already made it to
  ///       the callers.
  ///    These are edge cases and already captured by their callers so we
  ///    ignore these for now. If they become relevant to optimize in the
  ///    future this can be revisited.
  /// 5. Otherwise, go conservative.

  CachedPrevailingSummary[VI] = nullptr;
  FunctionSummary *Local = nullptr;
  FunctionSummary *Prevailing = nullptr;

  for (const auto &GVS : VI.getSummaryList()) {
    if (!GVS->isLive())
      continue;

    FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
    // Virtual and Unknown (e.g. indirect) calls require going conservative.
    if (!FS || FS->fflags().HasUnknownCall)
      return nullptr;

    const auto &Linkage = GVS->linkage();
    if (GlobalValue::isLocalLinkage(Linkage)) {
      if (Local) {
        LLVM_DEBUG(
            dbgs()
            << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
               "function "
            << VI.name() << " from " << FS->modulePath() << ". Previous module "
            << Local->modulePath() << "\n");
        return nullptr;
      }
      Local = FS;
    } else if (GlobalValue::isExternalLinkage(Linkage)) {
      assert(IsPrevailing(VI.getGUID(), GVS.get()));
      Prevailing = FS;
      break;
    } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
               GlobalValue::isLinkOnceODRLinkage(Linkage) ||
               GlobalValue::isWeakAnyLinkage(Linkage) ||
               GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
      if (IsPrevailing(VI.getGUID(), GVS.get())) {
        Prevailing = FS;
        break;
      }
    } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
      // TODO: Handle these cases if they become meaningful.
      continue;
    }
  }

  if (Local) {
    assert(!Prevailing);
    CachedPrevailingSummary[VI] = Local;
  } else if (Prevailing) {
    assert(!Local);
    CachedPrevailingSummary[VI] = Prevailing;
  }

  return CachedPrevailingSummary[VI];
}

bool llvm::thinLTOPropagateFunctionAttrs(
    ModuleSummaryIndex &Index,
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing) {
  // TODO: implement addNoAliasAttrs once there's more information about the
  // return type in the summary.
  if (DisableThinLTOPropagation)
    return false;

  DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
  bool Changed = false;

  auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
    // Assume we can propagate unless we discover otherwise.
    FunctionSummary::FFlags InferredFlags;
    InferredFlags.NoRecurse = (SCCNodes.size() == 1);
    InferredFlags.NoUnwind = true;

    for (auto &V : SCCNodes) {
      FunctionSummary *CallerSummary =
          calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);

      // Function summaries can fail to contain information such as
      // declarations.
      if (!CallerSummary)
        return;

      if (CallerSummary->fflags().MayThrow)
        InferredFlags.NoUnwind = false;

      for (const auto &Callee : CallerSummary->calls()) {
        FunctionSummary *CalleeSummary = calculatePrevailingSummary(
            Callee.first, CachedPrevailingSummary, IsPrevailing);

        if (!CalleeSummary)
          return;

        if (!CalleeSummary->fflags().NoRecurse)
          InferredFlags.NoRecurse = false;

        if (!CalleeSummary->fflags().NoUnwind)
          InferredFlags.NoUnwind = false;

        if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
          break;
      }
    }

    if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
      Changed = true;
      for (auto &V : SCCNodes) {
        if (InferredFlags.NoRecurse) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
                            << V.name() << "\n");
          ++NumThinLinkNoRecurse;
        }

        if (InferredFlags.NoUnwind) {
          LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
                            << V.name() << "\n");
          ++NumThinLinkNoUnwind;
        }

        for (const auto &S : V.getSummaryList()) {
          if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
            if (InferredFlags.NoRecurse)
              FS->setNoRecurse();

            if (InferredFlags.NoUnwind)
              FS->setNoUnwind();
          }
        }
      }
    }
  };

  // Call propagation functions on each SCC in the Index.
  for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
       ++I) {
    std::vector<ValueInfo> Nodes(*I);
    PropagateAttributes(Nodes);
  }
  return Changed;
}

namespace {

/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
  Argument *Definition;
  SmallVector<ArgumentGraphNode *, 4> Uses;
};

class ArgumentGraph {
  // We store pointers to ArgumentGraphNode objects, so it's important that
  // they not move around upon insert.
  using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;

  ArgumentMapTy ArgumentMap;

  // There is no root node for the argument graph, in fact:
  //   void f(int *x, int *y) { if (...) f(x, y); }
  // is an example where the graph is disconnected. The SCCIterator requires a
  // single entry point, so we maintain a fake ("synthetic") root node that
  // uses every node. Because the graph is directed and nothing points into
  // the root, it will not participate in any SCCs (except for its own).
  ArgumentGraphNode SyntheticRoot;

public:
  ArgumentGraph() { SyntheticRoot.Definition = nullptr; }

  using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  iterator begin() { return SyntheticRoot.Uses.begin(); }
  iterator end() { return SyntheticRoot.Uses.end(); }
  ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }

  ArgumentGraphNode *operator[](Argument *A) {
    ArgumentGraphNode &Node = ArgumentMap[A];
    Node.Definition = A;
    SyntheticRoot.Uses.push_back(&Node);
    return &Node;
  }
};

/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
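///
/// For example (illustrative IR, not from the original source), with @f and
/// @g in the same SCC:
///   define void @f(ptr %p) { call void @g(ptr %p) ... }
/// the use of %p at the call site is not recorded as a capture; instead the
/// matching formal argument of @g is added to Uses so that the argument SCC
/// can be analyzed as a whole.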
struct ArgumentUsesTracker : public CaptureTracker {
  ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    CallBase *CB = dyn_cast<CallBase>(U->getUser());
    if (!CB) {
      Captured = true;
      return true;
    }

    Function *F = CB->getCalledFunction();
    if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
      Captured = true;
      return true;
    }

    assert(!CB->isCallee(U) && "callee operand reported captured?");
    const unsigned UseIndex = CB->getDataOperandNo(U);
    if (UseIndex >= CB->arg_size()) {
      // Data operand, but not an argument operand -- must be a bundle operand.
      assert(CB->hasOperandBundles() && "Must be!");

      // CaptureTracking told us that we're being captured by an operand bundle
      // use. In this case it does not matter if the callee is within our SCC
      // or not -- we've been captured in some unknown way, and we have to be
      // conservative.
      Captured = true;
      return true;
    }

    if (UseIndex >= F->arg_size()) {
      assert(F->isVarArg() && "More params than args in non-varargs call");
      Captured = true;
      return true;
    }

    Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
    return false;
  }

  // True only if certainly captured (used outside our SCC).
  bool Captured = false;

  // Uses within our SCC.
  SmallVector<Argument *, 4> Uses;

  const SCCNodeSet &SCCNodes;
};

} // end anonymous namespace

namespace llvm {

template <> struct GraphTraits<ArgumentGraphNode *> {
  using NodeRef = ArgumentGraphNode *;
  using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;

  static NodeRef getEntryNode(NodeRef A) { return A; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};

template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
  static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }

  static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
    return AG->begin();
  }

  static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};

} // end namespace llvm

/// Returns Attribute::None, Attribute::ReadOnly, Attribute::WriteOnly or
/// Attribute::ReadNone.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
                            const SmallPtrSet<Argument *, 8> &SCCNodes) {
  SmallVector<Use *, 32> Worklist;
  SmallPtrSet<Use *, 32> Visited;

  // inalloca arguments are always clobbered by the call.
  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
    return Attribute::None;

  bool IsRead = false;
  bool IsWrite = false;

  for (Use &U : A->uses()) {
    Visited.insert(&U);
    Worklist.push_back(&U);
  }

  while (!Worklist.empty()) {
    if (IsWrite && IsRead)
      // No point in searching further.
      return Attribute::None;

    Use *U = Worklist.pop_back_val();
    Instruction *I = cast<Instruction>(U->getUser());

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::GetElementPtr:
    case Instruction::PHI:
    case Instruction::Select:
    case Instruction::AddrSpaceCast:
      // The original value is not read/written via this if the new value isn't.
      for (Use &UU : I->uses())
        if (Visited.insert(&UU).second)
          Worklist.push_back(&UU);
      break;

    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*I);
      if (CB.isCallee(U)) {
        IsRead = true;
        // Note that indirect calls do not capture, see comment in
        // CaptureTracking for context.
        continue;
      }

      // Given we've explicitly handled the callee operand above, what's left
      // must be a data operand (e.g. argument or operand bundle).
      const unsigned UseIndex = CB.getDataOperandNo(U);

      // Some intrinsics (for instance ptrmask) do not capture their results,
      // but return results that alias their pointer argument, and thus should
      // be handled like GEP or addrspacecast above.
      if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
              &CB, /*MustPreserveNullness=*/false)) {
        for (Use &UU : CB.uses())
          if (Visited.insert(&UU).second)
            Worklist.push_back(&UU);
      } else if (!CB.doesNotCapture(UseIndex)) {
        if (!CB.onlyReadsMemory())
          // If the callee can save a copy into other memory, then simply
          // scanning uses of the call is insufficient. We have no way
          // of tracking copies of the pointer through memory to see
          // if a reloaded copy is written to, thus we must give up.
          return Attribute::None;
        // Push users for processing once we finish this one.
        if (!I->getType()->isVoidTy())
          for (Use &UU : I->uses())
            if (Visited.insert(&UU).second)
              Worklist.push_back(&UU);
      }

      ModRefInfo ArgMR = CB.getMemoryEffects().getModRef(IRMemLocation::ArgMem);
      if (isNoModRef(ArgMR))
        continue;

      if (Function *F = CB.getCalledFunction())
        if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
            SCCNodes.count(F->getArg(UseIndex)))
          // This is an argument which is part of the speculative SCC. Note
          // that only operands corresponding to formal arguments of the callee
          // can participate in the speculation.
          break;

      // The accessors used on call site here do the right thing for calls and
      // invokes with operand bundles.
      if (CB.doesNotAccessMemory(UseIndex)) {
        /* nop */
      } else if (!isModSet(ArgMR) || CB.onlyReadsMemory(UseIndex)) {
        IsRead = true;
      } else if (!isRefSet(ArgMR) ||
                 CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
        IsWrite = true;
      } else {
        return Attribute::None;
      }
      break;
    }

    case Instruction::Load:
      // A volatile load has side effects beyond what readonly can be relied
      // upon.
      if (cast<LoadInst>(I)->isVolatile())
        return Attribute::None;

      IsRead = true;
      break;

    case Instruction::Store:
      if (cast<StoreInst>(I)->getValueOperand() == *U)
        // untrackable capture
        return Attribute::None;

      // A volatile store has side effects beyond what writeonly can be relied
      // upon.
      if (cast<StoreInst>(I)->isVolatile())
        return Attribute::None;

      IsWrite = true;
      break;

    case Instruction::ICmp:
    case Instruction::Ret:
      break;

    default:
      return Attribute::None;
    }
  }

  if (IsWrite && IsRead)
    return Attribute::None;
  else if (IsRead)
    return Attribute::ReadOnly;
  else if (IsWrite)
    return Attribute::WriteOnly;
  else
    return Attribute::ReadNone;
}

/// Deduce returned attributes for the SCC.
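///
/// For example (illustrative IR, not from the original source), in
///   define ptr @identity(ptr %p) { ret ptr %p }
/// the argument %p can be marked 'returned' because every return in the
/// function returns it.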
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
                                     SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining if an argument is always returned.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    // There is nothing to do if an argument is already marked as 'returned'.
    if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
      continue;

    auto FindRetArg = [&]() -> Argument * {
      Argument *RetArg = nullptr;
      for (BasicBlock &BB : *F)
        if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
          // Note that stripPointerCasts should look through functions with
          // returned arguments.
          auto *RetVal =
              dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
          if (!RetVal || RetVal->getType() != F->getReturnType())
            return nullptr;

          if (!RetArg)
            RetArg = RetVal;
          else if (RetArg != RetVal)
            return nullptr;
        }

      return RetArg;
    };

    if (Argument *RetArg = FindRetArg()) {
      RetArg->addAttr(Attribute::Returned);
      ++NumReturned;
      Changed.insert(F);
    }
  }
}

/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
  if (!EnableNonnullArgPropagation)
    return false;

  bool Changed = false;

  // For an argument attribute to transfer from a callsite to the parent, the
  // call must be guaranteed to execute every time the parent is called.
  // Conservatively, just check for calls in the entry block that are guaranteed
  // to execute.
  // TODO: This could be enhanced by testing if the callsite post-dominates the
  // entry block or by doing simple forward walks or backward walks to the
  // callsite.
  BasicBlock &Entry = F.getEntryBlock();
  for (Instruction &I : Entry) {
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (auto *CalledFunc = CB->getCalledFunction()) {
        for (auto &CSArg : CalledFunc->args()) {
          if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
            continue;

          // If the non-null callsite argument operand is an argument to 'F'
          // (the caller) and the call is guaranteed to execute, then the value
          // must be non-null throughout 'F'.
          auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
          if (FArg && !FArg->hasNonNullAttr()) {
            FArg->addAttr(Attribute::NonNull);
            Changed = true;
          }
        }
      }
    }
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }

  return Changed;
}

static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
  assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
          R == Attribute::WriteOnly) &&
         "Must be an access attribute.");
  assert(A && "Argument must not be null.");

  // If the argument already has the attribute, nothing needs to be done.
  if (A->hasAttribute(R))
    return false;

  // Otherwise, remove potentially conflicting attribute, add the new one,
  // and update statistics.
  A->removeAttr(Attribute::WriteOnly);
  A->removeAttr(Attribute::ReadOnly);
  A->removeAttr(Attribute::ReadNone);
  // Remove conflicting writable attribute.
  if (R == Attribute::ReadNone || R == Attribute::ReadOnly)
    A->removeAttr(Attribute::Writable);
  A->addAttr(R);
  if (R == Attribute::ReadOnly)
    ++NumReadOnlyArg;
  else if (R == Attribute::WriteOnly)
    ++NumWriteOnlyArg;
  else
    ++NumReadNoneArg;
  return true;
}

/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  ArgumentGraph AG;

  // Check each function in turn, determining which pointer arguments are not
  // captured.
  for (Function *F : SCCNodes) {
    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      continue;

    if (addArgumentAttrsFromCallsites(*F))
      Changed.insert(F);

    // Functions that are readonly (or readnone) and nounwind and don't return
    // a value can't capture arguments. Don't analyze them.
    if (F->onlyReadsMemory() && F->doesNotThrow() &&
        F->getReturnType()->isVoidTy()) {
      for (Argument &A : F->args()) {
        if (A.getType()->isPointerTy() && !A.hasNoCaptureAttr()) {
          A.addAttr(Attribute::NoCapture);
          ++NumNoCapture;
          Changed.insert(F);
        }
      }
      continue;
    }

    for (Argument &A : F->args()) {
      if (!A.getType()->isPointerTy())
        continue;
      bool HasNonLocalUses = false;
      if (!A.hasNoCaptureAttr()) {
        ArgumentUsesTracker Tracker(SCCNodes);
        PointerMayBeCaptured(&A, &Tracker);
        if (!Tracker.Captured) {
          if (Tracker.Uses.empty()) {
            // If it's trivially not captured, mark it nocapture now.
            A.addAttr(Attribute::NoCapture);
            ++NumNoCapture;
            Changed.insert(F);
          } else {
            // If it's not trivially captured and not trivially not captured,
            // then it must be calling into another function in our SCC. Save
            // its particulars for Argument-SCC analysis later.
            ArgumentGraphNode *Node = AG[&A];
            for (Argument *Use : Tracker.Uses) {
              Node->Uses.push_back(AG[Use]);
              if (Use != &A)
                HasNonLocalUses = true;
            }
          }
        }
        // Otherwise, it's captured. Don't bother doing SCC analysis on it.
      }
      if (!HasNonLocalUses && !A.onlyReadsMemory()) {
        // Can we determine that it's readonly/readnone/writeonly without doing
        // an SCC? Note that we don't allow any calls at all here, or else our
        // result will be dependent on the iteration order through the
        // functions in the SCC.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&A, Self);
        if (R != Attribute::None)
          if (addAccessAttr(&A, R))
            Changed.insert(F);
      }
    }
  }

  // The graph we've collected is partial because we stopped scanning for
  // argument uses once we solved the argument trivially. These partial nodes
  // show up as ArgumentGraphNode objects with an empty Uses list, and for
  // these nodes the final decision about whether they capture has already been
  // made. If the definition doesn't have a 'nocapture' attribute by now, it
  // captures.

  for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
    const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
    if (ArgumentSCC.size() == 1) {
      if (!ArgumentSCC[0]->Definition)
        continue; // synthetic root node

      // e.g. "void f(int* x) { if (...) f(x); }"
      if (ArgumentSCC[0]->Uses.size() == 1 &&
          ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
        Argument *A = ArgumentSCC[0]->Definition;
        A->addAttr(Attribute::NoCapture);
        ++NumNoCapture;
        Changed.insert(A->getParent());

        // Infer the access attributes given the new nocapture one.
        SmallPtrSet<Argument *, 8> Self;
        Self.insert(&*A);
        Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
        if (R != Attribute::None)
          addAccessAttr(A, R);
      }
      continue;
    }

    bool SCCCaptured = false;
    for (ArgumentGraphNode *Node : ArgumentSCC) {
      if (Node->Uses.empty() && !Node->Definition->hasNoCaptureAttr()) {
        SCCCaptured = true;
        break;
      }
    }
    if (SCCCaptured)
      continue;

    SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
    // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
    // quickly looking up whether a given Argument is in this ArgumentSCC.
    for (ArgumentGraphNode *I : ArgumentSCC) {
      ArgumentSCCNodes.insert(I->Definition);
    }

    for (ArgumentGraphNode *N : ArgumentSCC) {
      for (ArgumentGraphNode *Use : N->Uses) {
        Argument *A = Use->Definition;
        if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
          continue;
        SCCCaptured = true;
        break;
      }
      if (SCCCaptured)
        break;
    }
    if (SCCCaptured)
      continue;

    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      A->addAttr(Attribute::NoCapture);
      ++NumNoCapture;
      Changed.insert(A->getParent());
    }

    // We also want to compute readonly/readnone/writeonly. With a small number
    // of false negatives, we can assume that any pointer which is captured
    // isn't going to be provably readonly or readnone, since by definition
    // we can't analyze all uses of a captured pointer.
    //
    // The false negatives happen when the pointer is captured by a function
    // that promises readonly/readnone behaviour on the pointer, then the
    // pointer's lifetime ends before anything that writes to arbitrary memory.
    // Also, a readonly/readnone pointer may be returned, but returning a
    // pointer is capturing it.

    auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
      if (A == B)
        return A;
      if (A == Attribute::ReadNone)
        return B;
      if (B == Attribute::ReadNone)
        return A;
      return Attribute::None;
    };

    Attribute::AttrKind AccessAttr = Attribute::ReadNone;
    for (ArgumentGraphNode *N : ArgumentSCC) {
      Argument *A = N->Definition;
      Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
      AccessAttr = meetAccessAttr(AccessAttr, K);
      if (AccessAttr == Attribute::None)
        break;
    }

    if (AccessAttr != Attribute::None) {
      for (ArgumentGraphNode *N : ArgumentSCC) {
        Argument *A = N->Definition;
        if (addAccessAttr(A, AccessAttr))
          Changed.insert(A->getParent());
      }
    }
  }
}

/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    if (Constant *C = dyn_cast<Constant>(RetVal)) {
      if (!C->isNullValue() && !isa<UndefValue>(C))
        return false;

      continue;
    }

    if (isa<Argument>(RetVal))
      return false;

    if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
      switch (RVI->getOpcode()) {
      // Extend the analysis by looking upwards.
      case Instruction::BitCast:
      case Instruction::GetElementPtr:
      case Instruction::AddrSpaceCast:
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      case Instruction::Select: {
        SelectInst *SI = cast<SelectInst>(RVI);
        FlowsToReturn.insert(SI->getTrueValue());
        FlowsToReturn.insert(SI->getFalseValue());
        continue;
      }
      case Instruction::PHI: {
        PHINode *PN = cast<PHINode>(RVI);
        for (Value *IncValue : PN->incoming_values())
          FlowsToReturn.insert(IncValue);
        continue;
      }

      // Check whether the pointer came from an allocation.
      case Instruction::Alloca:
        break;
      case Instruction::Call:
      case Instruction::Invoke: {
        CallBase &CB = cast<CallBase>(*RVI);
        if (CB.hasRetAttr(Attribute::NoAlias))
          break;
        if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
          break;
        [[fallthrough]];
      }
      default:
        return false; // Did not come from an allocation.
      }

    if (PointerMayBeCaptured(RetVal, /*ReturnCaptures=*/false,
                             /*StoreCaptures=*/false))
      return false;
  }

  return true;
}

/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noalias
  // pointers.
  for (Function *F : SCCNodes) {
    // Already noalias.
    if (F->returnDoesNotAlias())
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate noalias return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    if (!isFunctionMallocLike(F, SCCNodes))
      return;
  }

  for (Function *F : SCCNodes) {
    if (F->returnDoesNotAlias() ||
        !F->getReturnType()->isPointerTy())
      continue;

    F->setReturnDoesNotAlias();
    ++NumNoAlias;
    Changed.insert(F);
  }
}

/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return a null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
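///
/// For example (illustrative IR, not from the original source),
///   define ptr @f(ptr nonnull %p) { ret ptr %p }
/// is deduced to return nonnull, since the only returned value is known
/// non-null from the argument attribute.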
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
                            bool &Speculative) {
  assert(F->getReturnType()->isPointerTy() &&
         "nonnull only meaningful on pointer types");
  Speculative = false;

  SmallSetVector<Value *, 8> FlowsToReturn;
  for (BasicBlock &BB : *F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
      FlowsToReturn.insert(Ret->getReturnValue());

  auto &DL = F->getParent()->getDataLayout();

  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
    Value *RetVal = FlowsToReturn[i];

    // If this value is locally known to be non-null, we're good.
    if (isKnownNonZero(RetVal, DL))
      continue;

    // Otherwise, we need to look upwards since we can't make any local
    // conclusions.
    Instruction *RVI = dyn_cast<Instruction>(RetVal);
    if (!RVI)
      return false;
    switch (RVI->getOpcode()) {
    // Extend the analysis by looking upwards.
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      FlowsToReturn.insert(RVI->getOperand(0));
      continue;
    case Instruction::GetElementPtr:
      if (cast<GEPOperator>(RVI)->isInBounds()) {
        FlowsToReturn.insert(RVI->getOperand(0));
        continue;
      }
      return false;
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(RVI);
      FlowsToReturn.insert(SI->getTrueValue());
      FlowsToReturn.insert(SI->getFalseValue());
      continue;
    }
    case Instruction::PHI: {
      PHINode *PN = cast<PHINode>(RVI);
      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        FlowsToReturn.insert(PN->getIncomingValue(i));
      continue;
    }
    case Instruction::Call:
    case Instruction::Invoke: {
      CallBase &CB = cast<CallBase>(*RVI);
      Function *Callee = CB.getCalledFunction();
      // A call to a node within the SCC is assumed to return null until
      // proven otherwise.
      if (Callee && SCCNodes.count(Callee)) {
        Speculative = true;
        continue;
      }
      return false;
    }
    default:
      return false; // Unknown source, may be null.
    }
    llvm_unreachable("should have either continued or returned");
  }

  return true;
}

/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Speculatively assume that all functions in the SCC return only nonnull
  // pointers. We may refute this as we analyze functions.
  bool SCCReturnsNonNull = true;

  // Check each function in turn, determining which functions return nonnull
  // pointers.
  for (Function *F : SCCNodes) {
    // Already nonnull.
    if (F->getAttributes().hasRetAttr(Attribute::NonNull))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // We annotate nonnull return values, which are only applicable to
    // pointer types.
    if (!F->getReturnType()->isPointerTy())
      continue;

    bool Speculative = false;
    if (isReturnNonNull(F, SCCNodes, Speculative)) {
      if (!Speculative) {
        // Mark the function eagerly since we may discover a function
        // which prevents us from speculating about the entire SCC.
        LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
                          << " as nonnull\n");
        F->addRetAttr(Attribute::NonNull);
        ++NumNonNullReturn;
        Changed.insert(F);
      }
      continue;
    }
    // At least one function returns something which could be null, can't
    // speculate any more.
    SCCReturnsNonNull = false;
  }

  if (SCCReturnsNonNull) {
    for (Function *F : SCCNodes) {
      if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
          !F->getReturnType()->isPointerTy())
        continue;

      LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
      F->addRetAttr(Attribute::NonNull);
      ++NumNonNullReturn;
      Changed.insert(F);
    }
  }
}

/// Deduce noundef attributes for the SCC.
static void addNoUndefAttrs(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  // Check each function in turn, determining which functions return noundef
  // values.
  for (Function *F : SCCNodes) {
    // Already noundef.
    if (F->getAttributes().hasRetAttr(Attribute::NoUndef))
      continue;

    // We can infer and propagate function attributes only when we know that the
    // definition we'll get at link time is *exactly* the definition we see now.
    // For more details, see GlobalValue::mayBeDerefined.
    if (!F->hasExactDefinition())
      return;

    // MemorySanitizer assumes that the definition and declaration of a
    // function will be consistent. A function with the sanitize_memory
    // attribute should therefore be skipped during inference.
    if (F->hasFnAttribute(Attribute::SanitizeMemory))
      continue;

    if (F->getReturnType()->isVoidTy())
      continue;

    if (all_of(*F, [](BasicBlock &BB) {
          if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
            // TODO: perform context-sensitive analysis?
            return isGuaranteedNotToBeUndefOrPoison(Ret->getReturnValue());
          }
          return true;
        })) {
      F->addRetAttr(Attribute::NoUndef);
      ++NumNoUndefReturn;
      Changed.insert(F);
    }
  }
}

namespace {

/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
  /// Describes a request for inference of a single attribute.
  struct InferenceDescriptor {

    /// Returns true if this function does not have to be handled.
    /// General intent for this predicate is to provide an optimization
    /// for functions that do not need this attribute inference at all
    /// (say, for functions that already have the attribute).
    std::function<bool(const Function &)> SkipFunction;

    /// Returns true if this instruction violates attribute assumptions.
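    /// For example, the nounwind inference registers a predicate here that
    /// returns true for any instruction that may throw.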
    std::function<bool(Instruction &)> InstrBreaksAttribute;

    /// Sets the inferred attribute for this function.
    std::function<void(Function &)> SetAttribute;

    /// Attribute we derive.
    Attribute::AttrKind AKind;

    /// If true, only "exact" definitions can be used to infer this attribute.
    /// See GlobalValue::isDefinitionExact.
    bool RequiresExactDefinition;

    InferenceDescriptor(Attribute::AttrKind AK,
                        std::function<bool(const Function &)> SkipFunc,
                        std::function<bool(Instruction &)> InstrScan,
                        std::function<void(Function &)> SetAttr,
                        bool ReqExactDef)
        : SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
          SetAttribute(SetAttr), AKind(AK),
          RequiresExactDefinition(ReqExactDef) {}
  };

private:
  SmallVector<InferenceDescriptor, 4> InferenceDescriptors;

public:
  void registerAttrInference(InferenceDescriptor AttrInference) {
    InferenceDescriptors.push_back(AttrInference);
  }

  void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};

/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
                           SmallSet<Function *, 8> &Changed) {
  SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
  // Go through all the functions in SCC and check corresponding attribute
  // assumptions for each of them. Attributes that are invalid for this SCC
  // will be removed from InferInSCC.
  for (Function *F : SCCNodes) {

    // No attributes whose assumptions are still valid - done.
    if (InferInSCC.empty())
      return;

    // Check if our attributes ever need scanning/can be scanned.
    llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
      if (ID.SkipFunction(*F))
        return false;

      // Remove from further inference (invalidate) when visiting a function
      // that has no instructions to scan/has an unsuitable definition.
      return F->isDeclaration() ||
             (ID.RequiresExactDefinition && !F->hasExactDefinition());
    });

    // For each attribute still in InferInSCC that doesn't explicitly skip F,
    // set up a scan of F's instructions to verify the attribute's assumptions.
    SmallVector<InferenceDescriptor, 4> InferInThisFunc;
    llvm::copy_if(
        InferInSCC, std::back_inserter(InferInThisFunc),
        [F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });

    if (InferInThisFunc.empty())
      continue;

    // Start instruction scan.
    for (Instruction &I : instructions(*F)) {
      llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
        if (!ID.InstrBreaksAttribute(I))
          return false;
        // Remove attribute from further inference on any other functions
        // because attribute assumptions have just been violated.
        llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
          return D.AKind == ID.AKind;
        });
        // Remove attribute from the rest of current instruction scan.
        return true;
      });

      if (InferInThisFunc.empty())
        break;
    }
  }

  if (InferInSCC.empty())
    return;

  for (Function *F : SCCNodes)
    // At this point every attribute still in InferInSCC had its assumptions
    // hold across the SCC: each function was either explicitly skipped or
    // verified to have no instructions that break the attribute's assumptions.
    // Hence we just go and force the attribute for all non-skipped functions.
    for (auto &ID : InferInSCC) {
      if (ID.SkipFunction(*F))
        continue;
      Changed.insert(F);
      ID.SetAttribute(*F);
    }
}

struct SCCNodesResult {
  SCCNodeSet SCCNodes;
  bool HasUnknownCall;
};

} // end anonymous namespace

/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
                                     const SCCNodeSet &SCCNodes) {
  const CallBase *CB = dyn_cast<CallBase>(&I);
  // Breaks non-convergent assumption if CS is a convergent call to a function
  // not in the SCC.
  return CB && CB->isConvergent() &&
         !SCCNodes.contains(CB->getCalledFunction());
}

/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
  if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
    return false;
  if (const auto *CI = dyn_cast<CallInst>(&I)) {
    if (Function *Callee = CI->getCalledFunction()) {
      // I is a may-throw call to a function inside our SCC. This doesn't
      // invalidate our current working assumption that the SCC is no-throw; we
      // just have to scan that other function.
      if (SCCNodes.contains(Callee))
        return false;
    }
  }
  return true;
}

/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
  CallBase *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    return false;

  if (CB->hasFnAttr(Attribute::NoFree))
    return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different from the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
  if (!I->isAtomic())
    return false;

  if (auto *FI = dyn_cast<FenceInst>(I))
    // All legal orderings for fence are stronger than monotonic.
    return FI->getSyncScopeID() != SyncScope::SingleThread;
  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
    return true;
  else if (auto *SI = dyn_cast<StoreInst>(I))
    return !SI->isUnordered();
  else if (auto *LI = dyn_cast<LoadInst>(I))
    return !LI->isUnordered();
  else {
    llvm_unreachable("unknown atomic instruction?");
  }
}

static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
  // Volatile may synchronize.
  if (I.isVolatile())
    return true;

  // An ordered atomic may synchronize. (See the comment above on monotonic.)
  if (isOrderedAtomic(&I))
    return true;

  auto *CB = dyn_cast<CallBase>(&I);
  if (!CB)
    // Non-call-site cases are covered by the two checks above.
    return false;

  if (CB->hasFnAttr(Attribute::NoSync))
    return false;

  // Non-volatile memset/memcpy/memmoves are nosync.
  // NOTE: Only intrinsics with volatile flags should be handled here. All
  // others should be marked in Intrinsics.td.
  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
    if (!MI->isVolatile())
      return false;

  // Speculatively assume in SCC.
  if (Function *Callee = CB->getCalledFunction())
    if (SCCNodes.contains(Callee))
      return false;

  return true;
}

/// Attempt to remove the convergent function attribute when possible.
///
/// Any function whose attributes changed is added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
                            SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  // Request to remove the convergent attribute from all functions in the SCC
  // if every callsite within the SCC is not convergent (except for calls
  // to functions within the SCC).
  // Note: Removal of the attr from the callsites will happen in
  // InstCombineCalls separately.
  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::Convergent,
      // Skip non-convergent functions.
      [](const Function &F) { return !F.isConvergent(); },
      // Instructions that break non-convergent assumption.
      [SCCNodes](Instruction &I) {
        return InstrBreaksNonConvergent(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
                          << "\n");
        F.setNotConvergent();
      },
      /* RequiresExactDefinition= */ false});
  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Any function whose attributes changed is added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that nofree
    // attribute suffers from derefinement - results may change depending on
    // how functions are optimized. Thus it can be inferred only from exact
    // definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions.
///
/// Functions whose attributes were changed are added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
                                         SmallSet<Function *, 8> &Changed) {
  AttributeInferer AI;

  if (!DisableNoUnwindInference)
    // Request to infer nounwind attribute for all the functions in the SCC if
    // every callsite within the SCC is not throwing (except for calls to
    // functions within the SCC). Note that the nounwind attribute suffers from
    // derefinement - results may change depending on how functions are
    // optimized. Thus it can be inferred only from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoUnwind,
        // Skip non-throwing functions.
        [](const Function &F) { return F.doesNotThrow(); },
        // Instructions that break the non-throwing assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNonThrowing(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nounwind attr to fn " << F.getName() << "\n");
          F.setDoesNotThrow();
          ++NumNoUnwind;
        },
        /* RequiresExactDefinition= */ true});

  if (!DisableNoFreeInference)
    // Request to infer nofree attribute for all the functions in the SCC if
    // every callsite within the SCC does not directly or indirectly free
    // memory (except for calls to functions within the SCC). Note that the
    // nofree attribute suffers from derefinement - results may change
    // depending on how functions are optimized. Thus it can be inferred only
    // from exact definitions.
    AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
        Attribute::NoFree,
        // Skip functions known not to free memory.
        [](const Function &F) { return F.doesNotFreeMemory(); },
        // Instructions that break the non-deallocating assumption.
        [&SCCNodes](Instruction &I) {
          return InstrBreaksNoFree(I, SCCNodes);
        },
        [](Function &F) {
          LLVM_DEBUG(dbgs()
                     << "Adding nofree attr to fn " << F.getName() << "\n");
          F.setDoesNotFreeMemory();
          ++NumNoFree;
        },
        /* RequiresExactDefinition= */ true});

  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
      Attribute::NoSync,
      // Skip already-marked functions.
      [](const Function &F) { return F.hasNoSync(); },
      // Instructions that break the nosync assumption.
      [&SCCNodes](Instruction &I) {
        return InstrBreaksNoSync(I, SCCNodes);
      },
      [](Function &F) {
        LLVM_DEBUG(dbgs()
                   << "Adding nosync attr to fn " << F.getName() << "\n");
        F.setNoSync();
        ++NumNoSync;
      },
      /* RequiresExactDefinition= */ true});

  // Perform all the requested attribute inference actions.
  AI.run(SCCNodes, Changed);
}

static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
                              SmallSet<Function *, 8> &Changed) {
  // Try to identify functions that do not recurse.

  // If the SCC contains multiple nodes, we know for sure there is recursion.
  if (SCCNodes.size() != 1)
    return;

  Function *F = *SCCNodes.begin();
  if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
    return;

  // If all of the calls in F are identifiable and are to norecurse functions,
  // F is norecurse. This check also detects self-recursion, as F is not
  // currently marked norecurse, so any call from F to F will not be treated
  // as a call to a norecurse function.
  for (auto &BB : *F)
    for (auto &I : BB.instructionsWithoutDebug())
      if (auto *CB = dyn_cast<CallBase>(&I)) {
        Function *Callee = CB->getCalledFunction();
        if (!Callee || Callee == F ||
            (!Callee->doesNotRecurse() &&
             !(Callee->isDeclaration() &&
               Callee->hasFnAttribute(Attribute::NoCallback))))
          // Function calls a potentially recursive function.
          return;
      }

  // Every call was to a non-recursive function other than this function, and
  // we have no indirect recursion as the SCC size is one. This function cannot
  // recurse.
  F->setDoesNotRecurse();
  ++NumNoRecurse;
  Changed.insert(F);
}

static bool instructionDoesNotReturn(Instruction &I) {
  if (auto *CB = dyn_cast<CallBase>(&I))
    return CB->hasFnAttr(Attribute::NoReturn);
  return false;
}

// A basic block can only return if it terminates with a ReturnInst and does
// not contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
  if (!isa<ReturnInst>(BB.getTerminator()))
    return false;
  return none_of(BB, instructionDoesNotReturn);
}

// FIXME: this doesn't handle recursion.
static bool canReturn(Function &F) {
  SmallVector<BasicBlock *, 16> Worklist;
  SmallPtrSet<BasicBlock *, 16> Visited;

  Visited.insert(&F.front());
  Worklist.push_back(&F.front());

  do {
    BasicBlock *BB = Worklist.pop_back_val();
    if (basicBlockCanReturn(*BB))
      return true;
    for (BasicBlock *Succ : successors(BB))
      if (Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  } while (!Worklist.empty());

  return false;
}
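// Two hypothetical IR examples where canReturn is false: @spin has no block
// that terminates in a ReturnInst, and @wrap only reaches a call to a
// noreturn callee followed by unreachable.
//
//   define void @spin() {
//   entry:
//     br label %loop
//   loop:
//     br label %loop
//   }
//
//   declare void @abort_like() noreturn
//   define void @wrap() {
//     call void @abort_like()
//     unreachable
//   }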
// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
                             SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || !F->hasExactDefinition() ||
        F->hasFnAttribute(Attribute::Naked) || F->doesNotReturn())
      continue;

    if (!canReturn(*F)) {
      F->setDoesNotReturn();
      Changed.insert(F);
    }
  }
}

static bool functionWillReturn(const Function &F) {
  // We can infer and propagate function attributes only when we know that the
  // definition we'll get at link time is *exactly* the definition we see now.
  // For more details, see GlobalValue::mayBeDerefined.
  if (!F.hasExactDefinition())
    return false;

  // A must-progress function without side effects must return.
  if (F.mustProgress() && F.onlyReadsMemory())
    return true;

  // Can only analyze functions with a definition.
  if (F.isDeclaration())
    return false;

  // Functions with loops require more sophisticated analysis, as the loop
  // may be infinite. For now, don't try to handle them.
  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
  FindFunctionBackedges(F, Backedges);
  if (!Backedges.empty())
    return false;

  // If there are no loops, then the function is willreturn if all calls in
  // it are willreturn.
  return all_of(instructions(F),
                [](const Instruction &I) { return I.willReturn(); });
}

// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
                          SmallSet<Function *, 8> &Changed) {
  for (Function *F : SCCNodes) {
    if (!F || F->willReturn() || !functionWillReturn(*F))
      continue;

    F->setWillReturn();
    NumWillReturn++;
    Changed.insert(F);
  }
}

static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
  SCCNodesResult Res;
  Res.HasUnknownCall = false;
  for (Function *F : Functions) {
    if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
        F->isPresplitCoroutine()) {
      // Treat any function we're trying not to optimize as if it were an
      // indirect call and omit it from the node set used below.
      Res.HasUnknownCall = true;
      continue;
    }
    // Track whether any functions in this SCC have an unknown call edge.
    // Note: if this is ever a performance hit, we can common it with
    // subsequent routines which also do scans over the instructions of the
    // function.
    if (!Res.HasUnknownCall) {
      for (Instruction &I : instructions(*F)) {
        if (auto *CB = dyn_cast<CallBase>(&I)) {
          if (!CB->getCalledFunction()) {
            Res.HasUnknownCall = true;
            break;
          }
        }
      }
    }
    Res.SCCNodes.insert(F);
  }
  return Res;
}
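// For example (hypothetical IR), the indirect call below has no statically
// known callee, so createSCCNodeSet records HasUnknownCall for the SCC; that
// in turn suppresses the SCC-internal-only inferences (noalias, nonnull,
// nounwind/nofree/nosync, norecurse) performed later:
//
//   define void @f(ptr %fp) {
//     call void %fp()
//     ret void
//   }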
template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
                       bool ArgAttrsOnly = false) {
  SCCNodesResult Nodes = createSCCNodeSet(Functions);

  // Bail if the SCC only contains optnone functions.
  if (Nodes.SCCNodes.empty())
    return {};

  SmallSet<Function *, 8> Changed;
  if (ArgAttrsOnly) {
    addArgumentAttrs(Nodes.SCCNodes, Changed);
    return Changed;
  }

  addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
  addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
  addArgumentAttrs(Nodes.SCCNodes, Changed);
  inferConvergent(Nodes.SCCNodes, Changed);
  addNoReturnAttrs(Nodes.SCCNodes, Changed);
  addWillReturn(Nodes.SCCNodes, Changed);
  addNoUndefAttrs(Nodes.SCCNodes, Changed);

  // If we have no external nodes participating in the SCC, we can deduce some
  // more precise attributes as well.
  if (!Nodes.HasUnknownCall) {
    addNoAliasAttrs(Nodes.SCCNodes, Changed);
    addNonNullAttrs(Nodes.SCCNodes, Changed);
    inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
    addNoRecurseAttrs(Nodes.SCCNodes, Changed);
  }

  // Finally, infer the maximal set of attributes from the ones we've inferred
  // above. This handles the cases where one attribute on a signature implies
  // another, but for implementation reasons the inference rule for the latter
  // is missing (or simply less sophisticated).
  for (Function *F : Nodes.SCCNodes)
    if (F)
      if (inferAttributesFromOthers(*F))
        Changed.insert(F);

  return Changed;
}
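// As a hypothetical end-to-end example, for a leaf function like
//
//   define i32 @sq(i32 %x) {
//     %r = mul i32 %x, %x
//     ret i32 %r
//   }
//
// the sequence above can conclude memory(none), nofree, nosync, nounwind,
// willreturn and norecurse for @sq; inferAttributesFromOthers can then add
// implied attributes such as mustprogress (implied by willreturn). Exact
// results depend on the available analyses and on linkage.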
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                  CGSCCAnalysisManager &AM,
                                                  LazyCallGraph &CG,
                                                  CGSCCUpdateResult &) {
  // Skip non-recursive functions if requested. In that case, only infer
  // argument attributes, because they can affect optimization behavior in
  // conjunction with noalias.
  bool ArgAttrsOnly = false;
  if (C.size() == 1 && SkipNonRecursive) {
    LazyCallGraph::Node &N = *C.begin();
    if (!N->lookup(N))
      ArgAttrsOnly = true;
  }

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

  // We pass a lambda into functions to wire them up to the analysis manager
  // for getting function analyses.
  auto AARGetter = [&](Function &F) -> AAResults & {
    return FAM.getResult<AAManager>(F);
  };

  SmallVector<Function *, 8> Functions;
  for (LazyCallGraph::Node &N : C) {
    Functions.push_back(&N.getFunction());
  }

  auto ChangedFunctions =
      deriveAttrsInPostOrder(Functions, AARGetter, ArgAttrsOnly);
  if (ChangedFunctions.empty())
    return PreservedAnalyses::all();

  // Invalidate analyses for modified functions so that we don't have to
  // invalidate all analyses for all functions in this SCC.
  PreservedAnalyses FuncPA;
  // We haven't changed the CFG for modified functions.
  FuncPA.preserveSet<CFGAnalyses>();
  for (Function *Changed : ChangedFunctions) {
    FAM.invalidate(*Changed, FuncPA);
    // Also invalidate any direct callers of changed functions since analyses
    // may care about attributes of direct callees. For example, MemorySSA
    // cares about whether or not a call's callee modifies memory and queries
    // that through function attributes.
    for (auto *U : Changed->users()) {
      if (auto *Call = dyn_cast<CallBase>(U)) {
        if (Call->getCalledFunction() == Changed)
          FAM.invalidate(*Call->getFunction(), FuncPA);
      }
    }
  }

  PreservedAnalyses PA;
  // We have not added or removed functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We already invalidated all relevant function analyses above.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}

void PostOrderFunctionAttrsPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  if (SkipNonRecursive)
    OS << "<skip-non-recursive-function-attrs>";
}

template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
  SmallVector<Function *, 8> Functions;
  for (CallGraphNode *I : SCC) {
    Functions.push_back(I->getFunction());
  }

  return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}

static bool addNoRecurseAttrsTopDown(Function &F) {
  // We check the preconditions for the function prior to calling this to
  // avoid the cost of building up a reversible post-order list. We assert
  // them here to make sure none of the invariants this relies on were
  // violated.
  assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
  assert(!F.doesNotRecurse() &&
         "This function has already been deduced as norecurse!");
  assert(F.hasInternalLinkage() &&
         "Can only do top-down deduction for internal linkage functions!");

  // If F is internal and all of its uses are calls from non-recursive
  // functions, then none of its calls could in fact recurse without going
  // through a function marked norecurse, and so we can mark this function too
  // as norecurse. Note that the uses must actually be calls -- otherwise
  // a pointer to this function could be returned from a norecurse function but
  // this function could be recursively (indirectly) called. Note that this
  // also detects if F is directly recursive as F is not yet marked as
  // a norecurse function.
  for (auto &U : F.uses()) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    if (!I)
      return false;
    CallBase *CB = dyn_cast<CallBase>(I);
    if (!CB || !CB->isCallee(&U) ||
        !CB->getParent()->getParent()->doesNotRecurse())
      return false;
  }
  F.setDoesNotRecurse();
  ++NumNoRecurse;
  return true;
}
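// For instance (hypothetical IR), the bottom-up deduction cannot mark @helper
// norecurse because of the call to the external declaration @ext, but the
// top-down walk can: @helper is internal, and its only use is a direct call
// from the norecurse function @main.
//
//   declare void @ext()
//   define internal void @helper() {
//     call void @ext()
//     ret void
//   }
//   define void @main() norecurse {
//     call void @helper()
//     ret void
//   }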
static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
  // We only have a post-order SCC traversal (because SCCs are inherently
  // discovered in post-order), so we accumulate them in a vector and then walk
  // it in reverse. This is simpler than using the RPO iterator infrastructure
  // because we need to combine SCC detection and the PO walk of the call
  // graph. We can also cheat egregiously because we're primarily interested in
  // synthesizing norecurse and so we only need to save the singleton SCCs, as
  // SCCs with multiple functions in them are clearly recursive.

  SmallVector<Function *, 16> Worklist;
  CG.buildRefSCCs();
  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
    for (LazyCallGraph::SCC &SCC : RC) {
      if (SCC.size() != 1)
        continue;
      Function &F = SCC.begin()->getFunction();
      if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
        Worklist.push_back(&F);
    }
  }
  bool Changed = false;
  for (auto *F : llvm::reverse(Worklist))
    Changed |= addNoRecurseAttrsTopDown(*F);

  return Changed;
}

PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
  auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);

  if (!deduceFunctionAttributeInRPO(M, CG))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<LazyCallGraphAnalysis>();
  return PA;
}
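// Usage sketch (assuming the standard pass-registry names, where the CGSCC
// pass is registered as "function-attrs" and the module pass above as
// "rpo-function-attrs"): both can be invoked directly with opt, e.g.
//
//   opt -passes='cgscc(function-attrs)' -S in.ll
//   opt -passes='rpo-function-attrs' -S in.ll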